The Generalized Logistic Model¶
In [1]:
Copied!
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact
import plotly.graph_objects as go
import pandas as pd
import patsy as pt
from yeastdnnexplorer.utils.sigmoid import sigmoid
from yeastdnnexplorer.ml_models.GeneralizedLogisticModel import GeneralizedLogisticModel
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact
import plotly.graph_objects as go
import pandas as pd
import patsy as pt
from yeastdnnexplorer.utils.sigmoid import sigmoid
from yeastdnnexplorer.ml_models.GeneralizedLogisticModel import GeneralizedLogisticModel
The Sigmoid Function¶
The sigmoid()
function takes the following arguments:
- The observations as a model matrix, where the first column is a constant vector
- Upper Asymptote: The upper bound of the sigmoid curve.
- Lower Asymptote: The lower bound of the sigmoid curve.
- Coefficients (B): The coefficients of the linear combination of input variables that will control the shape of the sigmoid curve.
$$ Y(X) = \frac{right\_asymptote - left\_asymptote}{1 + e^{- X \cdot B }} + left\_asymptote $$
In [4]:
Copied!
glm_single_variable = GeneralizedLogisticModel()
# Generate sample data
np.random.seed(42)
X1 = np.linspace(-10, 10, 100)
X0 = np.ones_like(X1)
X = np.column_stack([X0, X1])
# True sigmoid parameters
true_left_asymptote = 1.2
true_right_asymptote = 6.0
true_slope = [-9, 5]
# Generate Y data using the true sigmoid function
Y_true = sigmoid(
X,
true_left_asymptote,
true_right_asymptote,
np.array(true_slope))
# Add some noise to the Y values
noise = 0.05 * np.random.randn(len(X1))
Y_noisy = Y_true.ravel() + noise
glm_single_variable.model(Y_noisy, X)
glm_single_variable.fit(method="lm") # , max_nfev=10000
Y_fitted = glm_single_variable.predict(X)
# Plot the results
plt.figure(figsize=(10, 6))
plt.scatter(X1, Y_noisy, color="gray", alpha=0.5, label="Noisy Data")
plt.plot(X1, Y_fitted, color="blue", label="Fitted Sigmoid Curve")
plt.title("Sigmoid Curve Fit for Single Predictor Variable")
plt.xlabel("X1")
plt.ylabel("Y")
plt.legend()
plt.grid(True)
plt.show()
glm_single_variable = GeneralizedLogisticModel()
# Generate sample data
np.random.seed(42)
X1 = np.linspace(-10, 10, 100)
X0 = np.ones_like(X1)
X = np.column_stack([X0, X1])
# True sigmoid parameters
true_left_asymptote = 1.2
true_right_asymptote = 6.0
true_slope = [-9, 5]
# Generate Y data using the true sigmoid function
Y_true = sigmoid(
X,
true_left_asymptote,
true_right_asymptote,
np.array(true_slope))
# Add some noise to the Y values
noise = 0.05 * np.random.randn(len(X1))
Y_noisy = Y_true.ravel() + noise
glm_single_variable.model(Y_noisy, X)
glm_single_variable.fit(method="lm") # , max_nfev=10000
Y_fitted = glm_single_variable.predict(X)
# Plot the results
plt.figure(figsize=(10, 6))
plt.scatter(X1, Y_noisy, color="gray", alpha=0.5, label="Noisy Data")
plt.plot(X1, Y_fitted, color="blue", label="Fitted Sigmoid Curve")
plt.title("Sigmoid Curve Fit for Single Predictor Variable")
plt.xlabel("X1")
plt.ylabel("Y")
plt.legend()
plt.grid(True)
plt.show()
In [5]:
Copied!
glm_single_variable.plot()
glm_single_variable.summary()
glm_single_variable.plot()
glm_single_variable.summary()
Generalized Logistic Model Summary | Parameter | Estimate | |:----------------|-----------:| | left_asymptote | 1.1904 | | right_asymptote | 5.9996 | | coef_0 | -8.9032 | | coef_1 | 4.9538 | Model Diagnostics Comparison | Metric | Sigmoid Model | Linear Model | |:-------------------------------------|----------------:|---------------:| | Variance Explained (R-squared) | 0.9996 | 0.7537 | | Akaike Information Criterion (AIC) | -329.001 | 315.751 | | Bayesian Information Criterion (BIC) | -318.58 | 320.962 | Likelihood Ratio Test (LRT) vs Linear Model |:-----------------------------|---------:| | Linear Model Log-Likelihood | -155.876 | | Sigmoid Model Log-Likelihood | 168.5 | | LRT Statistic | 648.752 | | p-value | 0 | LRT Comparisons with Reduced Models | Model | Log-Likelihood | LRT Statistic | p-value | |:-------------------------------------|-----------------:|----------------:|----------:| | Reduced Model (with first 1 columns) | -225.936 | 788.873 | 0 |
Multivariate Example¶
In [6]:
Copied!
glm_two_variable = GeneralizedLogisticModel()
# Generate sample data
np.random.seed(42)
X1 = np.linspace(-10, 10, 100)
X2 = np.linspace(-10, 10, 100)
X0 = np.ones_like(X1)
# True sigmoid parameters
true_left_asymptote = 1.3
true_right_asymptote = 9.5
true_slope = np.array([-7, 1.6, 1.6]) # Slopes for both variables
# Stack X1 and X2 to form a design matrix with two variables
X_two_vars = np.column_stack([X0, X1, X2])
# Generate Y data using the true sigmoid function
Y_true = sigmoid(
X_two_vars,
true_left_asymptote,
true_right_asymptote,
true_slope
)
# Add some noise to the Y values
noise = 0.75 * np.random.randn(len(X1))
Y_noisy = Y_true.ravel() + noise
# Fit the model to the noisy data
glm_two_variable.model(Y_noisy, X_two_vars)
# note that bounds and other initial parameters can be passed to the fit method
# there is one additional method, dogbox, which is available
# from scipy.optimize.curve_fit
glm_two_variable.fit(method="trf", max_nfev=100000)
# Predict using the fitted model
Y_fitted = glm_two_variable.predict(X_two_vars)
# glm_two_variable.plot()
glm_two_variable.summary()
glm_two_variable = GeneralizedLogisticModel()
# Generate sample data
np.random.seed(42)
X1 = np.linspace(-10, 10, 100)
X2 = np.linspace(-10, 10, 100)
X0 = np.ones_like(X1)
# True sigmoid parameters
true_left_asymptote = 1.3
true_right_asymptote = 9.5
true_slope = np.array([-7, 1.6, 1.6]) # Slopes for both variables
# Stack X1 and X2 to form a design matrix with two variables
X_two_vars = np.column_stack([X0, X1, X2])
# Generate Y data using the true sigmoid function
Y_true = sigmoid(
X_two_vars,
true_left_asymptote,
true_right_asymptote,
true_slope
)
# Add some noise to the Y values
noise = 0.75 * np.random.randn(len(X1))
Y_noisy = Y_true.ravel() + noise
# Fit the model to the noisy data
glm_two_variable.model(Y_noisy, X_two_vars)
# note that bounds and other initial parameters can be passed to the fit method
# there is one additional method, dogbox, which is available
# from scipy.optimize.curve_fit
glm_two_variable.fit(method="trf", max_nfev=100000)
# Predict using the fitted model
Y_fitted = glm_two_variable.predict(X_two_vars)
# glm_two_variable.plot()
glm_two_variable.summary()
Generalized Logistic Model Summary | Parameter | Estimate | |:----------------|-----------:| | left_asymptote | 1.1573 | | right_asymptote | 9.5234 | | coef_0 | -6.2911 | | coef_1 | -2.2532 | | coef_2 | 5.1284 | Model Diagnostics Comparison | Metric | Sigmoid Model | Linear Model | |:-------------------------------------|----------------:|---------------:| | Variance Explained (R-squared) | 0.9716 | 0.7273 | | Akaike Information Criterion (AIC) | 214.452 | 434.633 | | Bayesian Information Criterion (BIC) | 227.478 | 439.844 | Likelihood Ratio Test (LRT) vs Linear Model |:-----------------------------|---------:| | Linear Model Log-Likelihood | -215.317 | | Sigmoid Model Log-Likelihood | -102.226 | | LRT Statistic | 226.181 | | p-value | 0 | LRT Comparisons with Reduced Models | Model | Log-Likelihood | LRT Statistic | p-value | |:-------------------------------------|-----------------:|----------------:|----------:| | Reduced Model (with first 2 columns) | -102.226 | 0 | 0.9999 | | Reduced Model (with first 1 columns) | -280.286 | 356.121 | 0 |
In [8]:
Copied!
glm_two_variable.plot()
glm_two_variable.plot()
In [9]:
Copied!
glm_two_variable.summary()
glm_two_variable.summary()
Generalized Logistic Model Summary | Parameter | Estimate | |:----------------|-----------:| | left_asymptote | 1.1573 | | right_asymptote | 9.5234 | | coef_0 | -6.2911 | | coef_1 | -2.2532 | | coef_2 | 5.1284 | Model Diagnostics Comparison | Metric | Sigmoid Model | Linear Model | |:-------------------------------------|----------------:|---------------:| | Variance Explained (R-squared) | 0.9716 | 0.7273 | | Akaike Information Criterion (AIC) | 214.452 | 434.633 | | Bayesian Information Criterion (BIC) | 227.478 | 439.844 | Likelihood Ratio Test (LRT) vs Linear Model |:-----------------------------|---------:| | Linear Model Log-Likelihood | -215.317 | | Sigmoid Model Log-Likelihood | -102.226 | | LRT Statistic | 226.181 | | p-value | 0 | LRT Comparisons with Reduced Models | Model | Log-Likelihood | LRT Statistic | p-value | |:-------------------------------------|-----------------:|----------------:|----------:| | Reduced Model (with first 2 columns) | -102.226 | 0 | 0.9999 | | Reduced Model (with first 1 columns) | -280.286 | 356.121 | 0 |
Fitting the Sigmoid model to real data¶
In [10]:
Copied!
# Note that for this to work in your environment, this path must point to a real file
met28_met31 = pd.read_csv("../../tmp/met28_met31.csv")
model_string = "met28_shifted_lrr ~ met28_shifted_lrb + met28_shifted_lrb:met31_shifted_lrb"
y_met28_met31, X_met28_met31_interaction = pt.dmatrices(model_string, data=met28_met31)
y_met28_met31 = np.asanyarray(y_met28_met31).ravel()
X_met28_met31_interaction = np.asarray(X_met28_met31_interaction)
met28_met31_model_interaction = GeneralizedLogisticModel()
met28_met31_model_interaction.model(y_met28_met31, X_met28_met31_interaction)
met28_met31_model_interaction.fit(method="lm")
met28_met31_model_interaction.summary()
met28_met31_model_interaction.plot(interactor_diagnostic=True)
# Note that for this to work in your environment, this path must point to a real file
met28_met31 = pd.read_csv("../../tmp/met28_met31.csv")
model_string = "met28_shifted_lrr ~ met28_shifted_lrb + met28_shifted_lrb:met31_shifted_lrb"
y_met28_met31, X_met28_met31_interaction = pt.dmatrices(model_string, data=met28_met31)
y_met28_met31 = np.asanyarray(y_met28_met31).ravel()
X_met28_met31_interaction = np.asarray(X_met28_met31_interaction)
met28_met31_model_interaction = GeneralizedLogisticModel()
met28_met31_model_interaction.model(y_met28_met31, X_met28_met31_interaction)
met28_met31_model_interaction.fit(method="lm")
met28_met31_model_interaction.summary()
met28_met31_model_interaction.plot(interactor_diagnostic=True)
Generalized Logistic Model Summary | Parameter | Estimate | |:----------------|-----------:| | left_asymptote | 0.3136 | | right_asymptote | 1.4538 | | coef_0 | -5.2551 | | coef_1 | 0.6793 | | coef_2 | 1.0907 | Model Diagnostics Comparison | Metric | Sigmoid Model | Linear Model | |:-------------------------------------|----------------:|---------------:| | Variance Explained (R-squared) | 0.1155 | 0.1028 | | Akaike Information Criterion (AIC) | 1510.23 | 1594.22 | | Bayesian Information Criterion (BIC) | 1543.85 | 1614.39 | Likelihood Ratio Test (LRT) vs Linear Model |:-----------------------------|----------:| | Linear Model Log-Likelihood | -794.11 | | Sigmoid Model Log-Likelihood | -750.115 | | LRT Statistic | 87.9894 | | p-value | 0 | LRT Comparisons with Reduced Models | Model | Log-Likelihood | LRT Statistic | p-value | |:-------------------------------------|-----------------:|----------------:|----------:| | Reduced Model (with first 2 columns) | -775.808 | 51.3855 | 7.589e-13 | | Reduced Model (with first 1 columns) | -1127.68 | 755.134 | 0 |
<Figure size 640x480 with 0 Axes>