Skip to content

GeneralizedLogisticModel

GeneralizedLogisticModel

Generalized logistic model for fitting sigmoidal curves to data.

Source code in yeastdnnexplorer/ml_models/GeneralizedLogisticModel.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
class GeneralizedLogisticModel:
    """Generalized logistic model for fitting sigmoidal curves to data."""

    def __init__(self):
        """Initialize the generalized logistic model."""
        self._X: np.ndarray | None = None
        self._y: np.ndarray | None = None
        self._right_asymptote: float | None = None
        self._left_asymptote: float | None = None
        self._coefficients: np.ndarray | None = None
        self._cov: np.ndarray | None = None
        self._residuals: np.ndarray | None = None
        self._jacobian: np.ndarray | None = None

    @property
    def X(self) -> np.ndarray | None:
        """
        Set the predictor variables for the model.

        :param value: The input data matrix. Must be two dimensional even if there is
            only one predictor.
        :return: The input data matrix.
        :raises TypeError: if X is not a NumPy array.
        :raises ValueError: if X is not 2D.
        :raises ValueError: if the number of columns in X does not match the length of
            the inflection point or coefficients.

        """
        return self._X

    @X.setter
    def X(self, value: np.ndarray) -> None:
        if not isinstance(value, np.ndarray):
            raise TypeError("X must be a NumPy array.")

        # Ensure that X is 2D
        if value.ndim != 2:
            raise ValueError("X must be a 2-dimensional array.")

        if not np.all(value[:, 0] == 1):
            raise ValueError("The first column of X must be a constant vector of ones.")

        # Check compatibility with coefficients
        if self.coefficients is not None and len(self.coefficients) != value.shape[1]:
            raise ValueError(
                "Coefficients must have the same number of elements as columns in X."
            )

        self._X = value

    @property
    def y(self) -> np.ndarray | None:
        """
        Set the response variable for the model.

        :param value: The observed output data.
        :return: The observed output data.
        :raises TypeError: if y is not a NumPy array or a list.
        :raises ValueError: if the number of rows in y does not match the number of rows
            in X.

        """
        return self._y

    @y.setter
    def y(self, value: np.ndarray) -> None:
        if not isinstance(value, (np.ndarray, list)):
            raise TypeError("y must be a NumPy array or a list.")

        value = np.asarray(value)

        if self.X is not None and value.shape[0] != self.X.shape[0]:
            raise ValueError("y must have the same number of rows as X.")

        self._y = value

    @property
    def right_asymptote(self) -> float | None:
        """
        Set the upper asymptote for the model. This parameter can be inferred by `fit()`

        :param value: The upper asymptote of the sigmoid function.
        :return: The upper asymptote of the sigmoid function.
        :raises TypeError: if the upper asymptote is not a real number.
        :raises ValueError: if the upper asymptote is less than the lower asymptote.

        """
        return self._right_asymptote

    @right_asymptote.setter
    def right_asymptote(self, value: float) -> None:
        if not isinstance(value, numbers.Real):
            raise TypeError("Upper asymptote must be a real number.")

        self._right_asymptote = value

    @property
    def left_asymptote(self) -> float | None:
        """
        The lower asymptote of the sigmoid function. This parameter can be inferred by
        `fit()`

        :return: The lower asymptote of the sigmoid function.
        :raises TypeError: if the lower asymptote is not a real number.
        :raises ValueError: if the lower asymptote is greater than the upper asymptote.

        """
        return self._left_asymptote

    @left_asymptote.setter
    def left_asymptote(self, value: float) -> None:
        if not isinstance(value, numbers.Real):
            raise TypeError("Lower asymptote must be a real number.")

        self._left_asymptote = value

    @property
    def coefficients(self) -> np.ndarray | None:
        """
        Set the coefficients for the model. This parameter can be inferred by `fit()`

        :param value: The coefficients of the sigmoid function.
        :return: The coefficients of the sigmoid function.
        :raises TypeError: if the coefficients are not a NumPy array or a list.
        :raises ValueError: if the length of the coefficients does not match the number
            of columns in X or the number of inflection points.

        """
        return self._coefficients

    @coefficients.setter
    def coefficients(self, value: Sequence[float] | np.ndarray) -> None:
        # validate type
        if not isinstance(value, (np.ndarray, list)):
            raise TypeError("Coefficients must be a NumPy array or a list.")

        value = np.asarray(value)

        if self.X is not None and len(value) != self.X.shape[1]:
            raise ValueError(
                "Coefficients must have the same number of elements as columns in X."
            )

        self._coefficients = value

    @property
    def cov(self) -> np.ndarray | None:
        """
        The covariance matrix of the model parameters. This parameter can be inferred by
        `fit()`

        :return: The covariance matrix of the model parameters.
        :raises TypeError: if the covariance matrix is not a NumPy array.

        """
        return self._cov

    @cov.setter
    def cov(self, value: np.ndarray) -> None:
        if not isinstance(value, np.ndarray):
            raise TypeError("cov must be a NumPy array.")
        self._cov = value

    @property
    def residuals(self) -> np.ndarray | None:
        """
        The residuals of the model. This parameter can be inferred by `fit()`

        :return: The residuals of the model.
        :raises TypeError: if the residuals are not a NumPy array.

        """
        return self._residuals

    @residuals.setter
    def residuals(self, value: np.ndarray) -> None:
        if not isinstance(value, np.ndarray):
            raise TypeError("residuals must be a NumPy array.")
        self._residuals = value

    @property
    def jacobian(self) -> np.ndarray | None:
        """
        The Jacobian matrix of the model. This parameter can be inferred by `fit()`

        :return: The Jacobian matrix of the model.
        :raises TypeError: if the Jacobian matrix is not a NumPy array.

        """
        return self._jacobian

    @jacobian.setter
    def jacobian(self, value: np.ndarray) -> None:
        # if not isinstance(value, np.ndarray):
        #     raise TypeError("Jacobian must be a NumPy array.")
        self._jacobian = value

    @property
    def n_params(self) -> int:
        """
        The number of parameters in the model.

        :return: The number of parameters in the model.
        :raises AttributeError: if the coefficients are not available.

        """
        if self.X is None:
            raise AttributeError("X not available. Set with `model()`.")
        # Number of parameters = number of coefficients + 2 (for the two asymptotes)
        return self.X.shape[1] + 2

    @property
    def df(self) -> int:
        """
        The residual degrees of freedom of the model.

        Residual degrees of freedom = number of observations - number of parameters

        :return: The residual degrees of freedom of the model.

        :raises AssertionError: if the input data matrix X is not available.

        """
        assert self.X is not None, "Input data matrix X is not available."
        # Number of parameters = number of coefficients + 2 (for the two asymptotes)
        return self.X.shape[0] - self.n_params

    @property
    def mse(self) -> float | None:
        """
        The mean squared error of the model.

        :return: The mean squared error of the model.
        :raises AttributeError: if the residuals are not available.

        """
        if self.residuals is None:
            raise AttributeError("Residuals are not available.")
        return np.mean(self.residuals**2)

    @property
    def rss(self) -> float | None:
        """
        The residual sum of squares of the model.

        :return: The residual sum of squares of the model.
        :raises AttributeError: if the residuals are not available.

        """
        if self.residuals is None:
            raise AttributeError("Residuals are not available.")
        return np.sum(self.residuals**2)

    @property
    def tss(self) -> float | None:
        """
        The total sum of squares of the model.

        :return: The total sum of squares of the model.
        :raises AttributeError: if the output data y is not available.

        """
        if self.y is None:
            raise AttributeError("Output data y is not available.")
        return np.sum((self.y - np.mean(self.y)) ** 2)

    @property
    def r_squared(self) -> float | None:
        """
        The variance explained by the model.

        :return: The variance explained by the model.

        :raises AttributeError: `rss` or `tss` is not available

        """
        if self.rss is None:
            raise AttributeError(
                "`rss` is not available. Check that `fit()` has been run."
            )
        if self.tss is None:
            raise AttributeError(
                "`tss` is not available. Check that `model()` has been run."
            )
        return 1 - (self.rss / self.tss)

    @property
    def llf(self) -> float | None:
        """
        The log-likelihood of the model. Note that this assumes Gaussian residuals.

        :return: The log-likelihood of the model.
        :raises AttributeError: if the residuals or y are not available.

        """
        if self.residuals is None:
            raise AttributeError("Residuals are not available.")
        # Number of observations
        if self.y is None:
            raise AttributeError("Output data y is not available.")
        n = len(self.y)

        # Variance of the residuals (sigma^2). denominator is N, not N-1. ddof=1 is
        # for the sample variance
        # see https://numpy.org/doc/stable/reference/generated/numpy.var.html
        sigma_squared = np.var(self.residuals, ddof=0)

        if sigma_squared <= 0:
            raise ValueError("Variance of residuals is zero or negative.")

        # Sum of squared residuals
        sum_squared_residuals = np.sum(self.residuals**2)

        # Log-likelihood using Gaussian assumption (standard OLS log likelihood)
        log_likelihood = (
            -(n / 2) * np.log(2 * np.pi * sigma_squared)
            - (1 / (2 * sigma_squared)) * sum_squared_residuals
        )

        return log_likelihood

    def aic(self) -> float | None:
        """
        Calculate the Akaike Information Criterion (AIC) for the model.

        :return: The Akaike Information Criterion (AIC) for the model.
        :raises AttributeError: if the log-likelihood is not available.

        """
        if self.llf is None:
            raise AttributeError("Log-likelihood is not available.")
        # AIC = 2p − 2log(L)
        return 2 * self.n_params - 2 * self.llf

    def bic(self) -> float | None:
        """
        Calculate the Bayesian Information Criterion (BIC) for the model.

        :return: The Bayesian Information Criterion (BIC) for the model.
        :raises AttributeError: if the log-likelihood or X is not available.

        """
        if self.llf is None:
            raise AttributeError("Log-likelihood is not available.")
        if self.X is None:
            raise AttributeError("Input data matrix X is not available.")
        # BIC = plog(n) − 2log(L)
        return self.n_params * np.log(self.X.shape[0]) - 2 * self.llf

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Make predictions using the generalized logistic model.

        :param X: Input data matrix
        :return: Predictions based on the learned model parameters
        :raises ValueError: if the model has not been fitted.

        """
        if self.right_asymptote is None or self.left_asymptote is None:
            raise ValueError("Model must be fitted before making predictions.")

        assert self.coefficients is not None, "Coefficients are not available."

        return sigmoid(
            X,
            self.left_asymptote,
            self.right_asymptote,
            self.coefficients,
        )

    def model(self, y: np.ndarray, X: np.ndarray) -> None:
        """
        Set the predictor and response variables for the model.

        :param X: The input data matrix. Must be two dimensional even if there is only
            one predictor.
        :param y: The observed output data.

        """
        self.y = y
        self.X = X  # This should set the 2D array correctly

    def fit(self, **kwargs) -> None:
        """
        Fit the model to the data. This uses `scipy.optimize.curve_fit` to optimize the
        model parameters. See
        https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html

        :param kwargs: Additional keyword arguments to pass to `curve_fit`.

        - **right_asymptote**: Initial guess for the upper asymptote.
            Defaults to 1.0.
        - **left_asymptote**: Initial guess for the lower asymptote.
            Defaults to 0.0.
        - **coefficients**: Initial guess for the coefficients.
        - Any other kwargs are passed on to `scipy.optimize.curve_fit`.
        """

        assert (
            self.X is not None
        ), "Input data matrix X is not available. Set via `model()`"
        assert self.y is not None, "Output data y is not available. Set via `model()`"
        initial_left_asymptote = kwargs.pop("left_asymptote", 0.0)
        initial_right_asymptote = kwargs.pop("right_asymptote", 1.0)
        initial_coefficients = kwargs.pop("coefficients", np.ones(self.X.shape[1]))

        # Flatten the parameters into a single array for curve_fit
        initial_params = [
            initial_left_asymptote,
            initial_right_asymptote,
        ] + initial_coefficients.tolist()

        # Define the model for curve_fit to optimize
        def model_to_fit(X, left_asymptote, right_asymptote, *params):
            n = X.shape[1]
            coefficients = np.array(params[:n])
            return sigmoid(
                X,
                left_asymptote,
                right_asymptote,
                coefficients,
            )

        # Use curve_fit to optimize the model parameters
        logger.info("Fitting the generalized logistic model to the data.")
        popt, cov, infodict, mesg, ier = curve_fit(
            f=model_to_fit,
            xdata=self.X,
            ydata=self.y,
            p0=initial_params,
            full_output=True,
            **kwargs,
        )

        # Extract the optimized parameters from popt
        self.left_asymptote = popt[0]
        self.right_asymptote = popt[1]
        self.coefficients = np.array(popt[-self.X.shape[1] :])
        self.cov = cov
        self.residuals = infodict.get("fvec")
        self.jacobian = infodict.get("fjac")

    def summary(self) -> None:
        """
        Print a summary of the generalized logistic model.

        This method automatically performs LRT comparisons between the full model and
        models with one less predictor in each iteration.

        :raises ValueError: if the model has not been fitted.

        """
        if self.X is None or self.y is None or self.coefficients is None:
            raise ValueError("Model must be fitted before generating a summary.")

        # Calculate the log-likelihood for the full model (self)
        assert self.llf is not None, "Log-likelihood not available."
        self_log_likelihood = self.llf

        # Fit the OLS model for comparison
        ols_model = sm.OLS(self.y, self.X)
        ols_results = ols_model.fit()
        log_likelihood_linear = ols_results.llf

        # LRT comparing the full sigmoid model to the linear model
        lrt_statistic_linear = -2 * (log_likelihood_linear - self_log_likelihood)
        p_value_linear = 1 - stats.chi2.cdf(lrt_statistic_linear, df=2)

        # Prepare the summary table for the sigmoid model
        param_names = ["left_asymptote", "right_asymptote"] + [
            f"coef_{i}" for i in range(len(self.coefficients))
        ]
        estimates = np.concatenate(
            [[self.left_asymptote, self.right_asymptote], self.coefficients]
        )

        # Display the model summary in a formatted table
        table_data = [
            [param, f"{est:12.4f}"] for param, est in zip(param_names, estimates)
        ]
        print("\nGeneralized Logistic Model Summary\n")
        print(tabulate(table_data, headers=["Parameter", "Estimate"], tablefmt="pipe"))

        # Improved model diagnostics table
        diagnostics_table = [
            ["Metric", "Sigmoid Model", "Linear Model"],
            [
                "Variance Explained (R-squared)",
                f"{self.r_squared:.4f}",
                f"{ols_results.rsquared:.4f}",
            ],
            [
                "Akaike Information Criterion (AIC)",
                f"{self.aic():.4f}",
                f"{ols_results.aic:.4f}",
            ],
            [
                "Bayesian Information Criterion (BIC)",
                f"{self.bic():.4f}",
                f"{ols_results.bic:.4f}",
            ],
        ]

        print("\nModel Diagnostics Comparison")
        print(tabulate(diagnostics_table, headers="firstrow", tablefmt="pipe"))

        # LRT vs linear model
        lrt_table = [
            ["Linear Model Log-Likelihood", f"{log_likelihood_linear:.4f}"],
            ["Sigmoid Model Log-Likelihood", f"{self_log_likelihood:.4f}"],
            ["LRT Statistic", f"{lrt_statistic_linear:.4f}"],
            ["p-value", f"{p_value_linear:.4g}"],
        ]
        print("\nLikelihood Ratio Test (LRT) vs Linear Model")
        print(tabulate(lrt_table, tablefmt="pipe"))

        # Iterate over columns of X, removing one column at a time and performing LRT
        lrt_comparisons = []
        # Iterate over columns of X, removing columns from the end to the beginning
        for i in range(self.X.shape[1] - 1, 0, -1):
            X_reduced = self.X[
                :, :i
            ]  # Take the first i columns (this removes the last column first)

            # Fit a reduced model with fewer columns
            reduced_model = GeneralizedLogisticModel()
            reduced_model.model(self.y, X_reduced)
            reduced_model.fit()

            # Calculate log-likelihood for the reduced model
            assert reduced_model.llf is not None, "Log-likelihood not available."
            log_likelihood_reduced = reduced_model.llf

            # Perform LRT between the full model and the reduced model
            lrt_statistic_reduced = -2 * (log_likelihood_reduced - self_log_likelihood)
            param_diff = 1  # Since we're removing one predictor at a time
            p_value_reduced = 1 - stats.chi2.cdf(lrt_statistic_reduced, df=param_diff)

            # Collect the results for the reduced model
            lrt_comparisons.append(
                [
                    f"Reduced Model (with first {i} columns)",
                    f"{log_likelihood_reduced:.4f}",
                    f"{lrt_statistic_reduced:.4f}",
                    f"{p_value_reduced:.4g}",
                ]
            )

        # Output the results for the reduced models
        print("\nLRT Comparisons with Reduced Models")
        print(
            tabulate(
                lrt_comparisons,
                headers=["Model", "Log-Likelihood", "LRT Statistic", "p-value"],
                tablefmt="pipe",
            )
        )

    def plot(
        self,
        plots_to_display: list[int] = [1, 2, 3, 4],
        interactor_diagnostic: bool = False,
    ):
        """
        Diagnostic plots for the generalized logistic model.

        This function can generate various plots including:

        - 1: Normal Q-Q plot.
        - Optionally: Interactor Diagnostic Plot when `interactor_diagnostic` is True.

        :param plots_to_display: A list of integers (1 to 4) indicating which
            plots to show.
        :param interactor_diagnostic: Boolean to include interactor diagnostic
            plot (default False).

        """
        if self.X is None or self.y is None or self.residuals is None:
            raise ValueError("Model must be fitted before plotting diagnostics.")

        # Create subplots grid based on number of plots to display
        n_plots = len(plots_to_display) + (1 if interactor_diagnostic else 0)
        fig, axes = plt.subplots(1, n_plots, figsize=(5 * n_plots, 5))

        if n_plots == 1:
            axes = [axes]  # Ensure axes is always iterable

        for i, plot_num in enumerate(plots_to_display):
            if plot_num == 1:
                # 1. Normal Q-Q Plot
                stats.probplot(self.residuals, dist="norm", plot=axes[i])
                axes[i].set_title("Normal Q-Q")

        # If the user requested the interactor diagnostic plot
        if interactor_diagnostic:
            assert (
                self.coefficients is not None
            ), "Input data matrix X is not available."
            # Call the interactor diagnostic plot class
            interactor_plotter = InteractorDiagnosticPlot(
                df=pd.DataFrame(self.X),  # Your data goes here
                quantile=0.1,  # Use an appropriate quantile value
                B=self.coefficients,  # Pass the model coefficients
                model_type="sigmoid",
                left_asymptote=self.left_asymptote,
                right_asymptote=self.right_asymptote,
            )
            plt_obj = interactor_plotter.plot()  # Create the diagnostic plot
            plt_obj.show()  # Show the plot in a separate figure

        plt.tight_layout()
        plt.show()

X: np.ndarray | None property writable

Set the predictor variables for the model.

Parameters:

Name Type Description Default
value

The input data matrix. Must be two dimensional even if there is only one predictor.

required

Returns:

Type Description

The input data matrix.

Raises:

Type Description
TypeError

if X is not a NumPy array.

ValueError

if X is not 2D.

ValueError

if the number of columns in X does not match the length of the inflection point or coefficients.

coefficients: np.ndarray | None property writable

Set the coefficients for the model. This parameter can be inferred by fit()

Parameters:

Name Type Description Default
value

The coefficients of the sigmoid function.

required

Returns:

Type Description

The coefficients of the sigmoid function.

Raises:

Type Description
TypeError

if the coefficients are not a NumPy array or a list.

ValueError

if the length of the coefficients does not match the number of columns in X or the number of inflection points.

cov: np.ndarray | None property writable

The covariance matrix of the model parameters. This parameter can be inferred by fit()

Returns:

Type Description

The covariance matrix of the model parameters.

Raises:

Type Description
TypeError

if the covariance matrix is not a NumPy array.

df: int property

The residual degrees of freedom of the model.

Residual degrees of freedom = number of observations - number of parameters

Returns:

Type Description

The residual degrees of freedom of the model.

Raises:

Type Description
AssertionError

if the input data matrix X is not available.

jacobian: np.ndarray | None property writable

The Jacobian matrix of the model. This parameter can be inferred by fit()

Returns:

Type Description

The Jacobian matrix of the model.

Raises:

Type Description
TypeError

if the Jacobian matrix is not a NumPy array.

left_asymptote: float | None property writable

The lower asymptote of the sigmoid function. This parameter can be inferred by fit()

Returns:

Type Description

The lower asymptote of the sigmoid function.

Raises:

Type Description
TypeError

if the lower asymptote is not a real number.

ValueError

if the lower asymptote is greater than the upper asymptote.

llf: float | None property

The log-likelihood of the model. Note that this assumes Gaussian residuals.

Returns:

Type Description

The log-likelihood of the model.

Raises:

Type Description
AttributeError

if the residuals or y are not available.

mse: float | None property

The mean squared error of the model.

Returns:

Type Description

The mean squared error of the model.

Raises:

Type Description
AttributeError

if the residuals are not available.

n_params: int property

The number of parameters in the model.

Returns:

Type Description

The number of parameters in the model.

Raises:

Type Description
AttributeError

if the coefficients are not available.

r_squared: float | None property

The variance explained by the model.

Returns:

Type Description

The variance explained by the model.

Raises:

Type Description
AttributeError

rss or tss is not available

residuals: np.ndarray | None property writable

The residuals of the model. This parameter can be inferred by fit()

Returns:

Type Description

The residuals of the model.

Raises:

Type Description
TypeError

if the residuals are not a NumPy array.

right_asymptote: float | None property writable

Set the upper asymptote for the model. This parameter can be inferred by fit()

Parameters:

Name Type Description Default
value

The upper asymptote of the sigmoid function.

required

Returns:

Type Description

The upper asymptote of the sigmoid function.

Raises:

Type Description
TypeError

if the upper asymptote is not a real number.

ValueError

if the upper asymptote is less than the lower asymptote.

rss: float | None property

The residual sum of squares of the model.

Returns:

Type Description

The residual sum of squares of the model.

Raises:

Type Description
AttributeError

if the residuals are not available.

tss: float | None property

The total sum of squares of the model.

Returns:

Type Description

The total sum of squares of the model.

Raises:

Type Description
AttributeError

if the output data y is not available.

y: np.ndarray | None property writable

Set the response variable for the model.

Parameters:

Name Type Description Default
value

The observed output data.

required

Returns:

Type Description

The observed output data.

Raises:

Type Description
TypeError

if y is not a NumPy array or a list.

ValueError

if the number of rows in y does not match the number of rows in X.

__init__()

Initialize the generalized logistic model.

Source code in yeastdnnexplorer/ml_models/GeneralizedLogisticModel.py
22
23
24
25
26
27
28
29
30
31
def __init__(self):
    """Initialize the generalized logistic model."""
    self._X: np.ndarray | None = None
    self._y: np.ndarray | None = None
    self._right_asymptote: float | None = None
    self._left_asymptote: float | None = None
    self._coefficients: np.ndarray | None = None
    self._cov: np.ndarray | None = None
    self._residuals: np.ndarray | None = None
    self._jacobian: np.ndarray | None = None

aic()

Calculate the Akaike Information Criterion (AIC) for the model.

Returns:

Type Description
float | None

The Akaike Information Criterion (AIC) for the model.

Raises:

Type Description
AttributeError

if the log-likelihood is not available.

Source code in yeastdnnexplorer/ml_models/GeneralizedLogisticModel.py
340
341
342
343
344
345
346
347
348
349
350
351
def aic(self) -> float | None:
    """
    Calculate the Akaike Information Criterion (AIC) for the model.

    :return: The Akaike Information Criterion (AIC) for the model.
    :raises AttributeError: if the log-likelihood is not available.

    """
    if self.llf is None:
        raise AttributeError("Log-likelihood is not available.")
    # AIC = 2p − 2log(L)
    return 2 * self.n_params - 2 * self.llf

bic()

Calculate the Bayesian Information Criterion (BIC) for the model.

Returns:

Type Description
float | None

The Bayesian Information Criterion (BIC) for the model.

Raises:

Type Description
AttributeError

if the log-likelihood or X is not available.

Source code in yeastdnnexplorer/ml_models/GeneralizedLogisticModel.py
353
354
355
356
357
358
359
360
361
362
363
364
365
366
def bic(self) -> float | None:
    """
    Calculate the Bayesian Information Criterion (BIC) for the model.

    :return: The Bayesian Information Criterion (BIC) for the model.
    :raises AttributeError: if the log-likelihood or X is not available.

    """
    if self.llf is None:
        raise AttributeError("Log-likelihood is not available.")
    if self.X is None:
        raise AttributeError("Input data matrix X is not available.")
    # BIC = plog(n) − 2log(L)
    return self.n_params * np.log(self.X.shape[0]) - 2 * self.llf

fit(**kwargs)

Fit the model to the data. This uses scipy.optimize.curve_fit to optimize the model parameters. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html

Parameters:

Name Type Description Default
kwargs

Additional keyword arguments to pass to curve_fit. - right_asymptote: Initial guess for the upper asymptote. Defaults to 1.0. - left_asymptote: Initial guess for the lower asymptote. Defaults to 0.0. - coefficients: Initial guess for the coefficients. - Any other kwargs are passed on to scipy.optimize.curve_fit.

{}
Source code in yeastdnnexplorer/ml_models/GeneralizedLogisticModel.py
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
def fit(self, **kwargs) -> None:
    """
    Fit the model to the data. This uses `scipy.optimize.curve_fit` to optimize the
    model parameters. See
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html

    :param kwargs: Additional keyword arguments to pass to `curve_fit`.

    - **right_asymptote**: Initial guess for the upper asymptote.
        Defaults to 1.0.
    - **left_asymptote**: Initial guess for the lower asymptote.
        Defaults to 0.0.
    - **coefficients**: Initial guess for the coefficients.
    - Any other kwargs are passed on to `scipy.optimize.curve_fit`.
    """

    assert (
        self.X is not None
    ), "Input data matrix X is not available. Set via `model()`"
    assert self.y is not None, "Output data y is not available. Set via `model()`"
    initial_left_asymptote = kwargs.pop("left_asymptote", 0.0)
    initial_right_asymptote = kwargs.pop("right_asymptote", 1.0)
    initial_coefficients = kwargs.pop("coefficients", np.ones(self.X.shape[1]))

    # Flatten the parameters into a single array for curve_fit
    initial_params = [
        initial_left_asymptote,
        initial_right_asymptote,
    ] + initial_coefficients.tolist()

    # Define the model for curve_fit to optimize
    def model_to_fit(X, left_asymptote, right_asymptote, *params):
        n = X.shape[1]
        coefficients = np.array(params[:n])
        return sigmoid(
            X,
            left_asymptote,
            right_asymptote,
            coefficients,
        )

    # Use curve_fit to optimize the model parameters
    logger.info("Fitting the generalized logistic model to the data.")
    popt, cov, infodict, mesg, ier = curve_fit(
        f=model_to_fit,
        xdata=self.X,
        ydata=self.y,
        p0=initial_params,
        full_output=True,
        **kwargs,
    )

    # Extract the optimized parameters from popt
    self.left_asymptote = popt[0]
    self.right_asymptote = popt[1]
    self.coefficients = np.array(popt[-self.X.shape[1] :])
    self.cov = cov
    self.residuals = infodict.get("fvec")
    self.jacobian = infodict.get("fjac")

model(y, X)

Set the predictor and response variables for the model.

Parameters:

Name Type Description Default
X ndarray

The input data matrix. Must be two dimensional even if there is only one predictor.

required
y ndarray

The observed output data.

required
Source code in yeastdnnexplorer/ml_models/GeneralizedLogisticModel.py
389
390
391
392
393
394
395
396
397
398
399
def model(self, y: np.ndarray, X: np.ndarray) -> None:
    """
    Set the predictor and response variables for the model.

    :param X: The input data matrix. Must be two dimensional even if there is only
        one predictor.
    :param y: The observed output data.

    """
    self.y = y
    self.X = X  # This should set the 2D array correctly

plot(plots_to_display=[1, 2, 3, 4], interactor_diagnostic=False)

Diagnostic plots for the generalized logistic model.

This function can generate various plots including:

  • 1: Normal Q-Q plot.
  • Optionally: Interactor Diagnostic Plot when interactor_diagnostic is True.

Parameters:

Name Type Description Default
plots_to_display list[int]

A list of integers (1 to 4) indicating which plots to show.

[1, 2, 3, 4]
interactor_diagnostic bool

Boolean to include interactor diagnostic plot (default False).

False
Source code in yeastdnnexplorer/ml_models/GeneralizedLogisticModel.py
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
def plot(
    self,
    plots_to_display: list[int] = [1, 2, 3, 4],
    interactor_diagnostic: bool = False,
):
    """
    Diagnostic plots for the generalized logistic model.

    This function can generate various plots including:

    - 1: Normal Q-Q plot.
    - Optionally: Interactor Diagnostic Plot when `interactor_diagnostic` is True.

    :param plots_to_display: A list of integers (1 to 4) indicating which
        plots to show.
    :param interactor_diagnostic: Boolean to include interactor diagnostic
        plot (default False).

    """
    if self.X is None or self.y is None or self.residuals is None:
        raise ValueError("Model must be fitted before plotting diagnostics.")

    # Create subplots grid based on number of plots to display
    n_plots = len(plots_to_display) + (1 if interactor_diagnostic else 0)
    fig, axes = plt.subplots(1, n_plots, figsize=(5 * n_plots, 5))

    if n_plots == 1:
        axes = [axes]  # Ensure axes is always iterable

    for i, plot_num in enumerate(plots_to_display):
        if plot_num == 1:
            # 1. Normal Q-Q Plot
            stats.probplot(self.residuals, dist="norm", plot=axes[i])
            axes[i].set_title("Normal Q-Q")

    # If the user requested the interactor diagnostic plot
    if interactor_diagnostic:
        assert (
            self.coefficients is not None
        ), "Input data matrix X is not available."
        # Call the interactor diagnostic plot class
        interactor_plotter = InteractorDiagnosticPlot(
            df=pd.DataFrame(self.X),  # Your data goes here
            quantile=0.1,  # Use an appropriate quantile value
            B=self.coefficients,  # Pass the model coefficients
            model_type="sigmoid",
            left_asymptote=self.left_asymptote,
            right_asymptote=self.right_asymptote,
        )
        plt_obj = interactor_plotter.plot()  # Create the diagnostic plot
        plt_obj.show()  # Show the plot in a separate figure

    plt.tight_layout()
    plt.show()

predict(X)

Make predictions using the generalized logistic model.

Parameters:

Name Type Description Default
X ndarray

Input data matrix

required

Returns:

Type Description
ndarray

Predictions based on the learned model parameters

Raises:

Type Description
ValueError

if the model has not been fitted.

Source code in yeastdnnexplorer/ml_models/GeneralizedLogisticModel.py
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
def predict(self, X: np.ndarray) -> np.ndarray:
    """
    Make predictions using the generalized logistic model.

    :param X: Input data matrix
    :return: Predictions based on the learned model parameters
    :raises ValueError: if the model has not been fitted.

    """
    if self.right_asymptote is None or self.left_asymptote is None:
        raise ValueError("Model must be fitted before making predictions.")

    assert self.coefficients is not None, "Coefficients are not available."

    return sigmoid(
        X,
        self.left_asymptote,
        self.right_asymptote,
        self.coefficients,
    )

summary()

Print a summary of the generalized logistic model.

This method automatically performs LRT comparisons between the full model and models with one less predictor in each iteration.

Raises:

Type Description
ValueError

if the model has not been fitted.

Source code in yeastdnnexplorer/ml_models/GeneralizedLogisticModel.py
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
def summary(self) -> None:
    """
    Print a summary of the generalized logistic model.

    This method automatically performs LRT comparisons between the full model and
    models with one less predictor in each iteration.

    :raises ValueError: if the model has not been fitted.

    """
    if self.X is None or self.y is None or self.coefficients is None:
        raise ValueError("Model must be fitted before generating a summary.")

    # Calculate the log-likelihood for the full model (self)
    assert self.llf is not None, "Log-likelihood not available."
    self_log_likelihood = self.llf

    # Fit the OLS model for comparison
    ols_model = sm.OLS(self.y, self.X)
    ols_results = ols_model.fit()
    log_likelihood_linear = ols_results.llf

    # LRT comparing the full sigmoid model to the linear model
    lrt_statistic_linear = -2 * (log_likelihood_linear - self_log_likelihood)
    p_value_linear = 1 - stats.chi2.cdf(lrt_statistic_linear, df=2)

    # Prepare the summary table for the sigmoid model
    param_names = ["left_asymptote", "right_asymptote"] + [
        f"coef_{i}" for i in range(len(self.coefficients))
    ]
    estimates = np.concatenate(
        [[self.left_asymptote, self.right_asymptote], self.coefficients]
    )

    # Display the model summary in a formatted table
    table_data = [
        [param, f"{est:12.4f}"] for param, est in zip(param_names, estimates)
    ]
    print("\nGeneralized Logistic Model Summary\n")
    print(tabulate(table_data, headers=["Parameter", "Estimate"], tablefmt="pipe"))

    # Improved model diagnostics table
    diagnostics_table = [
        ["Metric", "Sigmoid Model", "Linear Model"],
        [
            "Variance Explained (R-squared)",
            f"{self.r_squared:.4f}",
            f"{ols_results.rsquared:.4f}",
        ],
        [
            "Akaike Information Criterion (AIC)",
            f"{self.aic():.4f}",
            f"{ols_results.aic:.4f}",
        ],
        [
            "Bayesian Information Criterion (BIC)",
            f"{self.bic():.4f}",
            f"{ols_results.bic:.4f}",
        ],
    ]

    print("\nModel Diagnostics Comparison")
    print(tabulate(diagnostics_table, headers="firstrow", tablefmt="pipe"))

    # LRT vs linear model
    lrt_table = [
        ["Linear Model Log-Likelihood", f"{log_likelihood_linear:.4f}"],
        ["Sigmoid Model Log-Likelihood", f"{self_log_likelihood:.4f}"],
        ["LRT Statistic", f"{lrt_statistic_linear:.4f}"],
        ["p-value", f"{p_value_linear:.4g}"],
    ]
    print("\nLikelihood Ratio Test (LRT) vs Linear Model")
    print(tabulate(lrt_table, tablefmt="pipe"))

    # Iterate over columns of X, removing one column at a time and performing LRT
    lrt_comparisons = []
    # Iterate over columns of X, removing columns from the end to the beginning
    for i in range(self.X.shape[1] - 1, 0, -1):
        X_reduced = self.X[
            :, :i
        ]  # Take the first i columns (this removes the last column first)

        # Fit a reduced model with fewer columns
        reduced_model = GeneralizedLogisticModel()
        reduced_model.model(self.y, X_reduced)
        reduced_model.fit()

        # Calculate log-likelihood for the reduced model
        assert reduced_model.llf is not None, "Log-likelihood not available."
        log_likelihood_reduced = reduced_model.llf

        # Perform LRT between the full model and the reduced model
        lrt_statistic_reduced = -2 * (log_likelihood_reduced - self_log_likelihood)
        param_diff = 1  # Since we're removing one predictor at a time
        p_value_reduced = 1 - stats.chi2.cdf(lrt_statistic_reduced, df=param_diff)

        # Collect the results for the reduced model
        lrt_comparisons.append(
            [
                f"Reduced Model (with first {i} columns)",
                f"{log_likelihood_reduced:.4f}",
                f"{lrt_statistic_reduced:.4f}",
                f"{p_value_reduced:.4g}",
            ]
        )

    # Output the results for the reduced models
    print("\nLRT Comparisons with Reduced Models")
    print(
        tabulate(
            lrt_comparisons,
            headers=["Model", "Log-Likelihood", "LRT Statistic", "p-value"],
            tablefmt="pipe",
        )
    )