BootstrapModelResults(
ci_dict,
bootstrap_coefs_df,
alpha_list,
alpha_df=pd.DataFrame(),
)
Encapsulates the results from bootstrapped stratified cross-validation modeling.
This includes:
- Confidence intervals for model coefficients across bootstrap iterations
- Raw coefficient estimates from each iteration
- Alpha values (regularization strengths) selected during each iteration
- Methods for extracting statistically significant coefficients
- Visualization utilities
- Serialization and deserialization support
Initialize BootstrapModelResults.
| Parameters: |
-
ci_dict
(dict[str, dict[str, tuple[float, float]]])
–
Nested dictionary mapping confidence levels to (low, high) confidence intervals for each coefficient.
-
bootstrap_coefs_df
(DataFrame)
–
DataFrame of shape (n_bootstraps, n_features) containing coefficient values from each bootstrap sample.
-
alpha_list
(list[float])
–
List of alpha values (regularization strength) selected during each bootstrap iteration.
-
alpha_df
(DataFrame, default:
DataFrame()
)
–
A DataFrame with the columns 'bootstrap_idx', 'alpha', 'fold', and 'mse'.
|
Source code in tfbpmodeling/bootstrap_model_results.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 | def __init__(
self,
ci_dict: dict[str, dict[str, tuple[float, float]]],
bootstrap_coefs_df: pd.DataFrame,
alpha_list: list[float],
alpha_df: pd.DataFrame = pd.DataFrame(),
):
"""
Initialize BootstrapModelResults.
:param ci_dict: Nested dictionary mapping confidence levels to (low, high)
confidence intervals for each coefficient.
:param bootstrap_coefs_df: DataFrame of shape (n_bootstraps, n_features)
containing coefficient values from each bootstrap sample.
:param alpha_list: List of alpha values (regularization strength) selected
during each bootstrap iteration.
:param alpha_df: a dataframe with the columns 'bootstrap_idx', 'alpha', 'fold',
and 'mse'
"""
self.ci_dict = ci_dict
self.bootstrap_coefs_df = bootstrap_coefs_df
self.alpha_list = alpha_list
self.alpha_df = alpha_df
|
deserialize
classmethod
deserialize(ci_dict_json, coefs_alphas_pkl)
Load model results from disk.
| Parameters: |
-
ci_dict_json
(str)
–
Path to the JSON file with confidence intervals.
-
coefs_alphas_pkl
(str)
–
Path to the Pickle file with coefficient matrix and alpha list.
|
| Raises: |
-
FileNotFoundError
–
If either file is missing.
-
ValueError
–
If the pickle file contents are invalid.
|
Source code in tfbpmodeling/bootstrap_model_results.py
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
@classmethod
def deserialize(
    cls, ci_dict_json: str, coefs_alphas_pkl: str
) -> "BootstrapModelResults":
    """
    Load model results from disk.

    :param ci_dict_json: Path to the JSON file with confidence intervals.
    :param coefs_alphas_pkl: Path to the Pickle file with coefficient matrix and
        alpha list.
    :return: A new BootstrapModelResults instance.
    :raises FileNotFoundError: If either path is not an existing regular file.
    :raises ValueError: If the pickle file contents are invalid.
    """
    # Use isfile rather than exists: a directory at either path would pass an
    # existence check and then fail inside open() with a different exception
    # than the one documented here.
    if not os.path.isfile(ci_dict_json):
        raise FileNotFoundError(
            f"Confidence intervals file '{ci_dict_json}' not found."
        )
    if not os.path.isfile(coefs_alphas_pkl):
        raise FileNotFoundError(f"Pickle file '{coefs_alphas_pkl}' not found.")

    # Load confidence intervals from JSON
    with open(ci_dict_json) as f:
        ci_dict = json.load(f)

    # SECURITY: unpickling can execute arbitrary code. Only call this on files
    # produced by a trusted source.
    with open(coefs_alphas_pkl, "rb") as f:
        loaded_data = pickle.load(f)

    # The pickle must hold a 2-tuple whose first element is the coefficient
    # DataFrame.
    if not isinstance(loaded_data, tuple) or len(loaded_data) != 2:
        raise ValueError(
            "Pickle file does not contain expected (DataFrame, list) format."
        )
    bootstrap_coefs_df, alpha_list = loaded_data
    if not isinstance(bootstrap_coefs_df, pd.DataFrame):
        raise ValueError(
            "Pickle file does not contain expected (DataFrame, list) format."
        )

    return cls(ci_dict, bootstrap_coefs_df, alpha_list)
|
extract_significant_coefficients(
ci_level="95.0", threshold=0.0
)
Extract coefficients that are statistically significant based on their bootstrap
confidence intervals.
A coefficient is considered significant if its entire confidence interval
lies above threshold or below -threshold.
| Parameters: |
-
ci_level
(str, default:
'95.0'
)
–
Confidence interval level (e.g., "95.0").
-
threshold
(float, default:
0.0
)
–
Minimum effect size for significance.
|
| Returns: |
-
dict[str, tuple[float, float]]
–
Dictionary mapping coefficient names to their (low, high) CI bounds.
|
Source code in tfbpmodeling/bootstrap_model_results.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
def extract_significant_coefficients(
    self, ci_level: str = "95.0", threshold: float = 0.0
) -> dict[str, tuple[float, float]]:
    """
    Return the coefficients whose bootstrap confidence interval lies entirely
    above `threshold` or entirely below `-threshold`.

    If `ci_level` is not a key of `self.ci_dict`, the interval for every column
    of `self.bootstrap_coefs_df` is computed from percentiles of the bootstrap
    samples. `self.ci_dict` itself is not modified.

    :param ci_level: Confidence interval level (e.g., "95.0").
    :param threshold: Minimum effect size for significance.
    :return: Dictionary mapping coefficient names to their (low, high) CI bounds.
    """
    if ci_level in self.ci_dict:
        intervals = self.ci_dict[ci_level]
    else:
        # Level was not precomputed: derive symmetric percentile bounds from
        # the bootstrap samples.
        ci_level_numeric = float(ci_level)
        logger.debug(
            f"Generating confidence intervals for ci level: {ci_level_numeric}"
        )
        lower_pct = (100 - ci_level_numeric) / 2
        upper_pct = 100 - (100 - ci_level_numeric) / 2
        samples = self.bootstrap_coefs_df
        intervals = {}
        for colname in samples.columns:
            column = samples[colname]
            intervals[colname] = (
                np.percentile(column, lower_pct),
                np.percentile(column, upper_pct),
            )

    # Bookkeeping and non-coefficient entries that are never reported.
    excluded = {
        "bootstrap_idx",
        "final_training_score",
        "alpha",
        "left_asymptote",
        "right_asymptote",
        "Intercept",
    }

    # Apply the significance test first, then drop the excluded names.
    return {
        coef: bounds
        for coef, bounds in intervals.items()
        if (bounds[0] > threshold or bounds[1] < -threshold)
        and coef not in excluded
    }
|
from_jsonl
classmethod
from_jsonl(
db_path,
bootstrap_results_table_name="bootstrap_results",
mse_table_name="mse_path",
)
Load bootstrap results from JSONL files. This is intended to be used with the
sigmoid bootstrap results.
| Parameters: |
-
db_path
(str)
–
Path to the directory containing the JSONL files for a given regulator.
-
bootstrap_results_table_name
(str, default:
'bootstrap_results'
)
–
Name of the JSONL file containing bootstrap coefficient/final model results
-
mse_table_name
(str, default:
'mse_path'
)
–
Name of the JSONL file containing fold-wise MSE results by bootstrap_idx/alpha
|
| Raises: |
-
FileNotFoundError
–
If the JSONL files do not exist.
-
ValueError
–
If the bootstrap results JSONL file contains no valid records.
|
Source code in tfbpmodeling/bootstrap_model_results.py
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
@classmethod
def from_jsonl(
    cls,
    db_path: str,
    bootstrap_results_table_name: str = "bootstrap_results",
    mse_table_name: str = "mse_path",
) -> "BootstrapModelResults":
    """
    Build an instance from a pair of JSONL files. This is intended to be used
    with the sigmoid bootstrap results.

    Lines that are not valid JSON are skipped. The returned instance has an
    empty `ci_dict` and an empty `alpha_list`.

    :param db_path: Path to the directory containing the JSONL files for a given
        regulator
    :param bootstrap_results_table_name: Name of the JSONL file containing bootstrap
        coefficient/final model results
    :param mse_table_name: Name of the JSONL file containing fold-wise MSE results
        by bootstrap_idx/alpha
    :return: An instance of BootstrapModelResults
    :raises FileNotFoundError: If the JSONL files do not exist.
    :raises ValueError: If the results file holds no valid JSON records.
    """

    def _read_records(path):
        # Parse one JSON document per line, ignoring lines that fail to decode.
        records = []
        with open(path) as handle:
            for raw_line in handle:
                try:
                    record = json.loads(raw_line)
                except json.JSONDecodeError:
                    continue
                records.append(record)
        return records

    results_file = os.path.join(db_path, f"{bootstrap_results_table_name}.jsonl")
    mse_file = os.path.join(db_path, f"{mse_table_name}.jsonl")

    # Both files must be present before anything is read.
    for required in (results_file, mse_file):
        if not os.path.isfile(required):
            raise FileNotFoundError(f"Results file not found: {required}")

    coef_records = _read_records(results_file)
    if not coef_records:
        raise ValueError("No valid records found in the results JSONL file.")
    coefs_df = pd.DataFrame(coef_records)

    # The MSE file must exist, but it may contain no usable rows.
    mse_records = _read_records(mse_file)
    if mse_records:
        mse_df = pd.DataFrame(mse_records)
    else:
        mse_df = pd.DataFrame()

    return cls(
        ci_dict={},
        bootstrap_coefs_df=coefs_df,
        alpha_list=[],
        alpha_df=mse_df,
    )
|
serialize
serialize(filename, output_dir=None)
Save the results to disk.
Creates two files:
- {filename}.json: confidence intervals
- {filename}.pkl: tuple of (bootstrap_coefs_df, alpha_list)
| Parameters: |
-
filename
(str)
–
Base filename (without extension).
-
output_dir
(str | None, default:
None
)
–
Optional directory to write files into. Uses current directory if not specified.
|
| Raises: |
-
FileNotFoundError
–
If the specified directory does not exist.
|
Source code in tfbpmodeling/bootstrap_model_results.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178 | def serialize(self, filename: str, output_dir: str | None = None) -> None:
"""
Save the results to disk.
Creates two files:
- `{filename}.json`: confidence intervals
- `{filename}.pkl`: tuple of (bootstrap_coefs_df, alpha_list)
:param filename: Base filename (without extension).
:param output_dir: Optional directory to write files into. Uses current
directory if not specified.
:raises FileNotFoundError: If the specified directory does not exist.
"""
# Validate that the output directory exists
if output_dir:
if not os.path.isdir(output_dir):
raise FileNotFoundError(
f"The output directory '{output_dir}' does not exist. "
"Please create it before saving."
)
filepath_json = os.path.join(output_dir, f"{filename}.json")
filepath_pkl = os.path.join(output_dir, f"{filename}.pkl")
else:
filepath_json = f"{filename}.json"
filepath_pkl = f"{filename}.pkl"
# Save confidence intervals as JSON
with open(filepath_json, "w") as f:
json.dump(self.ci_dict, f, indent=4)
# Save DataFrame and alpha_list as a Pickle file
with open(filepath_pkl, "wb") as f:
pickle.dump((self.bootstrap_coefs_df, self.alpha_list), f)
|
visualize_significant_coefficients
visualize_significant_coefficients(
ci_level="95.0", threshold=0.0
)
Visualize the distribution of coefficients that are significant at the specified
confidence level.
| Parameters: |
-
ci_level
(str, default:
'95.0'
)
–
Confidence interval level (e.g., "95.0").
-
threshold
(float, default:
0.0
)
–
Minimum absolute value for significance.
|
| Returns: |
-
Figure | None
–
Matplotlib figure, or None if no significant coefficients are found.
|
Source code in tfbpmodeling/bootstrap_model_results.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def visualize_significant_coefficients(
    self, ci_level: str = "95.0", threshold: float = 0.0
) -> "plt.Figure | None":
    """
    Visualize the distribution of coefficients that are significant at the specified
    confidence level.

    The significant coefficients are obtained from
    `extract_significant_coefficients` and drawn as horizontal box plots of
    their bootstrap samples. When none are significant, a message is printed
    and no figure is created.

    :param ci_level: Confidence interval level (e.g., "95.0").
    :param threshold: Minimum absolute value for significance.
    :return: Matplotlib figure, or None if no significant coefficients are found.
    """
    significant_coefs = self.extract_significant_coefficients(ci_level, threshold)

    if not significant_coefs:
        # Both fragments must be f-strings; otherwise the literal text
        # "{threshold}" is printed instead of the value.
        print(
            f"No significant coefficients found for CI {ci_level} "
            f"at threshold {threshold}."
        )
        return None

    # Extract relevant coefficients for plotting
    df_extracted = self.bootstrap_coefs_df[list(significant_coefs)]

    # Create the boxplot, with a dashed reference line at zero
    fig = plt.figure(figsize=(10, 6))
    sns.boxplot(data=df_extracted, orient="h")
    plt.axvline(x=0, linestyle="--", color="black")
    plt.xlabel("Coefficient Values")
    plt.title(f"Coefficients with {ci_level}% CI outside ±{threshold}")

    return fig
|