BootstrappedModelingInputData(
response_df,
model_df,
n_bootstraps,
normalize_sample_weights=True,
random_state=None,
)
This class handles bootstrapped resampling of a response vector and model matrix.
This class supports both on-the-fly generation and externally provided bootstrap
indices. For each bootstrap sample, it maintains sample weights derived from
frequency counts of resampled instances.
Initialize bootstrapped modeling input.
n_bootstraps is required; the bootstrap indices are generated from it during initialization.
| Parameters: |
-
response_df
(DataFrame)
–
-
model_df
(DataFrame)
–
-
n_bootstraps
(int)
–
Number of bootstrap replicates to generate.
-
random_state
(int | None, default:
None
)
–
Random state for reproducibility. Can be an integer, a numpy RandomState object, or None. If None (default), no explicit random state is set.
|
| Raises: |
-
IndexError
–
If response_df and model_df do not have the same index (same labels in the same order). The constructor shown below performs no datatype validation of its own.
|
Source code in tfbpmodeling/bootstrapped_input_data.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def __init__(
    self,
    response_df: pd.DataFrame,
    model_df: pd.DataFrame,
    n_bootstraps: int,
    normalize_sample_weights: bool = True,
    random_state: int | None = None,
) -> None:
    """
    Initialize bootstrapped modeling input.

    The bootstrap indices are generated at the end of initialization by
    calling ``_generate_bootstrap_indices()``; this constructor does not
    accept precomputed indices.

    :param response_df: Response variable.
    :param model_df: Predictor matrix. Its index must equal the index of
        ``response_df`` (checked with ``Index.equals``).
    :param n_bootstraps: Number of bootstrap replicates to generate.
    :param normalize_sample_weights: Flag stored on the instance that records
        whether sample weights are to be normalized. The normalization itself
        is not performed in this method.
    :param random_state: Random state for reproducibility, passed to
        ``check_random_state``. Can be an integer or a numpy RandomState
        object, or None. If None (default), no explicit random state is set.
    :raises IndexError: If ``response_df`` and ``model_df`` do not have the
        same index.
    """
    self.response_df: pd.DataFrame = response_df
    self.model_df: pd.DataFrame = model_df

    if not response_df.index.equals(model_df.index):
        raise IndexError("response_df and model_df must have the same index order.")

    self.normalize_sample_weights = normalize_sample_weights
    self.n_bootstraps = n_bootstraps

    # Keep the user-supplied value for logging/serialization and derive the
    # generator object that is actually used for sampling.
    self.random_state = random_state
    self._rng = check_random_state(self.random_state)
    logger.info(
        f"Using random state: {self.random_state}"
        if self.random_state is not None
        else "No explicit random state set."
    )

    # Containers for the bootstrap indices and their sample weights.
    self._bootstrap_indices: list[np.ndarray] = []
    self._sample_weights: dict[int, np.ndarray] = {}

    # Iteration cursor. Created here so that next() works on a fresh
    # instance even when iter() has not been called first.
    self._current_index = 0

    self._generate_bootstrap_indices()
|
A list of arrays representing bootstrap sample indices.
Get the number of bootstrap samples.
| Returns: |
-
int
–
The number of bootstrap samples.
|
Get the normalization status for sample weights.
| Returns: |
-
bool
–
True if sample weights are normalized, False otherwise.
|
An integer used to set the random state when generating the bootstrap samples.
Set this explicitly for reproducibility.
Get the response DataFrame.
Normalized sample weights corresponding to bootstrap samples.
| Returns: |
-
dict[int, ndarray]
–
A dictionary mapping bootstrap index to sample weights.
|
Resets the iterator and returns itself.
Source code in tfbpmodeling/bootstrapped_input_data.py
def __iter__(self):
    """
    Restart iteration from the first bootstrap sample.

    :return: This object, which serves as its own iterator.
    """
    # Rewind the cursor so every new ``for`` loop starts at sample 0.
    self._current_index = 0
    return self
|
Provides the next bootstrap sample for iteration.
| Returns: |
-
tuple[ndarray, ndarray]
–
Tuple of (sample_indices, sample_weights).
|
| Raises: |
-
StopIteration
–
When all bootstrap samples are exhausted.
|
Source code in tfbpmodeling/bootstrapped_input_data.py
408
409
410
411
412
413
414
415
416
417
418
419
420
421
def __next__(self) -> tuple[np.ndarray, np.ndarray]:
    """
    Provides the next bootstrap sample for iteration.

    Works whether or not ``__iter__`` has been called beforehand: if the
    iteration cursor has not been created yet, iteration starts at the first
    bootstrap sample.

    :return: Tuple of (sample_indices, sample_weights).
    :raises StopIteration: When all bootstrap samples are exhausted.
    """
    # The cursor may not exist yet (e.g. next() called before iter());
    # defaulting to 0 avoids an AttributeError in that case.
    current = getattr(self, "_current_index", 0)
    if current >= self.n_bootstraps:
        raise StopIteration

    sample_indices, sample_weights = self.get_bootstrap_sample(current)
    # Advance only after the sample was fetched successfully.
    self._current_index = current + 1
    return sample_indices, sample_weights
|
Loads the object from a JSON file.
| Parameters: |
-
filename
(str)
–
Path to the JSON file holding a serialized BootstrappedModelingInputData object.
|
Source code in tfbpmodeling/bootstrapped_input_data.py
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
@classmethod
def deserialize(cls, filename: str):
    """
    Rebuild an instance from a JSON file.

    The file must contain the keys ``response_df``, ``index_name``,
    ``model_df``, ``n_bootstraps``, ``normalize_sample_weights`` and
    ``random_state``. Both DataFrames are rebuilt from their stored mapping
    and given ``index_name`` as index name, then everything is handed to the
    class constructor.

    :param filename: Path to the JSON file holding a serialized
        BootstrappedModelingInputData object.
    :return: A new instance built from the stored values.
    """
    with open(filename) as handle:
        payload = json.load(handle)

    # Each frame is stored as a keyword mapping for the DataFrame constructor.
    response_frame = pd.DataFrame(**payload["response_df"])
    response_frame = response_frame.rename_axis(index=payload["index_name"])

    model_frame = pd.DataFrame(**payload["model_df"])
    model_frame = model_frame.rename_axis(index=payload["index_name"])

    return cls(
        response_frame,
        model_frame,
        payload["n_bootstraps"],
        normalize_sample_weights=payload["normalize_sample_weights"],
        random_state=payload["random_state"],
    )
|
Retrieves a bootstrap sample by index.
| Returns: |
-
tuple[ndarray, ndarray]
–
Tuple of (sample_indices, sample_weights).
|
| Raises: |
-
IndexError
–
If the index exceeds the number of bootstraps.
|
Source code in tfbpmodeling/bootstrapped_input_data.py
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
def get_bootstrap_sample(self, i: int) -> tuple[np.ndarray, np.ndarray]:
    """
    Look up one bootstrap replicate by position.

    :param i: Bootstrap sample index.
    :return: Tuple of (sample_indices, sample_weights).
    :raises IndexError: If ``i`` is negative or not smaller than
        ``n_bootstraps``.
    """
    out_of_range = i >= self.n_bootstraps or i < 0
    if out_of_range:
        message = f"Bootstrap index {i} out of range. Max: {self.n_bootstraps - 1}"
        raise IndexError(message)

    # Indices are read first, then the matching weights.
    return self.bootstrap_indices[i], self.get_sample_weight(i)
|
Retrieves sample weights for a bootstrap sample.
Source code in tfbpmodeling/bootstrapped_input_data.py
304
305
306
307
308
309
310
311
312
313
def get_sample_weight(self, i: int) -> np.ndarray:
    """
    Return the sample weights that belong to one bootstrap replicate.

    :param i: Bootstrap sample index.
    :return: Array of sample weights.
    :raises IndexError: If ``i`` is negative or not smaller than
        ``n_bootstraps``.
    """
    out_of_range = i >= self.n_bootstraps or i < 0
    if out_of_range:
        raise IndexError(f"Sample weight index {i} out of range.")

    weights = self.sample_weights[i]
    return weights
|
Re-generate, randomly, bootstrap samples and sample weights.
This should be called if the response or predictors change.
Source code in tfbpmodeling/bootstrapped_input_data.py
316
317
318
319
320
321
322
def regenerate(self) -> None:
    """
    Draw a fresh set of bootstrap samples and sample weights.

    Call this after the response or the predictors have changed. The work is
    delegated to ``_generate_bootstrap_indices()``.
    """
    rebuild = self._generate_bootstrap_indices
    rebuild()
|
Saves only the bootstrap indices to a JSON file.
Saves the bootstrap indices to a JSON file. This can be used to persist the
bootstrap indices for later use, allowing for reproducibility in analyses.
| Parameters: |
-
filename
(str)
–
Path to the JSON file where the bootstrap indices will be saved. This will overwrite the file if it exists.
|
Source code in tfbpmodeling/bootstrapped_input_data.py
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def save_indices(self, filename: str) -> None:
    """
    Write the bootstrap indices, and nothing else, to a JSON file.

    The file holds two entries: ``n_bootstraps`` and ``bootstrap_indices``
    (one list per bootstrap sample). Persisting them allows the same
    resampling to be reused later for reproducible analyses.

    :param filename: Path to the JSON file where the bootstrap indices will be
        saved. This will overwrite the file if it exists.
    """
    replicate_count = self.n_bootstraps
    # Arrays are not JSON-serializable; convert each one to a plain list.
    index_lists = [sample.tolist() for sample in self._bootstrap_indices]
    payload = {
        "n_bootstraps": replicate_count,
        "bootstrap_indices": index_lists,
    }

    with open(filename, "w") as out:
        json.dump(payload, out)
|
Saves the object as a JSON file.
Serializes the BootstrappedModelingInputData object to a JSON file: the response
and model DataFrames, the index name, the number of bootstraps, the sample-weight
normalization flag, and the random state. Bootstrap indices and sample weights are not written.
| Parameters: |
-
filename
(str)
–
Path to the JSON file where the object will be saved.
|
| Raises: |
-
ValueError
–
If the filename is not a valid path or if the object cannot be serialized. This method will overwrite the file if it exists.
|
Source code in tfbpmodeling/bootstrapped_input_data.py
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
def serialize(self, filename: str) -> None:
    """
    Saves the object as a JSON file.

    The file contains six entries: ``response_df`` and ``model_df`` (both in
    pandas "split" orientation), ``index_name`` (the name of the response
    index), ``n_bootstraps``, ``normalize_sample_weights`` and
    ``random_state``. Bootstrap indices and sample weights are not written.

    No exceptions are caught here: errors from opening the file or from JSON
    encoding propagate to the caller.

    :param filename: Path to the JSON file where the object will be saved.
        This will overwrite the file if it exists.
    """
    payload = {}
    payload["response_df"] = self.response_df.to_dict(orient="split")
    payload["index_name"] = self.response_df.index.name
    payload["model_df"] = self.model_df.to_dict(orient="split")
    payload["n_bootstraps"] = self.n_bootstraps
    payload["normalize_sample_weights"] = self.normalize_sample_weights
    payload["random_state"] = self.random_state

    with open(filename, "w") as out:
        json.dump(payload, out)
|