from typing import Any, Dict, Optional, Tuple, Union
import numpy as np
from matplotlib import pyplot as plt
from numpy.random import BitGenerator, Generator, RandomState, SeedSequence
from .bulk_fit import fit_estimators
from .plot.plot_methods import make_plots
[docs]
class TailEstimatorSet:
"""
Class for running estimation with multiple estimator methods at once and creating a plot for comparison.
Parameters
----------
data : np.ndarray
The data to plot.
output_file_path : str, optional
File path to which plots should be saved. If None, the figure is not saved.
number_of_bins : int, default=30
Number of log-bins for degree distribution.
r_smooth : int, default=2
Integer parameter controlling the width of smoothing window.
Typically small value such as 2 or 3.
alpha : float, default=0.6
Parameter controlling the amount of "smoothing" for the kernel-type estimator.
Should be greater than 0.5.
hsteps : int, default=200
Parameter controlling number of bandwidth steps of the kernel-type estimator.
bootstrap_flag : bool, default=True
Flag to switch on/off double-bootstrap procedure.
t_bootstrap : float, default=0.5
Parameter controlling the size of the 2nd bootstrap.
Defined from n2 = n*(t_bootstrap).
r_bootstrap : int, default=500
Number of bootstrap resamplings for the 1st and 2nd bootstraps.
diagnostic_plots : bool, default=False
Flag to switch on/off generation of AMSE diagnostic plots.
eps_stop : float, default=1.0
Parameter controlling range of AMSE minimization.
Defined as the fraction of order statistics to consider
during the AMSE minimization step.
theta1 : float, default=0.01
Lower bound of plotting range, defined as k_min = ceil(n^theta1).
Overwritten if plots behave badly within the range.
theta2 : float, default=0.99
Upper bound of plotting range, defined as k_max = floor(n^theta2).
Overwritten if plots behave badly within the range.
verbose : bool, default=False
Flag controlling bootstrap verbosity.
noise_flag : bool, default=True
Switch on/off uniform noise in range [-5*10^(-p), 5*10^(-p)]
that is added to each data point. Used for integer-valued sequences.
p_noise : int, default=1
Integer parameter controlling noise amplitude.
savedata : bool, default=False
Flag to save data files in the directory with plots.
auto_plot : bool, default=False
Whether to create the plots immediately upon initialization.
base_seed: None | SeedSequence | BitGenerator | Generator | RandomState, default=None
Base random seed for reproducibility of bootstrap. Only used for methods with bootstrap.
"""
def __init__(
self,
data: np.ndarray = None,
output_file_path: Optional[str] = None,
number_of_bins: int = 30,
r_smooth: int = 2,
alpha: float = 0.6,
hsteps: int = 200,
bootstrap_flag: bool = True,
t_bootstrap: float = 0.5,
r_bootstrap: int = 500,
diagnostic_plots: bool = False,
eps_stop: float = 1.0,
theta1: float = 0.01,
theta2: float = 0.99,
verbose: bool = False,
noise_flag: bool = True,
p_noise: int = 1,
savedata: bool = False,
auto_plot: bool = False,
base_seed: Union[
None, SeedSequence, BitGenerator, Generator, RandomState
] = None,
):
# Store parameters
self.output_file_path = output_file_path
self.number_of_bins = number_of_bins
self.r_smooth = r_smooth
self.alpha = alpha
self.hsteps = hsteps
self.bootstrap_flag = bootstrap_flag
self.t_bootstrap = t_bootstrap
self.r_bootstrap = r_bootstrap
self.diagnostic_plots = diagnostic_plots
self.eps_stop = eps_stop
self.theta1 = theta1
self.theta2 = theta2
self.verbose = verbose
self.noise_flag = noise_flag
self.p_noise = p_noise
self.savedata = savedata
self.base_seed = base_seed
# Initialize data-related attributes
self.data = None
self.ordered_data = None
self.results = None
# Store figure and axes as None initially
self.fig = None
self.axes = None
# Fit data if provided
if data is not None:
self.fit(data)
# Create the plots immediately if auto_plot is True and data is provided
if auto_plot and data is not None:
self.plot()
[docs]
def fit(self, data: np.ndarray) -> "TailEstimatorSet":
"""Fit the estimators to the data.
Parameters
----------
data : np.ndarray
The data to fit the estimators to.
Returns
-------
self : TailEstimatorSet
The fitted estimator set.
"""
# Make sure data is a numpy array
data_array = np.asarray(data)
# Store the data
self.data = data_array
self.ordered_data = np.sort(data_array)[::-1]
# Fit the estimators
self.results = fit_estimators(
ordered_data=self.ordered_data,
number_of_bins=self.number_of_bins,
r_smooth=self.r_smooth,
alpha=self.alpha,
hsteps=self.hsteps,
bootstrap_flag=self.bootstrap_flag,
t_bootstrap=self.t_bootstrap,
r_bootstrap=self.r_bootstrap,
diagn_plots=self.diagnostic_plots,
eps_stop=self.eps_stop,
verbose=self.verbose,
noise_flag=self.noise_flag,
p_noise=self.p_noise,
base_seed=self.base_seed,
)
# Reset figure and axes
self.fig = None
self.axes = None
return self
[docs]
def plot(self) -> Tuple[plt.Figure, np.ndarray]:
"""Create and return the plots.
Returns
-------
fig : matplotlib.figure.Figure
The figure object.
axes : numpy.ndarray
Array of axes objects.
"""
if self.ordered_data is None:
raise ValueError("No data has been fitted. Call fit() first.")
self.fig, self.axes = self._create_plots()
return self.fig, self.axes
[docs]
def plot_diagnostics(self) -> Tuple[plt.Figure, np.ndarray]:
"""Create and return the diagnostic plots.
Returns
-------
fig_d : matplotlib.figure.Figure
The diagnostic figure object.
axes_d : numpy.ndarray
Array of diagnostic axes objects.
Raises
------
ValueError
If no data has been fitted or if bootstrap is not enabled.
"""
if self.ordered_data is None:
raise ValueError("No data has been fitted. Call fit() first.")
if not self.bootstrap_flag:
raise ValueError(
"Diagnostic plots require bootstrap to be enabled. Set bootstrap_flag=True when creating the TailEstimatorSet."
)
if not self.diagnostic_plots:
raise ValueError(
"Diagnostic plots are not enabled. Set diagnostic_plots=True when creating the TailEstimatorSet."
)
return self._create_diagnostic_plots()
def _create_diagnostic_plots(self) -> Tuple[plt.Figure, np.ndarray]:
"""Create the diagnostic plots using the make_diagnostic_plots function.
Returns
-------
fig_d : matplotlib.figure.Figure
The diagnostic figure object.
axes_d : numpy.ndarray
Array of diagnostic axes objects.
"""
from .plot.plot_methods import make_diagnostic_plots
return make_diagnostic_plots(
results=self.results,
output_file_path=self.output_file_path,
hsteps=self.hsteps,
bootstrap_flag=self.bootstrap_flag,
verbose=self.verbose,
noise_flag=self.noise_flag,
savedata=self.savedata,
)
def _create_plots(self) -> Tuple[plt.Figure, np.ndarray]:
"""Create the plots using the make_plots function.
Returns
-------
fig : matplotlib.figure.Figure
The figure object.
axes : numpy.ndarray
Array of axes objects.
"""
return make_plots(
ordered_data=self.ordered_data,
results=self.results,
output_file_path=self.output_file_path,
alpha=self.alpha,
bootstrap_flag=self.bootstrap_flag,
diagn_plots=self.diagnostic_plots,
theta1=self.theta1,
theta2=self.theta2,
verbose=self.verbose,
noise_flag=self.noise_flag,
savedata=self.savedata,
)
[docs]
def get_params(self) -> Dict[str, Any]:
"""Get the parameters used for plotting.
Returns
-------
Dict[str, Any]
Dictionary of parameters used for plotting.
"""
return {
"data_length": len(self.data) if self.data is not None else 0,
"number_of_bins": self.number_of_bins,
"r_smooth": self.r_smooth,
"alpha": self.alpha,
"hsteps": self.hsteps,
"bootstrap_flag": self.bootstrap_flag,
"t_bootstrap": self.t_bootstrap,
"r_bootstrap": self.r_bootstrap,
"diagnostic_plots": self.diagnostic_plots,
"eps_stop": self.eps_stop,
"theta1": self.theta1,
"theta2": self.theta2,
"verbose": self.verbose,
"noise_flag": self.noise_flag,
"p_noise": self.p_noise,
"savedata": self.savedata,
"base_seed": self.base_seed,
}
[docs]
def __repr__(self) -> str:
"""Return a string representation of the object."""
if self.data is None:
return "TailEstimatorSet(not fitted)"
else:
return f"TailEstimatorSet(data_length={len(self.data)})"
[docs]
def __call__(self) -> Tuple[plt.Figure, np.ndarray]:
"""Return the figure and axes when the object is called."""
if self.ordered_data is None:
raise ValueError("No data has been fitted. Call fit() first.")
if self.fig is None:
self.plot()
return self.fig, self.axes