evidently.calculations
Subpackages
Submodules
classification_performance module
class ConfusionMatrix(labels: Sequence[Union[str, int]], values: list)
Attributes:
class DatasetClassificationQuality(accuracy: float, precision: float, recall: float, f1: float, roc_auc: Optional[float] = None, log_loss: Optional[float] = None, tpr: Optional[float] = None, tnr: Optional[float] = None, fpr: Optional[float] = None, fnr: Optional[float] = None, rate_plots_data: Optional[Dict] = None, plot_data: Optional[Dict] = None)
Attributes:
class PredictionData(predictions: Series, prediction_probas: Optional[DataFrame], labels: List[Union[str, int]])
Attributes:
calculate_confusion_by_classes(confusion_matrix: ndarray, class_names: Sequence[Union[str, int]])
calculate_matrix(target: Series, prediction: Series, labels: List[Union[str, int]])
calculate_metrics(column_mapping: ColumnMapping, confusion_matrix: ConfusionMatrix, target: Series, prediction: PredictionData)
calculate_pr_table(binded)
collect_plot_data(prediction_probas: DataFrame)
get_prediction_data(data: DataFrame, data_columns: DatasetColumns, pos_label: Optional[Union[str, int]], threshold: float = 0.5)
k_probability_threshold(prediction_probas: DataFrame, k: Union[int, float])
threshold_probability_labels(prediction_probas: DataFrame, pos_label: Union[str, int], neg_label: Union[str, int], threshold: float)
data_drift module
class ColumnDataDriftMetrics(column_name: str, column_type: str, stattest_name: str, drift_score: float, drift_detected: bool, threshold: float, current_distribution: Distribution, reference_distribution: Distribution, current_small_distribution: Optional[list] = None, reference_small_distribution: Optional[list] = None, current_scatter: Optional[Dict[str, list]] = None, x_name: Optional[str] = None, plot_shape: Optional[Dict[str, float]] = None, current_correlations: Optional[Dict[str, float]] = None, reference_correlations: Optional[Dict[str, float]] = None)
Attributes:
class DatasetDrift(number_of_drifted_columns: int, dataset_drift_score: float, dataset_drift: bool)
Attributes:
class DatasetDriftMetrics(number_of_columns: int, number_of_drifted_columns: int, share_of_drifted_columns: float, dataset_drift: bool, drift_by_columns: Dict[str, ColumnDataDriftMetrics], options: DataDriftOptions, dataset_columns: DatasetColumns)
Attributes:
ensure_prediction_column_is_string(*, prediction_column: Optional[Union[str, Sequence]], current_data: DataFrame, reference_data: DataFrame, threshold: float = 0.5)
get_dataset_drift(drift_metrics, drift_share=0.5)
get_drift_for_columns(*, current_data: DataFrame, reference_data: DataFrame, dataset_columns: DatasetColumns, data_drift_options: DataDriftOptions, drift_share_threshold: Optional[float] = None, columns: Optional[List[str]] = None)
get_one_column_drift(*, current_data: DataFrame, reference_data: DataFrame, column_name: str, options: DataDriftOptions, dataset_columns: DatasetColumns, column_type: Optional[str] = None)
data_integration module
get_number_of_all_pandas_missed_values(dataset: DataFrame)
get_number_of_almost_constant_columns(dataset: DataFrame, threshold: float)
get_number_of_almost_duplicated_columns(dataset: DataFrame, threshold: float)
get_number_of_constant_columns(dataset: DataFrame)
get_number_of_duplicated_columns(dataset: DataFrame)
get_number_of_empty_columns(dataset: DataFrame)
data_quality module
class ColumnCorrelations(column_name: str, kind: str, values: Distribution)
Attributes:
class DataQualityGetPlotData()
Methods:
class DataQualityPlot(bins_for_hist: Dict[str, DataFrame])
Attributes:
class DataQualityStats(rows_count: int, num_features_stats: Optional[Dict[str, FeatureQualityStats]] = None, cat_features_stats: Optional[Dict[str, FeatureQualityStats]] = None, datetime_features_stats: Optional[Dict[str, FeatureQualityStats]] = None, target_stats: Optional[Dict[str, FeatureQualityStats]] = None, prediction_stats: Optional[Dict[str, FeatureQualityStats]] = None)
Attributes:
Methods:
class FeatureQualityStats(feature_type: str, number_of_rows: int = 0, count: int = 0, infinite_count: Optional[int] = None, infinite_percentage: Optional[float] = None, missing_count: Optional[int] = None, missing_percentage: Optional[float] = None, unique_count: Optional[int] = None, unique_percentage: Optional[float] = None, percentile_25: Optional[float] = None, percentile_50: Optional[float] = None, percentile_75: Optional[float] = None, max: Optional[Union[int, float, bool, str]] = None, min: Optional[Union[int, float, bool, str]] = None, mean: Optional[float] = None, most_common_value: Optional[Union[int, float, bool, str]] = None, most_common_value_percentage: Optional[float] = None, std: Optional[float] = None, most_common_not_null_value: Optional[Union[int, float, bool, str]] = None, most_common_not_null_value_percentage: Optional[float] = None, new_in_current_values_count: Optional[int] = None, unused_in_current_values_count: Optional[int] = None)
Attributes:
Methods:
calculate_category_column_correlations(column_name: str, dataset: DataFrame, columns: List[str])
calculate_column_distribution(column: Series, column_type: str)
calculate_correlations(dataset: DataFrame, columns: DatasetColumns)
calculate_cramer_v_correlation(column_name: str, dataset: DataFrame, columns: List[str])
calculate_data_quality_stats(dataset: DataFrame, columns: DatasetColumns, task: Optional[str])
calculate_numerical_column_correlations(column_name: str, dataset: DataFrame, columns: List[str])
get_features_stats(feature: Series, feature_type: str)
get_pairwise_correlation(df, func: Callable[[Series, Series], float])
get_rows_count(data: Union[DataFrame, Series])
regression_performance module
class ErrorWithQuantiles(error, quantile_top, quantile_other)
class FeatureBias(feature_type: str, majority: float, under: float, over: float, range: float)
Attributes:
Methods:
class RegressionPerformanceMetrics(mean_error: float, mean_abs_error: float, mean_abs_perc_error: float, error_std: float, abs_error_max: float, abs_error_std: float, abs_perc_error_std: float, error_normality: dict, underperformance: dict, error_bias: dict)
Attributes:
calculate_regression_performance(dataset: DataFrame, columns: DatasetColumns, error_bias_prefix: str)
error_bias_table(dataset, err_quantiles, num_feature_names, cat_feature_names)
error_with_quantiles(dataset, prediction_column, target_column, quantile: float)
Last updated