LogoLogo
HomeBlogGitHub
latest
latest
  • New DOCS
  • What is Evidently?
  • Get Started
    • Evidently Cloud
      • Quickstart - LLM tracing
      • Quickstart - LLM evaluations
      • Quickstart - Data and ML checks
      • Quickstart - No-code evaluations
    • Evidently OSS
      • OSS Quickstart - LLM evals
      • OSS Quickstart - Data and ML monitoring
  • Presets
    • All Presets
    • Data Drift
    • Data Quality
    • Target Drift
    • Regression Performance
    • Classification Performance
    • NoTargetPerformance
    • Text Evals
    • Recommender System
  • Tutorials and Examples
    • All Tutorials
    • Tutorial - Tracing
    • Tutorial - Reports and Tests
    • Tutorial - Data & ML Monitoring
    • Tutorial - LLM Evaluation
    • Self-host ML Monitoring
    • LLM as a judge
    • LLM Regression Testing
  • Setup
    • Installation
    • Evidently Cloud
    • Self-hosting
  • User Guide
    • 📂Projects
      • Projects overview
      • Manage Projects
    • 📶Tracing
      • Tracing overview
      • Set up tracing
    • 🔢Input data
      • Input data overview
      • Column mapping
      • Data for Classification
      • Data for Recommendations
      • Load data to pandas
    • 🚦Tests and Reports
      • Reports and Tests Overview
      • Get a Report
      • Run a Test Suite
      • Evaluate Text Data
      • Output formats
      • Generate multiple Tests or Metrics
      • Run Evidently on Spark
    • 📊Evaluations
      • Evaluations overview
      • Generate snapshots
      • Run no code evals
    • 🔎Monitoring
      • Monitoring overview
      • Batch monitoring
      • Collector service
      • Scheduled evaluations
      • Send alerts
    • 📈Dashboard
      • Dashboard overview
      • Pre-built Tabs
      • Panel types
      • Adding Panels
    • 📚Datasets
      • Datasets overview
      • Work with Datasets
    • 🛠️Customization
      • Data drift parameters
      • Embeddings drift parameters
      • Feature importance in data drift
      • Text evals with LLM-as-judge
      • Text evals with HuggingFace
      • Add a custom text descriptor
      • Add a custom drift method
      • Add a custom Metric or Test
      • Customize JSON output
      • Show raw data in Reports
      • Add text comments to Reports
      • Change color schema
    • How-to guides
  • Reference
    • All tests
    • All metrics
      • Ranking metrics
    • Data drift algorithm
    • API Reference
      • evidently.calculations
        • evidently.calculations.stattests
      • evidently.metrics
        • evidently.metrics.classification_performance
        • evidently.metrics.data_drift
        • evidently.metrics.data_integrity
        • evidently.metrics.data_quality
        • evidently.metrics.regression_performance
      • evidently.metric_preset
      • evidently.options
      • evidently.pipeline
      • evidently.renderers
      • evidently.report
      • evidently.suite
      • evidently.test_preset
      • evidently.test_suite
      • evidently.tests
      • evidently.utils
  • Integrations
    • Integrations
      • Evidently integrations
      • Notebook environments
      • Evidently and Airflow
      • Evidently and MLflow
      • Evidently and DVCLive
      • Evidently and Metaflow
  • SUPPORT
    • Migration
    • Contact
    • F.A.Q.
    • Telemetry
    • Changelog
  • GitHub Page
  • Website
Powered by GitBook
On this page
  • Submodules
  • column_missing_values_metric module
  • class ColumnMissingValues(number_of_rows: int, different_missing_values: Dict[Any, int], number_of_different_missing_values: int, number_of_missing_values: int, share_of_missing_values: float)
  • class ColumnMissingValuesMetric(column_name: str, missing_values: Optional[list] = None, replace: bool = True)
  • class ColumnMissingValuesMetricRenderer(color_options: Optional[ColorOptions] = None)
  • class ColumnMissingValuesMetricResult(column_name: str, current: ColumnMissingValues, reference: Optional[ColumnMissingValues] = None)
  • column_regexp_metric module
  • class ColumnRegExpMetric(column_name: str, reg_exp: str, top: int = 10)
  • class ColumnRegExpMetricRenderer(color_options: Optional[ColorOptions] = None)
  • class DataIntegrityValueByRegexpMetricResult(column_name: str, reg_exp: str, top: int, current: DataIntegrityValueByRegexpStat, reference: Optional[DataIntegrityValueByRegexpStat] = None)
  • class DataIntegrityValueByRegexpStat(number_of_matched: int, number_of_not_matched: int, number_of_rows: int, table_of_matched: Dict[str, int], table_of_not_matched: Dict[str, int])
  • column_summary_metric module
  • class CategoricalCharacteristics(number_of_rows: int, count: int, unique: Optional[int], unique_percentage: Optional[float], most_common: Optional[object], most_common_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], new_in_current_values_count: Optional[int] = None, unused_in_current_values_count: Optional[int] = None)
  • class ColumnSummary(column_name: str, column_type: str, reference_characteristics: Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics, NoneType], current_characteristics: Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics], plot_data: DataQualityPlot)
  • class ColumnSummaryMetric(column_name: str)
  • class ColumnSummaryMetricRenderer(color_options: Optional[ColorOptions] = None)
  • class DataByTarget(data_for_plots: Dict[str, Dict[str, Union[list, pandas.core.frame.DataFrame]]], target_name: str, target_type: str)
  • class DataInTime(data_for_plots: Dict[str, pandas.core.frame.DataFrame], freq: str, datetime_name: str)
  • class DataQualityPlot(bins_for_hist: Dict[str, pandas.core.frame.DataFrame], data_in_time: Optional[DataInTime], data_by_target: Optional[DataByTarget], counts_of_values: Optional[Dict[str, pandas.core.frame.DataFrame]])
  • class DatetimeCharacteristics(number_of_rows: int, count: int, unique: Optional[int], unique_percentage: Optional[float], most_common: Optional[object], most_common_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], first: Optional[str], last: Optional[str])
  • class NumericCharacteristics(number_of_rows: int, count: int, mean: Union[float, int, NoneType], std: Union[float, int, NoneType], min: Union[float, int, NoneType], p25: Union[float, int, NoneType], p50: Union[float, int, NoneType], p75: Union[float, int, NoneType], max: Union[float, int, NoneType], unique: Optional[int], unique_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], infinite_count: Optional[int], infinite_percentage: Optional[float], most_common: Union[float, int, NoneType], most_common_percentage: Optional[float])
  • dataset_missing_values_metric module
  • class DatasetMissingValues(different_missing_values: Dict[Any, int], number_of_different_missing_values: int, different_missing_values_by_column: Dict[str, Dict[Any, int]], number_of_different_missing_values_by_column: Dict[str, int], number_of_missing_values: int, share_of_missing_values: float, number_of_missing_values_by_column: Dict[str, int], share_of_missing_values_by_column: Dict[str, float], number_of_rows: int, number_of_rows_with_missing_values: int, share_of_rows_with_missing_values: float, number_of_columns: int, columns_with_missing_values: List[str], number_of_columns_with_missing_values: int, share_of_columns_with_missing_values: float)
  • class DatasetMissingValuesMetric(missing_values: Optional[list] = None, replace: bool = True)
  • class DatasetMissingValuesMetricRenderer(color_options: Optional[ColorOptions] = None)
  • class DatasetMissingValuesMetricResult(current: DatasetMissingValues, reference: Optional[DatasetMissingValues] = None)
  • dataset_summary_metric module
  • class DatasetSummary(target: Optional[str], prediction: Optional[Union[str, Sequence[str]]], date_column: Optional[str], id_column: Optional[str], number_of_columns: int, number_of_rows: int, number_of_missing_values: int, number_of_categorical_columns: int, number_of_numeric_columns: int, number_of_datetime_columns: int, number_of_constant_columns: int, number_of_almost_constant_columns: int, number_of_duplicated_columns: int, number_of_almost_duplicated_columns: int, number_of_empty_rows: int, number_of_empty_columns: int, number_of_duplicated_rows: int, columns_type: dict, nans_by_columns: dict, number_uniques_by_columns: dict)
  • class DatasetSummaryMetric(almost_duplicated_threshold: float = 0.95, almost_constant_threshold: float = 0.95)
  • class DatasetSummaryMetricRenderer(color_options: Optional[ColorOptions] = None)
  • class DatasetSummaryMetricResult(almost_duplicated_threshold: float, current: DatasetSummary, reference: Optional[DatasetSummary] = None)
  1. Reference
  2. API Reference
  3. evidently.metrics

evidently.metrics.data_integrity

Previous: evidently.metrics.data_drift | Next: evidently.metrics.data_quality

Last updated 2 months ago

Submodules

column_missing_values_metric module

class ColumnMissingValues(number_of_rows: int, different_missing_values: Dict[Any, int], number_of_different_missing_values: int, number_of_missing_values: int, share_of_missing_values: float)

Bases: object

Statistics about missing values in a column

Attributes:

different_missing_values : Dict[Any, int]

number_of_different_missing_values : int

number_of_missing_values : int

number_of_rows : int

share_of_missing_values : float

class ColumnMissingValuesMetric(column_name: str, missing_values: Optional[list] = None, replace: bool = True)

Bases: Metric[ColumnMissingValuesMetricResult]

Count missing values in a column.

A missing value is a null or NaN value.

Calculates the number of distinct kinds of missing values and the count of each kind. NA types such as numpy.NaN and pandas.NaT are counted as one type.

You can set your own list of missing values with the missing_values parameter. A None value in the list means that Pandas null values will be included in the calculation.

If the replace parameter is False, the default values are added to the user’s list. If the replace parameter is True, only the values from the missing_values list are used.

Attributes:

DEFAULT_MISSING_VALUES = ['', inf, -inf, None]

column_name : str

missing_values : frozenset

Methods:

Attributes:

Methods:

render_html(obj: ColumnMissingValuesMetric)

render_json(obj: ColumnMissingValuesMetric)

class ColumnMissingValuesMetricResult(column_name: str, current: ColumnMissingValues, reference: Optional[ColumnMissingValues] = None)

Bases: object

Attributes:

column_name : str

current : ColumnMissingValues

reference : Optional[ColumnMissingValues] = None

column_regexp_metric module

class ColumnRegExpMetric(column_name: str, reg_exp: str, top: int = 10)

Count the number of values in a column that match or do not match a regular expression (regexp).

Attributes:

column_name : str

reg_exp : str

top : int

Methods:

Attributes:

Methods:

render_html(obj: ColumnRegExpMetric)

render_json(obj: ColumnRegExpMetric)

class DataIntegrityValueByRegexpMetricResult(column_name: str, reg_exp: str, top: int, current: DataIntegrityValueByRegexpStat, reference: Optional[DataIntegrityValueByRegexpStat] = None)

Bases: object

Attributes:

column_name : str

current : DataIntegrityValueByRegexpStat

reference : Optional[DataIntegrityValueByRegexpStat] = None

reg_exp : str

top : int

class DataIntegrityValueByRegexpStat(number_of_matched: int, number_of_not_matched: int, number_of_rows: int, table_of_matched: Dict[str, int], table_of_not_matched: Dict[str, int])

Bases: object

Statistics about the values in a column that are matched by a regular expression, for one dataset

Attributes:

number_of_matched : int

number_of_not_matched : int

number_of_rows : int

table_of_matched : Dict[str, int]

table_of_not_matched : Dict[str, int]

column_summary_metric module

class CategoricalCharacteristics(number_of_rows: int, count: int, unique: Optional[int], unique_percentage: Optional[float], most_common: Optional[object], most_common_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], new_in_current_values_count: Optional[int] = None, unused_in_current_values_count: Optional[int] = None)

Bases: object

Attributes:

count : int

missing : Optional[int]

missing_percentage : Optional[float]

most_common : Optional[object]

most_common_percentage : Optional[float]

new_in_current_values_count : Optional[int] = None

number_of_rows : int

unique : Optional[int]

unique_percentage : Optional[float]

unused_in_current_values_count : Optional[int] = None

class ColumnSummary(column_name: str, column_type: str, reference_characteristics: Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics, NoneType], current_characteristics: Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics], plot_data: DataQualityPlot)

Bases: object

Attributes:

column_name : str

column_type : str

current_characteristics : Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics]

plot_data : DataQualityPlot

reference_characteristics : Optional[Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics]]

class ColumnSummaryMetric(column_name: str)

Methods:

Attributes:

Methods:

render_html(obj: ColumnSummaryMetric)

render_json(obj: ColumnSummaryMetric)

class DataByTarget(data_for_plots: Dict[str, Dict[str, Union[list, pandas.core.frame.DataFrame]]], target_name: str, target_type: str)

Bases: object

Attributes:

data_for_plots : Dict[str, Dict[str, Union[list, DataFrame]]]

target_name : str

target_type : str

class DataInTime(data_for_plots: Dict[str, pandas.core.frame.DataFrame], freq: str, datetime_name: str)

Bases: object

Attributes:

data_for_plots : Dict[str, DataFrame]

datetime_name : str

freq : str

class DataQualityPlot(bins_for_hist: Dict[str, pandas.core.frame.DataFrame], data_in_time: Optional[DataInTime], data_by_target: Optional[DataByTarget], counts_of_values: Optional[Dict[str, pandas.core.frame.DataFrame]])

Bases: object

Attributes:

bins_for_hist : Dict[str, DataFrame]

counts_of_values : Optional[Dict[str, DataFrame]]

data_by_target : Optional[DataByTarget]

data_in_time : Optional[DataInTime]

class DatetimeCharacteristics(number_of_rows: int, count: int, unique: Optional[int], unique_percentage: Optional[float], most_common: Optional[object], most_common_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], first: Optional[str], last: Optional[str])

Bases: object

Attributes:

count : int

first : Optional[str]

last : Optional[str]

missing : Optional[int]

missing_percentage : Optional[float]

most_common : Optional[object]

most_common_percentage : Optional[float]

number_of_rows : int

unique : Optional[int]

unique_percentage : Optional[float]

class NumericCharacteristics(number_of_rows: int, count: int, mean: Union[float, int, NoneType], std: Union[float, int, NoneType], min: Union[float, int, NoneType], p25: Union[float, int, NoneType], p50: Union[float, int, NoneType], p75: Union[float, int, NoneType], max: Union[float, int, NoneType], unique: Optional[int], unique_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], infinite_count: Optional[int], infinite_percentage: Optional[float], most_common: Union[float, int, NoneType], most_common_percentage: Optional[float])

Bases: object

Attributes:

count : int

infinite_count : Optional[int]

infinite_percentage : Optional[float]

max : Optional[Union[float, int]]

mean : Optional[Union[float, int]]

min : Optional[Union[float, int]]

missing : Optional[int]

missing_percentage : Optional[float]

most_common : Optional[Union[float, int]]

most_common_percentage : Optional[float]

number_of_rows : int

p25 : Optional[Union[float, int]]

p50 : Optional[Union[float, int]]

p75 : Optional[Union[float, int]]

std : Optional[Union[float, int]]

unique : Optional[int]

unique_percentage : Optional[float]

dataset_missing_values_metric module

class DatasetMissingValues(different_missing_values: Dict[Any, int], number_of_different_missing_values: int, different_missing_values_by_column: Dict[str, Dict[Any, int]], number_of_different_missing_values_by_column: Dict[str, int], number_of_missing_values: int, share_of_missing_values: float, number_of_missing_values_by_column: Dict[str, int], share_of_missing_values_by_column: Dict[str, float], number_of_rows: int, number_of_rows_with_missing_values: int, share_of_rows_with_missing_values: float, number_of_columns: int, columns_with_missing_values: List[str], number_of_columns_with_missing_values: int, share_of_columns_with_missing_values: float)

Bases: object

Statistics about missing values in a dataset

Attributes:

columns_with_missing_values : List[str]

different_missing_values : Dict[Any, int]

different_missing_values_by_column : Dict[str, Dict[Any, int]]

number_of_columns : int

number_of_columns_with_missing_values : int

number_of_different_missing_values : int

number_of_different_missing_values_by_column : Dict[str, int]

number_of_missing_values : int

number_of_missing_values_by_column : Dict[str, int]

number_of_rows : int

number_of_rows_with_missing_values : int

share_of_columns_with_missing_values : float

share_of_missing_values : float

share_of_missing_values_by_column : Dict[str, float]

share_of_rows_with_missing_values : float

class DatasetMissingValuesMetric(missing_values: Optional[list] = None, replace: bool = True)

Count missing values in a dataset.

A missing value is a null or NaN value.

Calculates the number of distinct kinds of missing values and the count of each kind. NA types such as numpy.NaN and pandas.NaT are counted as one type.

You can set your own list of missing values with the missing_values parameter. A None value in the list means that Pandas null values will be included in the calculation.

If the replace parameter is False, the default values are added to the user’s list. If the replace parameter is True, only the values from the missing_values list are used.

Attributes:

DEFAULT_MISSING_VALUES = ['', inf, -inf, None]

missing_values : frozenset

Methods:

Attributes:

Methods:

render_html(obj: DatasetMissingValuesMetric)

render_json(obj: DatasetMissingValuesMetric)

class DatasetMissingValuesMetricResult(current: DatasetMissingValues, reference: Optional[DatasetMissingValues] = None)

Bases: object

Attributes:

current : DatasetMissingValues

reference : Optional[DatasetMissingValues] = None

dataset_summary_metric module

class DatasetSummary(target: Optional[str], prediction: Optional[Union[str, Sequence[str]]], date_column: Optional[str], id_column: Optional[str], number_of_columns: int, number_of_rows: int, number_of_missing_values: int, number_of_categorical_columns: int, number_of_numeric_columns: int, number_of_datetime_columns: int, number_of_constant_columns: int, number_of_almost_constant_columns: int, number_of_duplicated_columns: int, number_of_almost_duplicated_columns: int, number_of_empty_rows: int, number_of_empty_columns: int, number_of_duplicated_rows: int, columns_type: dict, nans_by_columns: dict, number_uniques_by_columns: dict)

Bases: object

Columns information in a dataset

Attributes:

columns_type : dict

date_column : Optional[str]

id_column : Optional[str]

nans_by_columns : dict

number_of_almost_constant_columns : int

number_of_almost_duplicated_columns : int

number_of_categorical_columns : int

number_of_columns : int

number_of_constant_columns : int

number_of_datetime_columns : int

number_of_duplicated_columns : int

number_of_duplicated_rows : int

number_of_empty_columns : int

number_of_empty_rows : int

number_of_missing_values : int

number_of_numeric_columns : int

number_of_rows : int

number_uniques_by_columns : dict

prediction : Optional[Union[str, Sequence[str]]]

target : Optional[str]

class DatasetSummaryMetric(almost_duplicated_threshold: float = 0.95, almost_constant_threshold: float = 0.95)

Common characteristics of the columns/features of the dataset(s)

Attributes:

almost_constant_threshold : float

almost_duplicated_threshold : float

Methods:

Attributes:

Methods:

render_html(obj: DatasetSummaryMetric)

render_json(obj: DatasetSummaryMetric)

class DatasetSummaryMetricResult(almost_duplicated_threshold: float, current: DatasetSummary, reference: Optional[DatasetSummary] = None)

Bases: object

Attributes:

almost_duplicated_threshold : float

current : DatasetSummary

reference : Optional[DatasetSummary] = None

calculate(data: InputData)

class ColumnMissingValuesMetricRenderer(color_options: Optional[ColorOptions] = None)

Bases: MetricRenderer

color_options : ColorOptions

Bases: Metric[DataIntegrityValueByRegexpMetricResult]

calculate(data: InputData)

class ColumnRegExpMetricRenderer(color_options: Optional[ColorOptions] = None)

Bases: MetricRenderer

color_options : ColorOptions

Bases: Metric[ColumnSummary]

calculate(data: InputData)

static map_data(stats: FeatureQualityStats)

class ColumnSummaryMetricRenderer(color_options: Optional[ColorOptions] = None)

Bases: MetricRenderer

color_options : ColorOptions

Bases: Metric[DatasetMissingValuesMetricResult]

calculate(data: InputData)

class DatasetMissingValuesMetricRenderer(color_options: Optional[ColorOptions] = None)

Bases: MetricRenderer

color_options : ColorOptions

Bases: Metric[DatasetSummaryMetricResult]

calculate(data: InputData)

class DatasetSummaryMetricRenderer(color_options: Optional[ColorOptions] = None)

Bases: MetricRenderer

color_options : ColorOptions

ColorOptions
ColorOptions
ColorOptions
ColorOptions
ColorOptions
ColorOptions
ColorOptions
ColorOptions
ColorOptions
ColorOptions
MetricRenderer
MetricRenderer
MetricRenderer
MetricRenderer
MetricRenderer
FeatureQualityStats
Metric
InputData
Metric
InputData
Metric
InputData
Metric
InputData
Metric
InputData