# Source code for sdmetrics.demos

"""Functions to load demos with real and synthetic data of different data modalities."""

import pathlib
import pickle


def load_demo(modality='multi_table', new_metadata=False):
    """Read one of the bundled demo pickles and return its leading items.

    The pickle is looked up as ``demos/<modality>.pkl`` inside the directory
    that contains this module. When ``modality`` is omitted, the
    ``multi_table`` demo is used.

    Args:
        modality (str):
            Name of the demo to read; documented choices are
            ``multi_table``, ``single_table`` and ``timeseries``.
        new_metadata (bool):
            If true, the first four items of the unpickled object are
            returned (the fourth presumably being the new-style metadata);
            otherwise only the first three.

    Returns:
        tuple:
            Real data, Synthetic data, Metadata (followed by the new
            metadata when ``new_metadata`` is true).
    """
    pickle_location = pathlib.Path(__file__).parent.joinpath('demos', f'{modality}.pkl')

    # Three items cover real data, synthetic data and metadata; the
    # optional fourth item is only exposed on request.
    if new_metadata:
        item_count = 4
    else:
        item_count = 3

    with pickle_location.open('rb') as handle:
        contents = pickle.load(handle)

    return contents[:item_count]


def load_multi_table_demo():
    """Load multi-table demo data.

    The dataset is the ``SDV`` demo data, which consists of three tables,
    ``users``, ``sessions`` and ``transactions``, with simulated data about
    user browsing sessions and transactions made during those sessions,
    and a synthetic copy of it made by the ``sdv.relational.HMA1`` model.

    Returns:
        tuple:
            * dict: Real tables.
            * dict: Synthetic tables.
            * dict: Dataset Metadata.
    """
    return load_demo('multi_table')


def load_single_table_demo():
    """Load single-table demo data.

    The dataset is the ``student_placements`` tabular demo from SDV and a
    synthetic copy of it made using the ``sdv.tabular.CTGAN`` model.

    Returns:
        tuple:
            * pandas.DataFrame: Real table.
            * pandas.DataFrame: Synthetic table.
            * dict: Table Metadata.
    """
    return load_demo('single_table')


def load_timeseries_demo():
    """Load time series demo data.

    The dataset is the ``sunglasses`` demo data from the DeepEcho project,
    which contains simulated data from a chain of sunglasses stores, and a
    synthetic copy of it made by the ``sdv.timeseries.PAR`` model.

    It has 1 entity column, 1 context column and a datetime sequence index.

    Returns:
        tuple:
            * pandas.DataFrame: Real table.
            * pandas.DataFrame: Synthetic table.
            * dict: Table Metadata.
    """
    return load_demo('timeseries')