src.fairreckitlib.data.data_modifier

This module contains the base class and factory for data modification.

Classes:

DataModifier: the base class for data modifying.
DataModifierFactory: the factory that creates data modifiers related to a dataset matrix.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

  1"""This module contains the base class and factory for data modification.
  2
  3Classes:
  4
  5    DataModifier: the base class for data modifying.
  6    DataModifierFactory: the factory that creates data modifiers related to a dataset matrix.
  7
  8This program has been developed by students from the bachelor Computer Science at
  9Utrecht University within the Software Project course.
 10© Copyright Utrecht University (Department of Information and Computing Sciences)
 11"""
 12
 13from abc import ABCMeta, abstractmethod
 14from typing import Any, Callable, Dict
 15
 16import pandas as pd
 17
 18from ..core.config.config_factories import Factory, GroupFactory, FUNC_CREATE_PARAMS
 19from ..core.config.config_parameters import ConfigParameters
 20from .set.dataset import Dataset
 21from .set.dataset_registry import DataRegistry
 22
 23class DataModifier(metaclass=ABCMeta):
 24    """Base class for FairRecKit data modifiers.
 25
 26    Public methods:
 27
 28    get_name
 29    get_params
 30    run
 31    """
 32
 33    def __init__(self, name: str, params: Dict[str, Any]):
 34        """Construct the DataModifier.
 35
 36        Args:
 37            name: the name of the modifier.
 38            params: the modifier parameters.
 39        """
 40        self.name = name
 41        self.params = params
 42
 43    def get_name(self) -> str:
 44        """Get the name of the modifier.
 45
 46        Returns:
 47            the modifier name.
 48        """
 49        return self.name
 50
 51    def get_params(self) -> Dict[str, Any]:
 52        """Get the parameters of the modifier.
 53
 54        Returns:
 55            the modifier parameters.
 56        """
 57        return dict(self.params)
 58
 59    @abstractmethod
 60    def run(self, dataframe: pd.DataFrame) -> Any:
 61        """Run the modifier on the specified dataframe.
 62
 63        Args:
 64            dataframe: source df to modify.
 65
 66        Returns:
 67            any modification to the dataframe.
 68        """
 69        raise NotImplementedError()
 70
 71
 72class DataModifierFactory(Factory):
 73    """Factory for data modifier creation.
 74
 75    The intended use is to associate the factory with a specific matrix of a dataset.
 76    Both the created parameters and the created data modifiers are supplied
 77    with a reference to the dataset and the name of the matrix they belong to.
 78    """
 79
 80    def __init__(self, matrix_name: str, dataset: Dataset):
 81        """Construct the DataModifierFactory.
 82
 83        Args:
 84            matrix_name: the name of the matrix that it relates to.
 85            dataset: the dataset associated with the matrix.
 86        """
 87        Factory.__init__(self, matrix_name)
 88        self.dataset = dataset
 89
 90    def create(self, obj_name: str, obj_params: Dict[str, Any]=None, **kwargs) -> DataModifier:
 91        """Create and return a new data modifier with the specified name.
 92
 93        The specified parameters are expected to be of the same structure as the defaults
 94        of the ConfigParameters that are associated with the desired data modifier.
 95        When no parameters are specified it will use the data modifier's defaults.
 96
 97        Args:
 98            obj_name: the name of the data modifier to create.
 99            obj_params: the parameters of the data modifier.
100
101        Keyword Args:
102            Any: extra arguments that need to be passed to the data modifier on creation.
103
104        Returns:
105            the created data modifier or None when it does not exist.
106        """
107        kwargs['dataset'] = self.dataset
108        kwargs['matrix_name'] = self.factory_name
109        return Factory.create(self, obj_name, obj_params, **kwargs)
110
111    def on_create_params(self, obj_name: str) -> ConfigParameters:
112        """Create parameters for the data modifier with the specified name.
113
114        Args:
115            obj_name: name of the data modifier to create parameters for.
116
117        Returns:
118            the configuration parameters of the object or empty parameters when it does not exist.
119        """
120        kwargs = {
121            'column_name': obj_name,
122            'dataset': self.dataset,
123            'matrix_name': self.factory_name
124        }
125        return self.factory[obj_name][FUNC_CREATE_PARAMS](**kwargs)
126
127
def create_data_modifier_factory(
        data_registry: DataRegistry,
        factory_name: str,
        func_on_add_entries: Callable[[DataModifierFactory, Dataset], None]) -> GroupFactory:
    """Build a grouped factory with a data modifier factory per dataset-matrix pair.

    The result is layered: the top-level group contains one group per available
    dataset, which in turn contains one DataModifierFactory per available matrix.

    Args:
        data_registry: the data registry with available datasets.
        factory_name: the name of the data modifier factory.
        func_on_add_entries: callback for each dataset-matrix pair to add data modifiers,
            it is invoked before the matrix factory is added to its dataset group.

    Returns:
        the factory with all available data modifiers per dataset-matrix pair.
    """
    root_factory = GroupFactory(factory_name)

    for set_name in data_registry.get_available_sets():
        data_set = data_registry.get_set(set_name)

        # the dataset group is attached to the root before its matrices are added
        set_factory = GroupFactory(data_set.get_name())
        root_factory.add_factory(set_factory)

        for matrix in data_set.get_available_matrices():
            modifier_factory = DataModifierFactory(matrix, data_set)
            func_on_add_entries(modifier_factory, data_set)
            set_factory.add_factory(modifier_factory)

    return root_factory
class DataModifier:
24class DataModifier(metaclass=ABCMeta):
25    """Base class for FairRecKit data modifiers.
26
27    Public methods:
28
29    get_name
30    get_params
31    run
32    """
33
34    def __init__(self, name: str, params: Dict[str, Any]):
35        """Construct the DataModifier.
36
37        Args:
38            name: the name of the modifier.
39            params: the modifier parameters.
40        """
41        self.name = name
42        self.params = params
43
44    def get_name(self) -> str:
45        """Get the name of the modifier.
46
47        Returns:
48            the modifier name.
49        """
50        return self.name
51
52    def get_params(self) -> Dict[str, Any]:
53        """Get the parameters of the modifier.
54
55        Returns:
56            the modifier parameters.
57        """
58        return dict(self.params)
59
60    @abstractmethod
61    def run(self, dataframe: pd.DataFrame) -> Any:
62        """Run the modifier on the specified dataframe.
63
64        Args:
65            dataframe: source df to modify.
66
67        Returns:
68            any modification to the dataframe.
69        """
70        raise NotImplementedError()

Base class for FairRecKit data modifiers.

Public methods:

get_name, get_params, run

DataModifier(name: str, params: Dict[str, Any])
34    def __init__(self, name: str, params: Dict[str, Any]):
35        """Construct the DataModifier.
36
37        Args:
38            name: the name of the modifier.
39            params: the modifier parameters.
40        """
41        self.name = name
42        self.params = params

Construct the DataModifier.

Args: name: the name of the modifier; params: the modifier parameters.

def get_name(self) -> str:
44    def get_name(self) -> str:
45        """Get the name of the modifier.
46
47        Returns:
48            the modifier name.
49        """
50        return self.name

Get the name of the modifier.

Returns: the modifier name.

def get_params(self) -> Dict[str, Any]:
52    def get_params(self) -> Dict[str, Any]:
53        """Get the parameters of the modifier.
54
55        Returns:
56            the modifier parameters.
57        """
58        return dict(self.params)

Get the parameters of the modifier.

Returns: the modifier parameters.

@abstractmethod
def run(self, dataframe: pandas.core.frame.DataFrame) -> Any:
60    @abstractmethod
61    def run(self, dataframe: pd.DataFrame) -> Any:
62        """Run the modifier on the specified dataframe.
63
64        Args:
65            dataframe: source df to modify.
66
67        Returns:
68            any modification to the dataframe.
69        """
70        raise NotImplementedError()

Run the modifier on the specified dataframe.

Args: dataframe: source df to modify.

Returns: any modification to the dataframe.

class DataModifierFactory(src.fairreckitlib.core.config.config_factories.Factory):
 73class DataModifierFactory(Factory):
 74    """Factory for data modifier creation.
 75
 76    The intended use is to associate the factory with a specific matrix of a dataset.
 77    Both the created parameters and the created data modifiers are supplied
 78    with a reference to the dataset and the name of the matrix they belong to.
 79    """
 80
 81    def __init__(self, matrix_name: str, dataset: Dataset):
 82        """Construct the DataModifierFactory.
 83
 84        Args:
 85            matrix_name: the name of the matrix that it relates to.
 86            dataset: the dataset associated with the matrix.
 87        """
 88        Factory.__init__(self, matrix_name)
 89        self.dataset = dataset
 90
 91    def create(self, obj_name: str, obj_params: Dict[str, Any]=None, **kwargs) -> DataModifier:
 92        """Create and return a new data modifier with the specified name.
 93
 94        The specified parameters are expected to be of the same structure as the defaults
 95        of the ConfigParameters that are associated with the desired data modifier.
 96        When no parameters are specified it will use the data modifier's defaults.
 97
 98        Args:
 99            obj_name: the name of the data modifier to create.
100            obj_params: the parameters of the data modifier.
101
102        Keyword Args:
103            Any: extra arguments that need to be passed to the data modifier on creation.
104
105        Returns:
106            the created data modifier or None when it does not exist.
107        """
108        kwargs['dataset'] = self.dataset
109        kwargs['matrix_name'] = self.factory_name
110        return Factory.create(self, obj_name, obj_params, **kwargs)
111
112    def on_create_params(self, obj_name: str) -> ConfigParameters:
113        """Create parameters for the data modifier with the specified name.
114
115        Args:
116            obj_name: name of the data modifier to create parameters for.
117
118        Returns:
119            the configuration parameters of the object or empty parameters when it does not exist.
120        """
121        kwargs = {
122            'column_name': obj_name,
123            'dataset': self.dataset,
124            'matrix_name': self.factory_name
125        }
126        return self.factory[obj_name][FUNC_CREATE_PARAMS](**kwargs)

Factory for data modifier creation.

The intended use is to associate the factory with a specific matrix of a dataset. Both the created parameters and the created data modifiers are supplied with a reference to the dataset and the name of the matrix they belong to.

DataModifierFactory( matrix_name: str, dataset: src.fairreckitlib.data.set.dataset.Dataset)
81    def __init__(self, matrix_name: str, dataset: Dataset):
82        """Construct the DataModifierFactory.
83
84        Args:
85            matrix_name: the name of the matrix that it relates to.
86            dataset: the dataset associated with the matrix.
87        """
88        Factory.__init__(self, matrix_name)
89        self.dataset = dataset

Construct the DataModifierFactory.

Args: matrix_name: the name of the matrix that it relates to; dataset: the dataset associated with the matrix.

def create( self, obj_name: str, obj_params: Dict[str, Any] = None, **kwargs) -> src.fairreckitlib.data.data_modifier.DataModifier:
 91    def create(self, obj_name: str, obj_params: Dict[str, Any]=None, **kwargs) -> DataModifier:
 92        """Create and return a new data modifier with the specified name.
 93
 94        The specified parameters are expected to be of the same structure as the defaults
 95        of the ConfigParameters that are associated with the desired data modifier.
 96        When no parameters are specified it will use the data modifier's defaults.
 97
 98        Args:
 99            obj_name: the name of the data modifier to create.
100            obj_params: the parameters of the data modifier.
101
102        Keyword Args:
103            Any: extra arguments that need to be passed to the data modifier on creation.
104
105        Returns:
106            the created data modifier or None when it does not exist.
107        """
108        kwargs['dataset'] = self.dataset
109        kwargs['matrix_name'] = self.factory_name
110        return Factory.create(self, obj_name, obj_params, **kwargs)

Create and return a new data modifier with the specified name.

The specified parameters are expected to be of the same structure as the defaults of the ConfigParameters that are associated with the desired data modifier. When no parameters are specified it will use the data modifier's defaults.

Args: obj_name: the name of the data modifier to create; obj_params: the parameters of the data modifier.

Keyword Args: Any: extra arguments that need to be passed to the data modifier on creation.

Returns: the created data modifier or None when it does not exist.

def on_create_params( self, obj_name: str) -> src.fairreckitlib.core.config.config_parameters.ConfigParameters:
112    def on_create_params(self, obj_name: str) -> ConfigParameters:
113        """Create parameters for the data modifier with the specified name.
114
115        Args:
116            obj_name: name of the data modifier to create parameters for.
117
118        Returns:
119            the configuration parameters of the object or empty parameters when it does not exist.
120        """
121        kwargs = {
122            'column_name': obj_name,
123            'dataset': self.dataset,
124            'matrix_name': self.factory_name
125        }
126        return self.factory[obj_name][FUNC_CREATE_PARAMS](**kwargs)

Create parameters for the data modifier with the specified name.

Args: obj_name: name of the data modifier to create parameters for.

Returns: the configuration parameters of the object or empty parameters when it does not exist.

def create_data_modifier_factory( data_registry: src.fairreckitlib.data.set.dataset_registry.DataRegistry, factory_name: str, func_on_add_entries: Callable[[src.fairreckitlib.data.data_modifier.DataModifierFactory, src.fairreckitlib.data.set.dataset.Dataset], NoneType]) -> src.fairreckitlib.core.config.config_factories.GroupFactory:
129def create_data_modifier_factory(
130        data_registry: DataRegistry,
131        factory_name: str,
132        func_on_add_entries: Callable[[DataModifierFactory, Dataset], None]) -> GroupFactory:
133    """Create a data modifier factory for each dataset-matrix pair.
134
135    Args:
136
137        data_registry: the data registry with available datasets.
138        factory_name: the name of the data modifier factory.
139        func_on_add_entries: callback for each dataset-matrix pair to add data modifiers.
140
141    Returns:
142        the factory with all available data modifiers per dataset-matrix pair.
143    """
144    factory = GroupFactory(factory_name)
145
146    for dataset_name in data_registry.get_available_sets():
147        dataset = data_registry.get_set(dataset_name)
148        dataset_factory = GroupFactory(dataset.get_name())
149
150        factory.add_factory(dataset_factory)
151
152        for matrix_name in dataset.get_available_matrices():
153            matrix_factory = DataModifierFactory(matrix_name, dataset)
154            func_on_add_entries(matrix_factory, dataset)
155            dataset_factory.add_factory(matrix_factory)
156
157    return factory

Create a data modifier factory for each dataset-matrix pair.

Args:

data_registry: the data registry with available datasets.
factory_name: the name of the data modifier factory.
func_on_add_entries: callback for each dataset-matrix pair to add data modifiers.

Returns: the factory with all available data modifiers per dataset-matrix pair.