src.fairreckitlib.data.data_modifier
This module contains the base class and factory for data modification.
Classes:
DataModifier: the base class for data modifying.
DataModifierFactory: the factory that creates data modifiers related to a dataset matrix.
This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)
"""This module contains the base class and factory for data modification.

Classes:

    DataModifier: the base class for data modifying.
    DataModifierFactory: the factory that creates data modifiers related to a dataset matrix.

Functions:

    create_data_modifier_factory: create the nested factory for all dataset-matrix pairs.

This program has been developed by students from the bachelor Computer Science at
Utrecht University within the Software Project course.
© Copyright Utrecht University (Department of Information and Computing Sciences)
"""

from abc import ABCMeta, abstractmethod
from typing import Any, Callable, Dict

import pandas as pd

from ..core.config.config_factories import Factory, GroupFactory, FUNC_CREATE_PARAMS
from ..core.config.config_parameters import ConfigParameters
from .set.dataset import Dataset
from .set.dataset_registry import DataRegistry


class DataModifier(metaclass=ABCMeta):
    """Base class for FairRecKit data modifiers.

    A modifier is identified by a name and configured by a dictionary of
    parameters. Subclasses implement 'run' to do the actual work.

    Public methods:

    get_name
    get_params
    run
    """

    def __init__(self, name: str, params: Dict[str, Any]):
        """Construct the DataModifier.

        Args:
            name: the name of the modifier.
            params: the modifier parameters, stored by reference (not copied).
        """
        self.name = name
        self.params = params

    def get_name(self) -> str:
        """Get the name of the modifier.

        Returns:
            the modifier name.
        """
        return self.name

    def get_params(self) -> Dict[str, Any]:
        """Get the parameters of the modifier.

        Returns:
            a shallow copy of the modifier parameters.
        """
        # Copy so that callers cannot add or remove keys on the stored dictionary;
        # nested values are still shared with the original.
        return dict(self.params)

    @abstractmethod
    def run(self, dataframe: pd.DataFrame) -> Any:
        """Run the modifier on the specified dataframe.

        Args:
            dataframe: source df to modify.

        Raises:
            NotImplementedError: always, in this base implementation.

        Returns:
            any modification to the dataframe.
        """
        raise NotImplementedError()


class DataModifierFactory(Factory):
    """Factory for data modifier creation.

    The intended use is to associate the factory with a specific matrix of a dataset.
    Both the created parameters and the created data modifiers are supplied
    with a reference to the dataset and the name of the matrix they belong to.
    """

    def __init__(self, matrix_name: str, dataset: Dataset):
        """Construct the DataModifierFactory.

        Args:
            matrix_name: the name of the matrix that it relates to,
                passed to the base Factory as the factory name.
            dataset: the dataset associated with the matrix.
        """
        Factory.__init__(self, matrix_name)
        self.dataset = dataset

    def create(self, obj_name: str, obj_params: Dict[str, Any] = None, **kwargs) -> DataModifier:
        """Create and return a new data modifier with the specified name.

        The specified parameters are expected to be of the same structure as the defaults
        of the ConfigParameters that are associated with the desired data modifier.
        When no parameters are specified it will use the data modifier's defaults.

        Args:
            obj_name: the name of the data modifier to create.
            obj_params: the parameters of the data modifier.

        Keyword Args:
            Any: extra arguments that need to be passed to the data modifier on creation.
                The 'dataset' and 'matrix_name' entries are always overwritten by the factory.

        Returns:
            the result of Factory.create, documented as the created data modifier
            or None when it does not exist.
        """
        # Attach the dataset and matrix this factory belongs to before delegating.
        # NOTE(review): factory_name is presumably the matrix name handed to
        # Factory.__init__ - confirm in Factory.
        kwargs['dataset'] = self.dataset
        kwargs['matrix_name'] = self.factory_name
        return Factory.create(self, obj_name, obj_params, **kwargs)

    def on_create_params(self, obj_name: str) -> ConfigParameters:
        """Create parameters for the data modifier with the specified name.

        Args:
            obj_name: name of the data modifier to create parameters for.

        Returns:
            the configuration parameters produced by the creation function
            that is registered for the object.
        """
        # The object name is forwarded under the 'column_name' key.
        kwargs = {
            'column_name': obj_name,
            'dataset': self.dataset,
            'matrix_name': self.factory_name
        }
        # NOTE(review): direct indexing assumes obj_name is registered; presumably
        # the base Factory handles unknown names before calling this - confirm.
        return self.factory[obj_name][FUNC_CREATE_PARAMS](**kwargs)


def create_data_modifier_factory(
        data_registry: DataRegistry,
        factory_name: str,
        func_on_add_entries: Callable[[DataModifierFactory, Dataset], None]) -> GroupFactory:
    """Create a data modifier factory for each dataset-matrix pair.

    The result is a group factory that contains one group factory per dataset,
    which in turn contains one DataModifierFactory per matrix of that dataset.

    Args:
        data_registry: the data registry with available datasets.
        factory_name: the name of the data modifier factory.
        func_on_add_entries: callback for each dataset-matrix pair to add data modifiers.

    Returns:
        the factory with all available data modifiers per dataset-matrix pair.
    """
    factory = GroupFactory(factory_name)

    for dataset_name in data_registry.get_available_sets():
        dataset = data_registry.get_set(dataset_name)
        dataset_factory = GroupFactory(dataset.get_name())

        factory.add_factory(dataset_factory)

        for matrix_name in dataset.get_available_matrices():
            matrix_factory = DataModifierFactory(matrix_name, dataset)
            # Let the caller fill the matrix factory before it is registered.
            func_on_add_entries(matrix_factory, dataset)
            dataset_factory.add_factory(matrix_factory)

    return factory
class DataModifier(metaclass=ABCMeta):
    """Base class for FairRecKit data modifiers.

    A modifier is identified by a name and configured by a dictionary of
    parameters. Subclasses implement 'run' to do the actual work.

    Public methods:

    get_name
    get_params
    run
    """

    def __init__(self, name: str, params: Dict[str, Any]):
        """Construct the DataModifier.

        Args:
            name: the name of the modifier.
            params: the modifier parameters, stored by reference (not copied).
        """
        self.name = name
        self.params = params

    def get_name(self) -> str:
        """Get the name of the modifier.

        Returns:
            the modifier name.
        """
        return self.name

    def get_params(self) -> Dict[str, Any]:
        """Get the parameters of the modifier.

        Returns:
            a shallow copy of the modifier parameters.
        """
        # Copy so that callers cannot add or remove keys on the stored dictionary;
        # nested values are still shared with the original.
        return dict(self.params)

    @abstractmethod
    def run(self, dataframe: pd.DataFrame) -> Any:
        """Run the modifier on the specified dataframe.

        Args:
            dataframe: source df to modify.

        Raises:
            NotImplementedError: always, in this base implementation.

        Returns:
            any modification to the dataframe.
        """
        raise NotImplementedError()
Base class for FairRecKit data modifiers.
Public methods:
get_name, get_params, run
def __init__(self, name: str, params: Dict[str, Any]):
    """Construct the DataModifier.

    Args:
        name: the name of the modifier.
        params: the modifier parameters, stored by reference (not copied).
    """
    self.name = name
    self.params = params
Construct the DataModifier.
Args: name: the name of the modifier. params: the modifier parameters.
def get_name(self) -> str:
    """Get the name of the modifier.

    Returns:
        the modifier name.
    """
    return self.name
Get the name of the modifier.
Returns: the modifier name.
def get_params(self) -> Dict[str, Any]:
    """Get the parameters of the modifier.

    Returns:
        a shallow copy of the modifier parameters.
    """
    # Copy so that callers cannot add or remove keys on the stored dictionary;
    # nested values are still shared with the original.
    return dict(self.params)
Get the parameters of the modifier.
Returns: the modifier parameters.
@abstractmethod
def run(self, dataframe: pd.DataFrame) -> Any:
    """Run the modifier on the specified dataframe.

    Args:
        dataframe: source df to modify.

    Raises:
        NotImplementedError: always, in this base implementation.

    Returns:
        any modification to the dataframe.
    """
    raise NotImplementedError()
Run the modifier on the specified dataframe.
Args: dataframe: source df to modify.
Returns: any modification to the dataframe.
class DataModifierFactory(Factory):
    """Factory for data modifier creation.

    The intended use is to associate the factory with a specific matrix of a dataset.
    Both the created parameters and the created data modifiers are supplied
    with a reference to the dataset and the name of the matrix they belong to.
    """

    def __init__(self, matrix_name: str, dataset: Dataset):
        """Construct the DataModifierFactory.

        Args:
            matrix_name: the name of the matrix that it relates to,
                passed to the base Factory as the factory name.
            dataset: the dataset associated with the matrix.
        """
        Factory.__init__(self, matrix_name)
        self.dataset = dataset

    def create(self, obj_name: str, obj_params: Dict[str, Any] = None, **kwargs) -> DataModifier:
        """Create and return a new data modifier with the specified name.

        The specified parameters are expected to be of the same structure as the defaults
        of the ConfigParameters that are associated with the desired data modifier.
        When no parameters are specified it will use the data modifier's defaults.

        Args:
            obj_name: the name of the data modifier to create.
            obj_params: the parameters of the data modifier.

        Keyword Args:
            Any: extra arguments that need to be passed to the data modifier on creation.
                The 'dataset' and 'matrix_name' entries are always overwritten by the factory.

        Returns:
            the result of Factory.create, documented as the created data modifier
            or None when it does not exist.
        """
        # Attach the dataset and matrix this factory belongs to before delegating.
        # NOTE(review): factory_name is presumably the matrix name handed to
        # Factory.__init__ - confirm in Factory.
        kwargs['dataset'] = self.dataset
        kwargs['matrix_name'] = self.factory_name
        return Factory.create(self, obj_name, obj_params, **kwargs)

    def on_create_params(self, obj_name: str) -> ConfigParameters:
        """Create parameters for the data modifier with the specified name.

        Args:
            obj_name: name of the data modifier to create parameters for.

        Returns:
            the configuration parameters produced by the creation function
            that is registered for the object.
        """
        # The object name is forwarded under the 'column_name' key.
        kwargs = {
            'column_name': obj_name,
            'dataset': self.dataset,
            'matrix_name': self.factory_name
        }
        # NOTE(review): direct indexing assumes obj_name is registered; presumably
        # the base Factory handles unknown names before calling this - confirm.
        return self.factory[obj_name][FUNC_CREATE_PARAMS](**kwargs)
Factory for data modifier creation.
The intended use is to associate the factory with a specific matrix of a dataset. Both the created parameters and the created data modifiers are supplied with a reference to the dataset and the name of the matrix they belong to.
def __init__(self, matrix_name: str, dataset: Dataset):
    """Construct the DataModifierFactory.

    Args:
        matrix_name: the name of the matrix that it relates to,
            passed to the base Factory as the factory name.
        dataset: the dataset associated with the matrix.
    """
    Factory.__init__(self, matrix_name)
    self.dataset = dataset
Construct the DataModifierFactory.
Args: matrix_name: the name of the matrix that it relates to. dataset: the dataset associated with the matrix.
def create(self, obj_name: str, obj_params: Dict[str, Any] = None, **kwargs) -> DataModifier:
    """Create and return a new data modifier with the specified name.

    The specified parameters are expected to be of the same structure as the defaults
    of the ConfigParameters that are associated with the desired data modifier.
    When no parameters are specified it will use the data modifier's defaults.

    Args:
        obj_name: the name of the data modifier to create.
        obj_params: the parameters of the data modifier.

    Keyword Args:
        Any: extra arguments that need to be passed to the data modifier on creation.
            The 'dataset' and 'matrix_name' entries are always overwritten by the factory.

    Returns:
        the result of Factory.create, documented as the created data modifier
        or None when it does not exist.
    """
    # Attach the dataset and matrix this factory belongs to before delegating.
    # NOTE(review): factory_name is presumably the matrix name handed to
    # Factory.__init__ - confirm in Factory.
    kwargs['dataset'] = self.dataset
    kwargs['matrix_name'] = self.factory_name
    return Factory.create(self, obj_name, obj_params, **kwargs)
Create and return a new data modifier with the specified name.
The specified parameters are expected to be of the same structure as the defaults of the ConfigParameters that are associated with the desired data modifier. When no parameters are specified it will use the data modifier's defaults.
Args: obj_name: the name of the data modifier to create. obj_params: the parameters of the data modifier.
Keyword Args: Any: extra arguments that need to be passed to the data modifier on creation.
Returns: the created data modifier or None when it does not exist.
def on_create_params(self, obj_name: str) -> ConfigParameters:
    """Create parameters for the data modifier with the specified name.

    Args:
        obj_name: name of the data modifier to create parameters for.

    Returns:
        the configuration parameters produced by the creation function
        that is registered for the object.
    """
    # The object name is forwarded under the 'column_name' key.
    kwargs = {
        'column_name': obj_name,
        'dataset': self.dataset,
        'matrix_name': self.factory_name
    }
    # NOTE(review): direct indexing assumes obj_name is registered; presumably
    # the base Factory handles unknown names before calling this - confirm.
    return self.factory[obj_name][FUNC_CREATE_PARAMS](**kwargs)
Create parameters for the data modifier with the specified name.
Args: obj_name: name of the data modifier to create parameters for.
Returns: the configuration parameters of the object or empty parameters when it does not exist.
def create_data_modifier_factory(
        data_registry: DataRegistry,
        factory_name: str,
        func_on_add_entries: Callable[[DataModifierFactory, Dataset], None]) -> GroupFactory:
    """Create a data modifier factory for each dataset-matrix pair.

    The result is a group factory that contains one group factory per dataset,
    which in turn contains one DataModifierFactory per matrix of that dataset.

    Args:
        data_registry: the data registry with available datasets.
        factory_name: the name of the data modifier factory.
        func_on_add_entries: callback for each dataset-matrix pair to add data modifiers.

    Returns:
        the factory with all available data modifiers per dataset-matrix pair.
    """
    factory = GroupFactory(factory_name)

    for dataset_name in data_registry.get_available_sets():
        dataset = data_registry.get_set(dataset_name)
        dataset_factory = GroupFactory(dataset.get_name())

        factory.add_factory(dataset_factory)

        for matrix_name in dataset.get_available_matrices():
            matrix_factory = DataModifierFactory(matrix_name, dataset)
            # Let the caller fill the matrix factory before it is registered.
            func_on_add_entries(matrix_factory, dataset)
            dataset_factory.add_factory(matrix_factory)

    return factory
Create a data modifier factory for each dataset-matrix pair.
Args:
data_registry: the data registry with available datasets.
factory_name: the name of the data modifier factory.
func_on_add_entries: callback for each dataset-matrix pair to add data modifiers.
Returns: the factory with all available data modifiers per dataset-matrix pair.