src.fairreckitlib.data.pipeline.data_config

This module contains the data matrix configuration.

Classes:

DataMatrixConfig: data matrix configuration.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

 1"""This module contains the dataset configuration.
 2
 3Classes:
 4
 5    DataMatrixConfig: data matrix configuration.
 6
 7This program has been developed by students from the bachelor Computer Science at
 8Utrecht University within the Software Project course.
 9© Copyright Utrecht University (Department of Information and Computing Sciences)
10"""
11
12from dataclasses import dataclass
13from typing import Any, Dict, Optional
14
15from ..filter.filter_config import DataSubsetConfig
16from ..ratings.convert_constants import KEY_RATING_CONVERTER
17from ..ratings.convert_config import ConvertConfig
18from ..split.split_constants import KEY_SPLITTING
19from ..split.split_config import SplitConfig
20
21
22@dataclass
23class DataMatrixConfig(DataSubsetConfig):
24    """Data Matrix Configuration.
25
26    dataset: the name of the dataset.
27    matrix: the name of the dataset matrix.
28    filter_passes: the subset of the dataset matrix as a list of filter passes.
29    converter: the rating converter of the dataset matrix.
30    splitting: the train/test splitter of the dataset matrix.
31    """
32
33    converter: Optional[ConvertConfig]
34    splitting: SplitConfig
35
36    def get_data_matrix_name(self) -> str:
37        """Get the combined dataset and matrix name of the configuration."""
38        return self.dataset + '_' + self.matrix
39
40    def to_yml_format(self) -> Dict[str, Any]:
41        """Format data matrix configuration to a yml compatible dictionary.
42
43        Returns:
44            a dictionary containing the data matrix configuration.
45        """
46        yml_format = DataSubsetConfig.to_yml_format(self)
47        yml_format[KEY_SPLITTING] = self.splitting.to_yml_format()
48        # only include the rating converter if it is present
49        if self.converter:
50            yml_format[KEY_RATING_CONVERTER] = self.converter.to_yml_format()
51
52        return yml_format
@dataclass
class DataMatrixConfig(src.fairreckitlib.data.filter.filter_config.DataSubsetConfig):
23@dataclass
24class DataMatrixConfig(DataSubsetConfig):
25    """Data Matrix Configuration.
26
27    dataset: the name of the dataset.
28    matrix: the name of the dataset matrix.
29    filter_passes: the subset of the dataset matrix as a list of filter passes.
30    converter: the rating converter of the dataset matrix.
31    splitting: the train/test splitter of the dataset matrix.
32    """
33
34    converter: Optional[ConvertConfig]
35    splitting: SplitConfig
36
37    def get_data_matrix_name(self) -> str:
38        """Get the combined dataset and matrix name of the configuration."""
39        return self.dataset + '_' + self.matrix
40
41    def to_yml_format(self) -> Dict[str, Any]:
42        """Format data matrix configuration to a yml compatible dictionary.
43
44        Returns:
45            a dictionary containing the data matrix configuration.
46        """
47        yml_format = DataSubsetConfig.to_yml_format(self)
48        yml_format[KEY_SPLITTING] = self.splitting.to_yml_format()
49        # only include the rating converter if it is present
50        if self.converter:
51            yml_format[KEY_RATING_CONVERTER] = self.converter.to_yml_format()
52
53        return yml_format

Data Matrix Configuration.

- dataset: the name of the dataset.
- matrix: the name of the dataset matrix.
- filter_passes: the subset of the dataset matrix as a list of filter passes.
- converter: the rating converter of the dataset matrix.
- splitting: the train/test splitter of the dataset matrix.

DataMatrixConfig( dataset: str, matrix: str, filter_passes: List[src.fairreckitlib.data.filter.filter_config.FilterPassConfig], converter: Optional[src.fairreckitlib.data.ratings.convert_config.ConvertConfig], splitting: src.fairreckitlib.data.split.split_config.SplitConfig)
def get_data_matrix_name(self) -> str:
37    def get_data_matrix_name(self) -> str:
38        """Get the combined dataset and matrix name of the configuration."""
39        return self.dataset + '_' + self.matrix

Get the combined dataset and matrix name of the configuration.

def to_yml_format(self) -> Dict[str, Any]:
41    def to_yml_format(self) -> Dict[str, Any]:
42        """Format data matrix configuration to a yml compatible dictionary.
43
44        Returns:
45            a dictionary containing the data matrix configuration.
46        """
47        yml_format = DataSubsetConfig.to_yml_format(self)
48        yml_format[KEY_SPLITTING] = self.splitting.to_yml_format()
49        # only include the rating converter if it is present
50        if self.converter:
51            yml_format[KEY_RATING_CONVERTER] = self.converter.to_yml_format()
52
53        return yml_format

Format the data matrix configuration as a YAML-compatible dictionary.

Returns: a dictionary containing the data matrix configuration.