src.fairreckitlib.data.pipeline.data_config_parsing
This module contains a parser for the dataset configuration.
Functions:
parse_data_config: parse dataset matrices from the experiment configuration.
parse_data_matrix_config: parse dataset matrix configuration.
This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)
"""This module contains a parser for the dataset configuration.

Functions:

    parse_data_config: parse dataset matrices from the experiment configuration.
    parse_data_matrix_config: parse dataset matrix configuration.

This program has been developed by students from the bachelor Computer Science at
Utrecht University within the Software Project course.
© Copyright Utrecht University (Department of Information and Computing Sciences)
"""

from typing import Any, Dict, List, Optional, Tuple, Union

from ...core.config.config_factories import GroupFactory
from ...core.events.event_dispatcher import EventDispatcher
from ...core.parsing.parse_assert import \
    assert_is_type, assert_is_container_not_empty, assert_is_key_in_dict
from ...core.parsing.parse_event import ON_PARSE, ParseEventArgs
from ..data_factory import KEY_DATA
from ..filter.filter_config_parsing import parse_data_subset_config
from ..filter.filter_constants import KEY_DATA_SUBSET
from ..ratings.convert_constants import KEY_RATING_CONVERTER
from ..ratings.convert_config_parsing import parse_data_convert_config
from ..set.dataset_registry import DataRegistry
from ..split.split_constants import KEY_SPLITTING
from ..split.split_config_parsing import parse_data_split_config
from .data_config import DataMatrixConfig


def parse_data_config(
        experiment_config: Dict[str, Any],
        data_registry: DataRegistry,
        data_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> Optional[List[DataMatrixConfig]]:
    """Parse every data matrix entry of the experiment configuration.

    The value stored under KEY_DATA has to be a non-empty list. Each entry is
    parsed on its own; an entry that fails to parse is reported with a warning
    event and skipped, so one bad entry does not abort the whole parse.

    Args:
        experiment_config: the experiment's total configuration.
        data_registry: the data registry containing the available datasets.
        data_factory: factory with available data modifier factories.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        a list of the parsed DataMatrixConfig's, or None when the KEY_DATA value
        is missing, is not a list, is empty, or when no entry could be parsed.
    """
    # the data key is required in the experiment configuration
    missing_key_msg = "PARSE ERROR: missing experiment key '" + KEY_DATA + "' (required)"
    if not assert_is_key_in_dict(KEY_DATA, experiment_config, event_dispatcher, missing_key_msg):
        return None

    raw_entries = experiment_config[KEY_DATA]

    # the value under the data key has to be a list...
    invalid_value_msg = "PARSE ERROR: invalid experiment value for key '" + KEY_DATA + "'"
    if not assert_is_type(raw_entries, list, event_dispatcher, invalid_value_msg):
        return None

    # ...with at least one entry
    empty_list_msg = "PARSE ERROR: experiment '" + KEY_DATA + "' is empty"
    if not assert_is_container_not_empty(raw_entries, event_dispatcher, empty_list_msg):
        return None

    results = []
    for raw_entry in raw_entries:
        matrix_config, matrix_name = parse_data_matrix_config(
            raw_entry,
            data_registry,
            data_factory,
            event_dispatcher
        )
        if matrix_config is not None:
            results.append(matrix_config)
        else:
            # a failed entry is only reported; the remaining entries are still parsed
            skip_msg = "PARSE WARNING: failed to parse data matrix '" + \
                str(matrix_name) + "', skipping..."
            event_dispatcher.dispatch(ParseEventArgs(ON_PARSE, skip_msg))

    # every entry may have been skipped, so verify that something got parsed
    has_results = assert_is_container_not_empty(
        results,
        event_dispatcher,
        "PARSE ERROR: missing experiment data matrices"
    )
    if not has_results:
        return None

    return results


def parse_data_matrix_config(
        data_matrix_config: Any,
        data_registry: DataRegistry,
        data_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> Union[Tuple[DataMatrixConfig, str],Tuple[None, None]]:
    """Parse a single data matrix configuration entry.

    The entry has to be a dictionary. The subset (dataset, matrix and filter
    passes) is parsed first and is mandatory; the rating converter and the
    splitting are parsed afterwards and their results are stored as returned.

    Args:
        data_matrix_config: the data matrix configuration.
        data_registry: the data registry containing the available datasets.
        data_factory: factory with available data modifier factories.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        parsed_config: the parsed configuration or None on failure.
        dataset_name: the name returned by parse_data_subset_config, also when
            the subset failed to parse; None when the entry is not a dictionary.
    """
    is_dict_entry = assert_is_type(
        data_matrix_config,
        dict,
        event_dispatcher,
        "PARSE ERROR: invalid dataset matrix entry"
    )
    if not is_dict_entry:
        return None, None

    # the subset decides which dataset/matrix the other parsers operate on
    subset_factory = data_factory.get_factory(KEY_DATA_SUBSET)
    subset, matrix_name = parse_data_subset_config(
        data_matrix_config, data_registry, subset_factory, event_dispatcher)
    if not subset:
        # hand the name back so that the caller can mention it in its warning
        return None, matrix_name

    dataset = data_registry.get_set(subset.dataset)

    # rating converter of the dataset matrix
    converter_factory = data_factory.get_factory(KEY_RATING_CONVERTER)
    rating_converter = parse_data_convert_config(
        data_matrix_config, dataset, subset.matrix, converter_factory, event_dispatcher)

    # splitter of the dataset matrix
    splitting_factory = data_factory.get_factory(KEY_SPLITTING)
    splitting = parse_data_split_config(
        data_matrix_config, dataset, subset.matrix, splitting_factory, event_dispatcher)

    matrix_config = DataMatrixConfig(
        subset.dataset,
        subset.matrix,
        subset.filter_passes,
        rating_converter,
        splitting
    )
    return matrix_config, matrix_name
def parse_data_config(
        experiment_config: Dict[str, Any],
        data_registry: DataRegistry,
        data_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> Optional[List[DataMatrixConfig]]:
    """Parse every data matrix entry of the experiment configuration.

    The value stored under KEY_DATA has to be a non-empty list. Each entry is
    parsed on its own; an entry that fails to parse is reported with a warning
    event and skipped, so one bad entry does not abort the whole parse.

    Args:
        experiment_config: the experiment's total configuration.
        data_registry: the data registry containing the available datasets.
        data_factory: factory with available data modifier factories.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        a list of the parsed DataMatrixConfig's, or None when the KEY_DATA value
        is missing, is not a list, is empty, or when no entry could be parsed.
    """
    # the data key is required in the experiment configuration
    missing_key_msg = "PARSE ERROR: missing experiment key '" + KEY_DATA + "' (required)"
    if not assert_is_key_in_dict(KEY_DATA, experiment_config, event_dispatcher, missing_key_msg):
        return None

    raw_entries = experiment_config[KEY_DATA]

    # the value under the data key has to be a list...
    invalid_value_msg = "PARSE ERROR: invalid experiment value for key '" + KEY_DATA + "'"
    if not assert_is_type(raw_entries, list, event_dispatcher, invalid_value_msg):
        return None

    # ...with at least one entry
    empty_list_msg = "PARSE ERROR: experiment '" + KEY_DATA + "' is empty"
    if not assert_is_container_not_empty(raw_entries, event_dispatcher, empty_list_msg):
        return None

    results = []
    for raw_entry in raw_entries:
        matrix_config, matrix_name = parse_data_matrix_config(
            raw_entry,
            data_registry,
            data_factory,
            event_dispatcher
        )
        if matrix_config is not None:
            results.append(matrix_config)
        else:
            # a failed entry is only reported; the remaining entries are still parsed
            skip_msg = "PARSE WARNING: failed to parse data matrix '" + \
                str(matrix_name) + "', skipping..."
            event_dispatcher.dispatch(ParseEventArgs(ON_PARSE, skip_msg))

    # every entry may have been skipped, so verify that something got parsed
    has_results = assert_is_container_not_empty(
        results,
        event_dispatcher,
        "PARSE ERROR: missing experiment data matrices"
    )
    if not has_results:
        return None

    return results
Parse all dataset configurations.
Args: experiment_config: the experiment's total configuration. data_registry: the data registry containing the available datasets. data_factory: factory with available data modifier factories. event_dispatcher: to dispatch the parse event on failure.
Returns: a list of the parsed DataMatrixConfig's, or None when the configuration is invalid or no data matrix could be parsed.
def parse_data_matrix_config(
        data_matrix_config: Any,
        data_registry: DataRegistry,
        data_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> Union[Tuple[DataMatrixConfig, str],Tuple[None, None]]:
    """Parse a single data matrix configuration entry.

    The entry has to be a dictionary. The subset (dataset, matrix and filter
    passes) is parsed first and is mandatory; the rating converter and the
    splitting are parsed afterwards and their results are stored as returned.

    Args:
        data_matrix_config: the data matrix configuration.
        data_registry: the data registry containing the available datasets.
        data_factory: factory with available data modifier factories.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        parsed_config: the parsed configuration or None on failure.
        dataset_name: the name returned by parse_data_subset_config, also when
            the subset failed to parse; None when the entry is not a dictionary.
    """
    is_dict_entry = assert_is_type(
        data_matrix_config,
        dict,
        event_dispatcher,
        "PARSE ERROR: invalid dataset matrix entry"
    )
    if not is_dict_entry:
        return None, None

    # the subset decides which dataset/matrix the other parsers operate on
    subset_factory = data_factory.get_factory(KEY_DATA_SUBSET)
    subset, matrix_name = parse_data_subset_config(
        data_matrix_config, data_registry, subset_factory, event_dispatcher)
    if not subset:
        # hand the name back so that the caller can mention it in its warning
        return None, matrix_name

    dataset = data_registry.get_set(subset.dataset)

    # rating converter of the dataset matrix
    converter_factory = data_factory.get_factory(KEY_RATING_CONVERTER)
    rating_converter = parse_data_convert_config(
        data_matrix_config, dataset, subset.matrix, converter_factory, event_dispatcher)

    # splitter of the dataset matrix
    splitting_factory = data_factory.get_factory(KEY_SPLITTING)
    splitting = parse_data_split_config(
        data_matrix_config, dataset, subset.matrix, splitting_factory, event_dispatcher)

    matrix_config = DataMatrixConfig(
        subset.dataset,
        subset.matrix,
        subset.filter_passes,
        rating_converter,
        splitting
    )
    return matrix_config, matrix_name
Parse a data matrix configuration.
Args: data_matrix_config: the data matrix configuration. data_registry: the data registry containing the available datasets. data_factory: factory with available data modifier factories. event_dispatcher: to dispatch the parse event on failure.
Returns: parsed_config: the parsed configuration or None on failure. dataset_name: the name returned by the subset parser, also when the subset failed to parse; None when the entry is not a dictionary.