src.fairreckitlib.data.filter.filter_config_parsing
This module contains parsers for the data subgroup/filter configurations.
Functions:
parse_data_subset_config: parse data subset configuration with multiple filter passes.
parse_data_filter_passes: parse multiple filter pass configurations.
parse_data_filter_pass_config: parse data filter pass configuration to multiple filters.
This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)
"""This module contains parsers for the data subset/filter configurations.

Functions:

    parse_data_subset_config: parse data subset configuration with multiple filter passes.
    parse_data_filter_passes: parse multiple filter pass configurations.
    parse_data_filter_pass_config: parse data filter pass configuration to multiple filters.

This program has been developed by students from the bachelor Computer Science at
Utrecht University within the Software Project course.
© Copyright Utrecht University (Department of Information and Computing Sciences)
"""

from typing import Any, Dict, List, Optional, Tuple, Union

from ...core.config.config_factories import GroupFactory
from ...core.events.event_dispatcher import EventDispatcher
from ...core.parsing.parse_assert import \
    assert_is_container_not_empty, assert_is_key_in_dict, assert_is_one_of_list, assert_is_type
from ...core.parsing.parse_config_object import parse_config_object_list
from ..set.dataset import Dataset
from ..set.dataset_constants import KEY_DATASET, KEY_MATRIX
from ..set.dataset_registry import DataRegistry
from .filter_constants import KEY_DATA_FILTER_PASS, KEY_DATA_SUBSET
from .filter_config import DataSubsetConfig, FilterPassConfig, FilterConfig


def parse_data_subset_config(
        data_subset_config: Dict[str, Any],
        data_registry: DataRegistry,
        data_filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher,
        *,
        data_parent_name: Optional[str]=None,
        required: bool=True
) -> Union[Tuple[DataSubsetConfig, str], Tuple[None, Optional[str]]]:
    """Parse a data subset configuration.

    The dataset name and the matrix name are validated first, after which the
    (optional) filter passes of the subset are parsed.

    Args:
        data_subset_config: the data subset configuration.
        data_registry: the data registry containing the available datasets.
        data_filter_factory: factory with available dataset-matrix filter factories.
        event_dispatcher: to dispatch the parse event on failure.
        data_parent_name: the data parent name related to the data subset,
            used as a prefix in the error messages when it is not empty.
        required: whether parsing the subset is required to succeed, when False
            an empty message is used for a missing dataset key.

    Returns:
        parsed_config: the parsed configuration or None on failure.
        subset_name: the dataset name followed by the matrix name on success.
            On failure it is None when the dataset key is missing, otherwise
            the (stringified) name that was parsed before the failure occurred.
    """
    # the message is only built when the subset is required
    missing_dataset_msg = ''
    if required:
        missing_dataset_msg = 'PARSE ERROR: missing key \'' + KEY_DATASET + '\' (required)'

    # assert dataset name is present
    if not assert_is_key_in_dict(
            KEY_DATASET,
            data_subset_config,
            event_dispatcher,
            missing_dataset_msg):
        return None, None

    dataset_name = data_subset_config[KEY_DATASET]
    # the raw configuration value is not validated yet and can be of any type,
    # use a string version for messages and for the returned name
    dataset_label = str(dataset_name)
    parse_err = 'PARSE ERROR: ' + (data_parent_name + ' ' if data_parent_name else '')

    # assert dataset name is available in the data registry
    if not assert_is_one_of_list(
            dataset_name,
            data_registry.get_available_sets(),
            event_dispatcher,
            parse_err + 'unknown dataset name \'' + dataset_label + '\''):
        return None, dataset_label

    dataset = data_registry.get_set(dataset_name)

    missing_matrix_msg = (
        parse_err + 'dataset \'' + dataset_label +
        '\' missing key \'' + KEY_MATRIX + '\' (required)'
    )
    # assert dataset matrix name is present
    if not assert_is_key_in_dict(
            KEY_MATRIX,
            data_subset_config,
            event_dispatcher,
            missing_matrix_msg):
        return None, dataset_label

    dataset_matrix = data_subset_config[KEY_MATRIX]
    subset_name = dataset_label + ' ' + str(dataset_matrix)

    # assert matrix name is available in the dataset
    if not assert_is_one_of_list(
            dataset_matrix,
            dataset.get_available_matrices(),
            event_dispatcher,
            parse_err + 'unknown matrix \'' + str(dataset_matrix) + '\''):
        return None, subset_name

    # parse dataset filter passes
    dataset_filter_passes = parse_data_filter_passes(
        subset_name,
        data_subset_config,
        (dataset, dataset_matrix),
        data_filter_factory,
        event_dispatcher
    )

    parsed_config = DataSubsetConfig(
        dataset_name,
        dataset_matrix,
        dataset_filter_passes
    )

    return parsed_config, subset_name


def parse_data_filter_passes(
        data_parent_name: str,
        data_parent_config: Dict[str, Any],
        dataset_pair: Tuple[Dataset, str],
        filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> List[FilterPassConfig]:
    """Parse a list of filter pass configurations.

    Args:
        data_parent_name: the parent name related to the filter passes that are being parsed.
        data_parent_config: the parent configuration to parse the filter passes from.
        dataset_pair: a pair consisting of the dataset and the matrix name.
        filter_factory: the filter factory containing available filters for the dataset.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        a list of parsed filter pass configurations, which is empty when the
        subset key is absent or when its value fails the list type assertion.
    """
    # filter passes are not mandatory
    if KEY_DATA_SUBSET not in data_parent_config:
        return []

    filter_passes_config = data_parent_config[KEY_DATA_SUBSET]

    # assert filter_passes_config is a list
    if not assert_is_type(
            filter_passes_config,
            list,
            event_dispatcher,
            'PARSE WARNING: ' + data_parent_name + ' invalid \'' + KEY_DATA_SUBSET + '\' value'):
        return []

    # attempt to parse each filter pass lazily and in order
    parsed_passes = (
        parse_data_filter_pass_config(
            data_parent_name,
            filter_pass_config,
            dataset_pair,
            filter_factory,
            event_dispatcher
        )
        for filter_pass_config in filter_passes_config
    )

    # failed filter passes are reported as None and skipped
    return [filter_pass for filter_pass in parsed_passes if filter_pass is not None]


def parse_data_filter_pass_config(
        parent_filter_name: str,
        filter_pass_config: Any,
        dataset_pair: Tuple[Dataset, str],
        filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> Optional[FilterPassConfig]:
    """Parse data filter pass configuration to multiple filters.

    Args:
        parent_filter_name: the filter parent name related to the filter pass that is being parsed.
        filter_pass_config: the filter pass configuration to parse.
        dataset_pair: a pair consisting of the dataset and the matrix name.
        filter_factory: the filter factory containing available filters for the dataset.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed configuration or None on failure.
    """
    # assert filter_pass_config is a dict
    if not assert_is_type(
            filter_pass_config,
            dict,
            event_dispatcher,
            'PARSE WARNING: ' + parent_filter_name + ' invalid ' + KEY_DATA_FILTER_PASS + ' value'):
        return None

    # assert KEY_DATA_FILTER_PASS is present
    if not assert_is_key_in_dict(
            KEY_DATA_FILTER_PASS,
            filter_pass_config,
            event_dispatcher,
            'PARSE WARNING: ' + parent_filter_name +
            ' missing key \'' + KEY_DATA_FILTER_PASS + '\''):
        return None

    filters_config = filter_pass_config[KEY_DATA_FILTER_PASS]

    # the filter factories are grouped by dataset name and then by matrix name
    dataset_filter_factory = filter_factory.get_factory(dataset_pair[0].get_name())
    matrix_filter_factory = dataset_filter_factory.get_factory(dataset_pair[1])

    # parse filter configurations as objects
    parsed_config_objs = parse_config_object_list(
        parent_filter_name,
        KEY_DATA_FILTER_PASS,
        filters_config,
        matrix_filter_factory,
        event_dispatcher
    )

    # convert objects to filter configurations, the second pair element is not used
    filter_config_list = [
        FilterConfig(config_obj.name, config_obj.params)
        for config_obj, _ in parsed_config_objs
    ]

    # assert filter pass has entries available
    if not assert_is_container_not_empty(
            filter_config_list,
            event_dispatcher,
            'PARSE WARNING: ' + parent_filter_name + ' has no filters, skipping...'):
        return None

    return FilterPassConfig(filter_config_list)
def parse_data_subset_config(
        data_subset_config: Dict[str, Any],
        data_registry: DataRegistry,
        data_filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher,
        *,
        data_parent_name: Optional[str]=None,
        required: bool=True
) -> Union[Tuple[DataSubsetConfig, str], Tuple[None, Optional[str]]]:
    """Parse a data subset configuration.

    The dataset name and the matrix name are validated first, after which the
    (optional) filter passes of the subset are parsed.

    Args:
        data_subset_config: the data subset configuration.
        data_registry: the data registry containing the available datasets.
        data_filter_factory: factory with available dataset-matrix filter factories.
        event_dispatcher: to dispatch the parse event on failure.
        data_parent_name: the data parent name related to the data subset,
            used as a prefix in the error messages when it is not empty.
        required: whether parsing the subset is required to succeed, when False
            an empty message is used for a missing dataset key.

    Returns:
        parsed_config: the parsed configuration or None on failure.
        subset_name: the dataset name followed by the matrix name on success.
            On failure it is None when the dataset key is missing, otherwise
            the (stringified) name that was parsed before the failure occurred.
    """
    # the message is only built when the subset is required
    missing_dataset_msg = ''
    if required:
        missing_dataset_msg = 'PARSE ERROR: missing key \'' + KEY_DATASET + '\' (required)'

    # assert dataset name is present
    if not assert_is_key_in_dict(
            KEY_DATASET,
            data_subset_config,
            event_dispatcher,
            missing_dataset_msg):
        return None, None

    dataset_name = data_subset_config[KEY_DATASET]
    # the raw configuration value is not validated yet and can be of any type,
    # use a string version for messages and for the returned name
    dataset_label = str(dataset_name)
    parse_err = 'PARSE ERROR: ' + (data_parent_name + ' ' if data_parent_name else '')

    # assert dataset name is available in the data registry
    if not assert_is_one_of_list(
            dataset_name,
            data_registry.get_available_sets(),
            event_dispatcher,
            parse_err + 'unknown dataset name \'' + dataset_label + '\''):
        return None, dataset_label

    dataset = data_registry.get_set(dataset_name)

    missing_matrix_msg = (
        parse_err + 'dataset \'' + dataset_label +
        '\' missing key \'' + KEY_MATRIX + '\' (required)'
    )
    # assert dataset matrix name is present
    if not assert_is_key_in_dict(
            KEY_MATRIX,
            data_subset_config,
            event_dispatcher,
            missing_matrix_msg):
        return None, dataset_label

    dataset_matrix = data_subset_config[KEY_MATRIX]
    subset_name = dataset_label + ' ' + str(dataset_matrix)

    # assert matrix name is available in the dataset
    if not assert_is_one_of_list(
            dataset_matrix,
            dataset.get_available_matrices(),
            event_dispatcher,
            parse_err + 'unknown matrix \'' + str(dataset_matrix) + '\''):
        return None, subset_name

    # parse dataset filter passes
    dataset_filter_passes = parse_data_filter_passes(
        subset_name,
        data_subset_config,
        (dataset, dataset_matrix),
        data_filter_factory,
        event_dispatcher
    )

    parsed_config = DataSubsetConfig(
        dataset_name,
        dataset_matrix,
        dataset_filter_passes
    )

    return parsed_config, subset_name
Parse a data subset configuration.
Args: data_subset_config: the data subset configuration. data_registry: the data registry containing the available datasets. data_filter_factory: factory with available dataset-matrix filter factories. event_dispatcher: to dispatch the parse event on failure. data_parent_name: the data parent name related to the data subset. required: whether parsing the subset is required to succeed.
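Returns: parsed_config: the parsed configuration or None on failure. dataset_name: the dataset name followed by the matrix name on success; on failure it is None when the dataset key is missing, otherwise the name that was parsed before the failure occurred.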
def parse_data_filter_passes(
        data_parent_name: str,
        data_parent_config: Dict[str, Any],
        dataset_pair: Tuple[Dataset, str],
        filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> List[FilterPassConfig]:
    """Parse the filter passes that are listed in a parent configuration.

    Args:
        data_parent_name: the parent name related to the filter passes that are being parsed.
        data_parent_config: the parent configuration to parse the filter passes from.
        dataset_pair: a pair consisting of the dataset and the matrix name.
        filter_factory: the filter factory containing available filters for the dataset.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        a list of the filter pass configurations that were parsed successfully,
        which is empty when the subset key is absent or when its value fails
        the list type assertion.
    """
    # the subset key is optional, without it there is nothing to parse
    if KEY_DATA_SUBSET not in data_parent_config:
        return []

    raw_passes = data_parent_config[KEY_DATA_SUBSET]
    invalid_msg = 'PARSE WARNING: ' + data_parent_name + ' invalid \'' + KEY_DATA_SUBSET + '\' value'

    # the filter passes need to be specified as a list
    if not assert_is_type(raw_passes, list, event_dispatcher, invalid_msg):
        return []

    # lazily parse every filter pass in the order of the configuration
    candidates = (
        parse_data_filter_pass_config(
            data_parent_name,
            raw_pass,
            dataset_pair,
            filter_factory,
            event_dispatcher
        )
        for raw_pass in raw_passes
    )

    # a filter pass that failed to parse is reported as None and left out
    return [candidate for candidate in candidates if candidate is not None]
Parse a list of filter pass configurations.
Args: data_parent_name: the parent name related to the filter passes that are being parsed. data_parent_config: the parent configuration to parse the filter passes from. dataset_pair: a pair consisting of the dataset and the matrix name. filter_factory: the filter factory containing available filters for the dataset. event_dispatcher: to dispatch the parse event on failure.
Returns: a list of parsed filter pass configurations.
def parse_data_filter_pass_config(
        parent_filter_name: str,
        filter_pass_config: Any,
        dataset_pair: Tuple[Dataset, str],
        filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> Optional[FilterPassConfig]:
    """Parse a single filter pass configuration into its filter configurations.

    Args:
        parent_filter_name: the filter parent name related to the filter pass that is being parsed.
        filter_pass_config: the filter pass configuration to parse.
        dataset_pair: a pair consisting of the dataset and the matrix name.
        filter_factory: the filter factory containing available filters for the dataset.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed filter pass configuration, or None when the configuration is
        not a dict, lacks the filter pass key or results in no filters at all.
    """
    # every warning of this filter pass starts with the same prefix
    warning = 'PARSE WARNING: ' + parent_filter_name

    # the filter pass itself needs to be a dict
    is_dict = assert_is_type(
        filter_pass_config,
        dict,
        event_dispatcher,
        warning + ' invalid ' + KEY_DATA_FILTER_PASS + ' value'
    )
    if not is_dict:
        return None

    # the dict needs to contain the filter pass key
    has_filter_pass = assert_is_key_in_dict(
        KEY_DATA_FILTER_PASS,
        filter_pass_config,
        event_dispatcher,
        warning + ' missing key \'' + KEY_DATA_FILTER_PASS + '\''
    )
    if not has_filter_pass:
        return None

    raw_filters = filter_pass_config[KEY_DATA_FILTER_PASS]

    # look up the dataset factory by name and then the factory of the matrix
    matrix_factory = filter_factory \
        .get_factory(dataset_pair[0].get_name()) \
        .get_factory(dataset_pair[1])

    parsed_objs = parse_config_object_list(
        parent_filter_name,
        KEY_DATA_FILTER_PASS,
        raw_filters,
        matrix_factory,
        event_dispatcher
    )

    # only the first element of each parsed pair is used for the filter configuration
    filters = [FilterConfig(parsed_obj.name, parsed_obj.params) for parsed_obj, _ in parsed_objs]

    # a filter pass without any filters is skipped
    if not assert_is_container_not_empty(
            filters,
            event_dispatcher,
            warning + ' has no filters, skipping...'):
        return None

    return FilterPassConfig(filters)
Parse data filter pass configuration to multiple filters.
Args: parent_filter_name: the filter parent name related to the filter pass that is being parsed. filter_pass_config: the filter pass configuration to parse. dataset_pair: a pair consisting of the dataset and the matrix name. filter_factory: the filter factory containing available filters for the dataset. event_dispatcher: to dispatch the parse event on failure.
Returns: the parsed configuration or None on failure.