src.fairreckitlib.data.filter.filter_config_parsing

This module contains parsers for the data subgroup/filter configurations.

Functions:

parse_data_subset_config: parse data subset configuration with multiple filter passes.
parse_data_filter_passes: parse multiple filter pass configurations.
parse_data_filter_pass_config: parse data filter pass configuration to multiple filters.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

  1"""This module contains parsers for the data subgroup/filter configurations.
  2
  3Functions:
  4
  5    parse_data_subset_config: parse data subset configuration with multiple filter passes.
  6    parse_data_filter_passes: parse multiple filter pass configurations.
  7    parse_data_filter_pass_config: parse data filter pass configuration to multiple filters.
  8
  9This program has been developed by students from the bachelor Computer Science at
 10Utrecht University within the Software Project course.
 11© Copyright Utrecht University (Department of Information and Computing Sciences)
 12"""
 13
 14from typing import Any, Dict, List, Optional, Tuple, Union
 15
 16from ...core.config.config_factories import GroupFactory
 17from ...core.events.event_dispatcher import EventDispatcher
 18from ...core.parsing.parse_assert import \
 19    assert_is_container_not_empty, assert_is_key_in_dict, assert_is_one_of_list, assert_is_type
 20from ...core.parsing.parse_config_object import parse_config_object_list
 21from ..set.dataset import Dataset
 22from ..set.dataset_constants import KEY_DATASET, KEY_MATRIX
 23from ..set.dataset_registry import DataRegistry
 24from .filter_constants import KEY_DATA_FILTER_PASS, KEY_DATA_SUBSET
 25from .filter_config import DataSubsetConfig, FilterPassConfig, FilterConfig
 26
 27
 28def parse_data_subset_config(
 29        data_subset_config: Dict[str, Any],
 30        data_registry: DataRegistry,
 31        data_filter_factory: GroupFactory,
 32        event_dispatcher: EventDispatcher,
 33        *,
 34        data_parent_name: str=None,
 35        required: bool=True) -> Union[Tuple[DataSubsetConfig, str], Tuple[None, None]]:
 36    """Parse a data subset configuration.
 37
 38    Args:
 39        data_subset_config: the data subset configuration.
 40        data_registry: the data registry containing the available datasets.
 41        data_filter_factory: factory with available dataset-matrix filter factories.
 42        event_dispatcher: to dispatch the parse event on failure.
 43        data_parent_name: the data parent name related to the data subset.
 44        required: whether parsing the subset is required to succeed.
 45
 46    Returns:
 47        parsed_config: the parsed configuration or None on failure.
 48        dataset_name: the name of the parsed dataset or None on failure.
 49    """
 50    # assert dataset name is present
 51    if not assert_is_key_in_dict(
 52        KEY_DATASET,
 53        data_subset_config,
 54        event_dispatcher,
 55        'PARSE ERROR: missing key \'' + KEY_DATASET + '\' (required)' if required else ''
 56    ): return None, None
 57
 58    dataset_name = data_subset_config[KEY_DATASET]
 59    parse_err = 'PARSE ERROR: ' + (data_parent_name + ' ' if data_parent_name else '')
 60
 61    # assert dataset name is available in the data registry
 62    if not assert_is_one_of_list(
 63        dataset_name,
 64        data_registry.get_available_sets(),
 65        event_dispatcher,
 66        parse_err + 'unknown dataset name \'' + str(dataset_name) + '\''
 67    ): return None, dataset_name
 68
 69    dataset = data_registry.get_set(dataset_name)
 70
 71    # assert dataset matrix name is present
 72    if not assert_is_key_in_dict(
 73        KEY_MATRIX,
 74        data_subset_config,
 75        event_dispatcher,
 76        parse_err + 'dataset \'' + dataset_name + '\' missing key \'' + KEY_MATRIX + '\' (required)'
 77    ): return None, dataset_name
 78
 79    dataset_matrix = data_subset_config[KEY_MATRIX]
 80
 81    # assert matrix name is available in the dataset
 82    if not assert_is_one_of_list(
 83        dataset_matrix,
 84        dataset.get_available_matrices(),
 85        event_dispatcher,
 86        parse_err + 'unknown matrix \'' + str(dataset_matrix) + '\''
 87    ): return None, dataset_name + ' ' + str(dataset_matrix)
 88
 89    # parse dataset filter passes
 90
 91    dataset_filter_passes = parse_data_filter_passes(
 92        dataset_name + ' ' + dataset_matrix,
 93        data_subset_config,
 94        (dataset, dataset_matrix),
 95        data_filter_factory,
 96        event_dispatcher
 97    )
 98
 99    parsed_config = DataSubsetConfig(
100        dataset_name,
101        dataset_matrix,
102        dataset_filter_passes
103    )
104
105    return parsed_config, dataset_name + ' ' + dataset_matrix
106
107
def parse_data_filter_passes(
        data_parent_name: str,
        data_parent_config: Dict[str, Any],
        dataset_pair: Tuple[Dataset, str],
        filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> List[FilterPassConfig]:
    """Parse the filter passes that are listed in a parent configuration.

    Args:
        data_parent_name: the parent name related to the filter passes that are being parsed.
        data_parent_config: the parent configuration to parse the filter passes from.
        dataset_pair: a pair consisting of the dataset and the matrix name.
        filter_factory: the filter factory containing available filters for the dataset.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        a list of the filter pass configurations that were parsed successfully, which is
            empty when the filter passes are absent or are not specified as a list.
    """
    # specifying filter passes is optional
    if KEY_DATA_SUBSET not in data_parent_config:
        return []

    pass_configs = data_parent_config[KEY_DATA_SUBSET]

    # the filter passes need to be specified as a list
    is_list = assert_is_type(
        pass_configs,
        list,
        event_dispatcher,
        'PARSE WARNING: ' + data_parent_name + ' invalid \'' + KEY_DATA_SUBSET + '\' value'
    )
    if not is_list:
        return []

    # parse every filter pass in order, a failed pass yields None
    parse_results = (
        parse_data_filter_pass_config(
            data_parent_name,
            pass_config,
            dataset_pair,
            filter_factory,
            event_dispatcher
        )
        for pass_config in pass_configs
    )

    # only keep the filter passes that were parsed successfully
    return [parsed_pass for parsed_pass in parse_results if parsed_pass is not None]
157
158
def parse_data_filter_pass_config(
        parent_filter_name: str,
        filter_pass_config: Any,
        dataset_pair: Tuple[Dataset, str],
        filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> Optional[FilterPassConfig]:
    """Parse a single filter pass configuration into its filter configurations.

    Args:
        parent_filter_name: the filter parent name related to the filter pass that is being parsed.
        filter_pass_config: the filter pass configuration to parse.
        dataset_pair: a pair consisting of the dataset and the matrix name.
        filter_factory: the filter factory containing available filters for the dataset.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed filter pass configuration, or None when the configuration is not a
            dictionary, the filter pass key is missing or no filters could be parsed.
    """
    warn_prefix = 'PARSE WARNING: ' + parent_filter_name

    # a filter pass needs to be specified as a dictionary
    if not assert_is_type(
            filter_pass_config,
            dict,
            event_dispatcher,
            warn_prefix + ' invalid ' + KEY_DATA_FILTER_PASS + ' value'
    ):
        return None

    # the dictionary needs to contain the filter pass key
    if not assert_is_key_in_dict(
            KEY_DATA_FILTER_PASS,
            filter_pass_config,
            event_dispatcher,
            warn_prefix + ' missing key \'' + KEY_DATA_FILTER_PASS + '\''
    ):
        return None

    raw_filters = filter_pass_config[KEY_DATA_FILTER_PASS]

    # resolve the factory of the dataset first and the factory of the matrix second
    matrix_filter_factory = filter_factory \
        .get_factory(dataset_pair[0].get_name()) \
        .get_factory(dataset_pair[1])

    # parse the filters as generic configuration objects
    parsed_objects = parse_config_object_list(
        parent_filter_name,
        KEY_DATA_FILTER_PASS,
        raw_filters,
        matrix_filter_factory,
        event_dispatcher
    )

    # only the name and params of each parsed object are carried over
    filters = [FilterConfig(parsed.name, parsed.params) for parsed, _ in parsed_objects]

    # a filter pass without any filters is dropped
    if not assert_is_container_not_empty(
            filters,
            event_dispatcher,
            warn_prefix + ' has no filters, skipping...'
    ):
        return None

    return FilterPassConfig(filters)
def parse_data_subset_config( data_subset_config: Dict[str, Any], data_registry: src.fairreckitlib.data.set.dataset_registry.DataRegistry, data_filter_factory: src.fairreckitlib.core.config.config_factories.GroupFactory, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher, *, data_parent_name: str = None, required: bool = True) -> Union[Tuple[src.fairreckitlib.data.filter.filter_config.DataSubsetConfig, str], Tuple[NoneType, NoneType]]:
 29def parse_data_subset_config(
 30        data_subset_config: Dict[str, Any],
 31        data_registry: DataRegistry,
 32        data_filter_factory: GroupFactory,
 33        event_dispatcher: EventDispatcher,
 34        *,
 35        data_parent_name: str=None,
 36        required: bool=True) -> Union[Tuple[DataSubsetConfig, str], Tuple[None, None]]:
 37    """Parse a data subset configuration.
 38
 39    Args:
 40        data_subset_config: the data subset configuration.
 41        data_registry: the data registry containing the available datasets.
 42        data_filter_factory: factory with available dataset-matrix filter factories.
 43        event_dispatcher: to dispatch the parse event on failure.
 44        data_parent_name: the data parent name related to the data subset.
 45        required: whether parsing the subset is required to succeed.
 46
 47    Returns:
 48        parsed_config: the parsed configuration or None on failure.
 49        dataset_name: the name of the parsed dataset or None on failure.
 50    """
 51    # assert dataset name is present
 52    if not assert_is_key_in_dict(
 53        KEY_DATASET,
 54        data_subset_config,
 55        event_dispatcher,
 56        'PARSE ERROR: missing key \'' + KEY_DATASET + '\' (required)' if required else ''
 57    ): return None, None
 58
 59    dataset_name = data_subset_config[KEY_DATASET]
 60    parse_err = 'PARSE ERROR: ' + (data_parent_name + ' ' if data_parent_name else '')
 61
 62    # assert dataset name is available in the data registry
 63    if not assert_is_one_of_list(
 64        dataset_name,
 65        data_registry.get_available_sets(),
 66        event_dispatcher,
 67        parse_err + 'unknown dataset name \'' + str(dataset_name) + '\''
 68    ): return None, dataset_name
 69
 70    dataset = data_registry.get_set(dataset_name)
 71
 72    # assert dataset matrix name is present
 73    if not assert_is_key_in_dict(
 74        KEY_MATRIX,
 75        data_subset_config,
 76        event_dispatcher,
 77        parse_err + 'dataset \'' + dataset_name + '\' missing key \'' + KEY_MATRIX + '\' (required)'
 78    ): return None, dataset_name
 79
 80    dataset_matrix = data_subset_config[KEY_MATRIX]
 81
 82    # assert matrix name is available in the dataset
 83    if not assert_is_one_of_list(
 84        dataset_matrix,
 85        dataset.get_available_matrices(),
 86        event_dispatcher,
 87        parse_err + 'unknown matrix \'' + str(dataset_matrix) + '\''
 88    ): return None, dataset_name + ' ' + str(dataset_matrix)
 89
 90    # parse dataset filter passes
 91
 92    dataset_filter_passes = parse_data_filter_passes(
 93        dataset_name + ' ' + dataset_matrix,
 94        data_subset_config,
 95        (dataset, dataset_matrix),
 96        data_filter_factory,
 97        event_dispatcher
 98    )
 99
100    parsed_config = DataSubsetConfig(
101        dataset_name,
102        dataset_matrix,
103        dataset_filter_passes
104    )
105
106    return parsed_config, dataset_name + ' ' + dataset_matrix

Parse a data subset configuration.

Args: data_subset_config: the data subset configuration. data_registry: the data registry containing the available datasets. data_filter_factory: factory with available dataset-matrix filter factories. event_dispatcher: to dispatch the parse event on failure. data_parent_name: the data parent name related to the data subset. required: whether parsing the subset is required to succeed.

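Returns: parsed_config: the parsed configuration or None on failure. dataset_name: None when the dataset key is missing; the configured dataset name when the dataset is unknown or the matrix key is missing; otherwise the dataset name and matrix name joined by a space.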

def parse_data_filter_passes( data_parent_name: str, data_parent_config: Dict[str, Any], dataset_pair: Tuple[src.fairreckitlib.data.set.dataset.Dataset, str], filter_factory: src.fairreckitlib.core.config.config_factories.GroupFactory, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher) -> List[src.fairreckitlib.data.filter.filter_config.FilterPassConfig]:
def parse_data_filter_passes(
        data_parent_name: str,
        data_parent_config: Dict[str, Any],
        dataset_pair: Tuple[Dataset, str],
        filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> List[FilterPassConfig]:
    """Parse the filter passes that are listed in a parent configuration.

    Args:
        data_parent_name: the parent name related to the filter passes that are being parsed.
        data_parent_config: the parent configuration to parse the filter passes from.
        dataset_pair: a pair consisting of the dataset and the matrix name.
        filter_factory: the filter factory containing available filters for the dataset.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        a list of the filter pass configurations that were parsed successfully, which is
            empty when the filter passes are absent or are not specified as a list.
    """
    # specifying filter passes is optional
    if KEY_DATA_SUBSET not in data_parent_config:
        return []

    pass_configs = data_parent_config[KEY_DATA_SUBSET]

    # the filter passes need to be specified as a list
    is_list = assert_is_type(
        pass_configs,
        list,
        event_dispatcher,
        'PARSE WARNING: ' + data_parent_name + ' invalid \'' + KEY_DATA_SUBSET + '\' value'
    )
    if not is_list:
        return []

    # parse every filter pass in order, a failed pass yields None
    parse_results = (
        parse_data_filter_pass_config(
            data_parent_name,
            pass_config,
            dataset_pair,
            filter_factory,
            event_dispatcher
        )
        for pass_config in pass_configs
    )

    # only keep the filter passes that were parsed successfully
    return [parsed_pass for parsed_pass in parse_results if parsed_pass is not None]

Parse a list of filter pass configurations.

Args: data_parent_name: the parent name related to the filter passes that are being parsed. data_parent_config: the parent configuration to parse the filter passes from. dataset_pair: a pair consisting of the dataset and the matrix name. filter_factory: the filter factory containing available filters for the dataset. event_dispatcher: to dispatch the parse event on failure.

Returns: a list of parsed filter pass configurations; filter passes that fail to parse are skipped.

def parse_data_filter_pass_config( parent_filter_name: str, filter_pass_config: Any, dataset_pair: Tuple[src.fairreckitlib.data.set.dataset.Dataset, str], filter_factory: src.fairreckitlib.core.config.config_factories.GroupFactory, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher) -> Optional[src.fairreckitlib.data.filter.filter_config.FilterPassConfig]:
def parse_data_filter_pass_config(
        parent_filter_name: str,
        filter_pass_config: Any,
        dataset_pair: Tuple[Dataset, str],
        filter_factory: GroupFactory,
        event_dispatcher: EventDispatcher) -> Optional[FilterPassConfig]:
    """Parse a single filter pass configuration into its filter configurations.

    Args:
        parent_filter_name: the filter parent name related to the filter pass that is being parsed.
        filter_pass_config: the filter pass configuration to parse.
        dataset_pair: a pair consisting of the dataset and the matrix name.
        filter_factory: the filter factory containing available filters for the dataset.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed filter pass configuration, or None when the configuration is not a
            dictionary, the filter pass key is missing or no filters could be parsed.
    """
    warn_prefix = 'PARSE WARNING: ' + parent_filter_name

    # a filter pass needs to be specified as a dictionary
    if not assert_is_type(
            filter_pass_config,
            dict,
            event_dispatcher,
            warn_prefix + ' invalid ' + KEY_DATA_FILTER_PASS + ' value'
    ):
        return None

    # the dictionary needs to contain the filter pass key
    if not assert_is_key_in_dict(
            KEY_DATA_FILTER_PASS,
            filter_pass_config,
            event_dispatcher,
            warn_prefix + ' missing key \'' + KEY_DATA_FILTER_PASS + '\''
    ):
        return None

    raw_filters = filter_pass_config[KEY_DATA_FILTER_PASS]

    # resolve the factory of the dataset first and the factory of the matrix second
    matrix_filter_factory = filter_factory \
        .get_factory(dataset_pair[0].get_name()) \
        .get_factory(dataset_pair[1])

    # parse the filters as generic configuration objects
    parsed_objects = parse_config_object_list(
        parent_filter_name,
        KEY_DATA_FILTER_PASS,
        raw_filters,
        matrix_filter_factory,
        event_dispatcher
    )

    # only the name and params of each parsed object are carried over
    filters = [FilterConfig(parsed.name, parsed.params) for parsed, _ in parsed_objects]

    # a filter pass without any filters is dropped
    if not assert_is_container_not_empty(
            filters,
            event_dispatcher,
            warn_prefix + ' has no filters, skipping...'
    ):
        return None

    return FilterPassConfig(filters)

Parse data filter pass configuration to multiple filters.

Args: parent_filter_name: the filter parent name related to the filter pass that is being parsed. filter_pass_config: the filter pass configuration to parse. dataset_pair: a pair consisting of the dataset and the matrix name. filter_factory: the filter factory containing available filters for the dataset. event_dispatcher: to dispatch the parse event on failure.

Returns: the parsed configuration or None on failure.