src.fairreckitlib.data.split.split_config_parsing

This module contains a parser for the dataset splitting configuration.

Functions:

parse_data_split_config: parse split configuration.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

 1"""This module contains a parser for the dataset splitting configuration.
 2
 3Functions:
 4
 5    parse_data_split_config: parse split configuration.
 6
 7This program has been developed by students from the bachelor Computer Science at
 8Utrecht University within the Software Project course.
 9© Copyright Utrecht University (Department of Information and Computing Sciences)
10"""
11
12from typing import Any, Dict
13
14from ...core.config.config_factories import Factory
15from ...core.config.config_value_param import ConfigNumberParam
16from ...core.events.event_dispatcher import EventDispatcher
17from ...core.parsing.parse_config_object import parse_config_object
18from ...core.parsing.parse_config_params import parse_config_param
19from ...core.parsing.parse_event import ON_PARSE, ParseEventArgs
20from ..set.dataset import Dataset
21from .split_config import SplitConfig, create_default_split_config
22from .split_constants import KEY_SPLITTING, KEY_SPLIT_TEST_RATIO
23from .split_constants import DEFAULT_SPLIT_TEST_RATIO, MIN_TEST_RATIO, MAX_TEST_RATIO
24
25
def parse_data_split_config(
        dataset_config: Dict[str, Any],
        dataset: Dataset,
        matrix_name: str,
        split_factory: Factory,
        event_dispatcher: EventDispatcher) -> SplitConfig:
    """Parse a dataset splitting configuration.

    Splitting is optional. When the splitting key is absent from the dataset
    configuration a parse warning is dispatched and the default split
    configuration is used. The default is also used when no splitter could
    be parsed from the splitting section.

    Args:
        dataset_config: the dataset's total configuration.
        dataset: the dataset related to the splitting configuration.
        matrix_name: the dataset's matrix name that is used.
        split_factory: the split factory containing available splitters.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed configuration, or the default split configuration when the
        splitting key is missing or no splitter could be parsed.
    """
    default_config = create_default_split_config()
    # every parse message for this dataset/matrix pair starts with this label
    dataset_label = f"dataset {dataset.get_name()} '{matrix_name}'"

    # dataset splitting is optional
    if KEY_SPLITTING not in dataset_config:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            f"PARSE WARNING: {dataset_label} missing key '{KEY_SPLITTING}'",
            default_value=default_config
        ))
        return default_config

    split_config = dataset_config[KEY_SPLITTING]

    # the second element of the parse result is not needed here
    splitter, _ = parse_config_object(
        f"{dataset_label} splitter",
        split_config,
        split_factory,
        event_dispatcher,
        default_config=default_config
    )
    # a falsy result means no usable splitter was parsed: fall back to defaults
    if not splitter:
        return default_config

    # parse splitting test ratio; the parameter is declared as a float with
    # the (MIN_TEST_RATIO, MAX_TEST_RATIO) pair and DEFAULT_SPLIT_TEST_RATIO
    # as its default value
    _, test_ratio = parse_config_param(
        split_config,
        f"{dataset_label} splitter '{splitter.name}'",
        ConfigNumberParam(
            KEY_SPLIT_TEST_RATIO,
            float,
            DEFAULT_SPLIT_TEST_RATIO,
            (MIN_TEST_RATIO, MAX_TEST_RATIO)
        ),
        event_dispatcher
    )

    return SplitConfig(splitter.name, splitter.params, test_ratio)
def parse_data_split_config( dataset_config: Dict[str, Any], dataset: src.fairreckitlib.data.set.dataset.Dataset, matrix_name: str, split_factory: src.fairreckitlib.core.config.config_factories.Factory, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher) -> src.fairreckitlib.data.split.split_config.SplitConfig:
def parse_data_split_config(
        dataset_config: Dict[str, Any],
        dataset: Dataset,
        matrix_name: str,
        split_factory: Factory,
        event_dispatcher: EventDispatcher) -> SplitConfig:
    """Parse a dataset splitting configuration.

    Splitting is optional. When the splitting key is absent from the dataset
    configuration a parse warning is dispatched and the default split
    configuration is used. The default is also used when no splitter could
    be parsed from the splitting section.

    Args:
        dataset_config: the dataset's total configuration.
        dataset: the dataset related to the splitting configuration.
        matrix_name: the dataset's matrix name that is used.
        split_factory: the split factory containing available splitters.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed configuration, or the default split configuration when the
        splitting key is missing or no splitter could be parsed.
    """
    default_config = create_default_split_config()
    # every parse message for this dataset/matrix pair starts with this label
    dataset_label = f"dataset {dataset.get_name()} '{matrix_name}'"

    # dataset splitting is optional
    if KEY_SPLITTING not in dataset_config:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            f"PARSE WARNING: {dataset_label} missing key '{KEY_SPLITTING}'",
            default_value=default_config
        ))
        return default_config

    split_config = dataset_config[KEY_SPLITTING]

    # the second element of the parse result is not needed here
    splitter, _ = parse_config_object(
        f"{dataset_label} splitter",
        split_config,
        split_factory,
        event_dispatcher,
        default_config=default_config
    )
    # a falsy result means no usable splitter was parsed: fall back to defaults
    if not splitter:
        return default_config

    # parse splitting test ratio; the parameter is declared as a float with
    # the (MIN_TEST_RATIO, MAX_TEST_RATIO) pair and DEFAULT_SPLIT_TEST_RATIO
    # as its default value
    _, test_ratio = parse_config_param(
        split_config,
        f"{dataset_label} splitter '{splitter.name}'",
        ConfigNumberParam(
            KEY_SPLIT_TEST_RATIO,
            float,
            DEFAULT_SPLIT_TEST_RATIO,
            (MIN_TEST_RATIO, MAX_TEST_RATIO)
        ),
        event_dispatcher
    )

    return SplitConfig(splitter.name, splitter.params, test_ratio)

Parse a dataset splitting configuration.

Args: dataset_config: the dataset's total configuration. dataset: the dataset related to the splitting configuration. matrix_name: the dataset's matrix name that is used. split_factory: the split factory containing available splitters. event_dispatcher: to dispatch the parse event on failure.

Returns: the parsed configuration, or the default split configuration when the splitting key is missing or no splitter could be parsed.