src.fairreckitlib.data.set.dataset_config_parser

This module contains the parser for the dataset configuration and parser utility functions.

Classes:

DatasetConfigParser: dataset configuration parser.

Functions:

parse_file_name: parse a file name from a configuration and verify existence on disk.
parse_float: parse floating-point value from a configuration.
parse_int: parse integer value from a configuration.
parse_optional_bool: parse optional boolean value from a configuration.
parse_optional_string: parse optional string value from a configuration.
parse_rating_matrix: parse rating matrix configuration.
parse_string: parse a string value from a configuration.
parse_string_list: parse a list of strings from a configuration.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

  1"""This module contains the parser for the dataset configuration and parser utility functions.
  2
  3Classes:
  4
  5    DatasetConfigParser: dataset configuration parser.
  6
  7Functions:
  8
  9    parse_file_name: parse a file name from a configuration and verify existence on disk.
 10    parse_float: parse floating-point value from a configuration.
 11    parse_int: parse integer value from a configuration.
 12    parse_optional_bool: parse optional boolean value from a configuration.
 13    parse_optional_string: parse optional string value from a configuration.
 14    parse_rating_matrix: parse rating matrix configuration.
 15    parse_string: parse a string value from a configuration.
 16    parse_string_list: parse a list of strings from a configuration.
 17
 18This program has been developed by students from the bachelor Computer Science at
 19Utrecht University within the Software Project course.
 20© Copyright Utrecht University (Department of Information and Computing Sciences)
 21"""
 22import os.path
 23from typing import Any, Dict, List, Optional, Tuple
 24
 25from ...core.core_constants import KEY_NAME
 26from ...core.events.event_dispatcher import EventDispatcher
 27from ...core.io.io_utility import load_yml
 28from ...core.parsing.parse_assert import \
 29    assert_is_type, assert_is_key_in_dict, assert_is_one_of_list
 30from ...core.parsing.parse_event import ON_PARSE, ParseEventArgs, print_parse_event
 31from .dataset_constants import KEY_DATASET, KEY_EVENTS, KEY_MATRICES, KEY_TABLES
 32from .dataset_constants import KEY_MATRIX, KEY_IDX_ITEM, KEY_IDX_USER
 33from .dataset_constants import KEY_RATING_MIN, KEY_RATING_MAX, KEY_RATING_TYPE
 34from .dataset_constants import TABLE_KEY, TABLE_PRIMARY_KEY, TABLE_FOREIGN_KEYS, TABLE_COLUMNS
 35from .dataset_constants import TABLE_FILE, TABLE_COMPRESSION, TABLE_ENCODING
 36from .dataset_constants import TABLE_HEADER, TABLE_NUM_RECORDS, TABLE_SEP
 37from .dataset_config import DatasetIndexConfig, DatasetMatrixConfig, RatingMatrixConfig
 38from .dataset_config import DatasetConfig, DatasetFileConfig, DatasetTableConfig, FileOptionsConfig
 39from .dataset_config import DATASET_RATINGS_EXPLICIT, DATASET_RATINGS_IMPLICIT
 40
 41VALID_SEPARATORS = [',', '|']
 42VALID_COMPRESSIONS = ['bz2']
 43VALID_ENCODINGS = ['utf-8', 'ISO-8859-1']
 44
 45class DatasetConfigParser:
 46    """Dataset Configuration Parser.
 47
 48    Public methods:
 49
 50    parse_dataset_config
 51    parse_dataset_config_from_yml
 52    """
 53
 54    def __init__(self, verbose: bool):
 55        """Construct the DatasetConfigParser.
 56
 57        Args:
 58            verbose: whether the parser should give verbose output.
 59        """
 60        self.verbose = verbose
 61        handle_parse_event = lambda parser, args: \
 62            print_parse_event(args) if parser.verbose else None
 63
 64        self.event_dispatcher = EventDispatcher()
 65        self.event_dispatcher.add_listener(ON_PARSE, self, (handle_parse_event, None))
 66
 67    def parse_dataset_config(
 68            self,
 69            data_dir: str,
 70            dataset_config: Dict[str, Any],
 71            available_datasets: List[str]) -> Optional[DatasetConfig]:
 72        """Parse a dataset configuration.
 73
 74        Args:
 75            data_dir: the directory where the dataset is stored.
 76            dataset_config: the dataset configuration.
 77            available_datasets: a list of already available datasets.
 78
 79        Returns:
 80            the parsed configuration or None on failure.
 81        """
 82        # attempt to parse the name of the dataset
 83        dataset_name = parse_string(
 84            dataset_config,
 85            KEY_DATASET,
 86            self.event_dispatcher
 87        )
 88        if dataset_name is None:
 89            return None
 90
 91        # verify that the dataset name is not already present
 92        if dataset_name in available_datasets:
 93            self.event_dispatcher.dispatch(ParseEventArgs(
 94                ON_PARSE,
 95                'PARSE ERROR: dataset already exists: ' + dataset_name
 96            ))
 97            return None
 98
 99        # attempt to parse the dataset (event) tables
100        events = self.parse_dataset_events(data_dir, dataset_config)
101
102        # attempt to parse the dataset (matrix) tables
103        matrices = self.parse_dataset_matrices(data_dir, dataset_config)
104
105        # attempt to parse the dataset (other) tables
106        tables = self.parse_dataset_tables(data_dir, dataset_config)
107
108        return DatasetConfig(
109            dataset_name,
110            events,
111            matrices,
112            tables
113        )
114
115    def parse_dataset_config_from_yml(
116            self,
117            data_dir: str,
118            file_name: str,
119            available_datasets: List[str]) -> Optional[DatasetConfig]:
120        """Parse a dataset configuration.
121
122        Args:
123            data_dir: the directory where the dataset is stored.
124            file_name: the name of the yml file with extension.
125            available_datasets: a list of already available datasets.
126
127        Returns:
128            the parsed configuration or None on failure.
129        """
130        return self.parse_dataset_config(
131            data_dir,
132            load_yml(os.path.join(data_dir, file_name)),
133            available_datasets
134        )
135
136    def parse_dataset_events(
137            self,
138            data_dir: str,
139            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
140        """Parse dataset event tables from the configuration.
141
142        Args:
143            data_dir: the directory where the dataset is stored.
144            dataset_config: the dataset configuration.
145
146        Returns:
147            a dictionary with parsed event table configurations.
148        """
149        events = {}
150        if dataset_config.get(KEY_EVENTS) is not None:
151            if assert_is_type(
152                dataset_config[KEY_EVENTS],
153                dict,
154                self.event_dispatcher,
155                'PARSE WARNING: dataset events invalid value'
156            ):
157                for table_name, table_config in dataset_config[KEY_EVENTS].items():
158                    config = self.parse_dataset_table_config(data_dir, table_config)
159                    if config is None:
160                        continue
161
162                    events[table_name] = config
163
164        return events
165
166    def parse_dataset_matrices(
167            self,
168            data_dir: str,
169            dataset_config: Dict[str, Any]) -> Dict[str, DatasetMatrixConfig]:
170        """Parse dataset matrices from the configuration.
171
172        Args:
173            data_dir: the directory where the dataset is stored.
174            dataset_config: the dataset configuration.
175
176        Returns:
177            a dictionary with parsed matrix configurations.
178        """
179        matrices = {}
180        if dataset_config.get(KEY_MATRICES) is not None:
181            if assert_is_type(
182                dataset_config[KEY_MATRICES],
183                dict,
184                self.event_dispatcher,
185                'PARSE WARNING: dataset matrices invalid value'
186            ):
187                for matrix_name, matrix_config in dataset_config[KEY_MATRICES].items():
188                    config = self.parse_dataset_matrix_config(
189                        data_dir,
190                        matrix_config
191                    )
192                    if config is None:
193                        continue
194
195                    matrices[matrix_name] = config
196
197        return matrices
198
199    def parse_dataset_tables(
200            self,
201            data_dir: str,
202            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
203        """Parse dataset tables from the configuration.
204
205        Args:
206            data_dir: the directory where the dataset is stored.
207            dataset_config: the dataset configuration.
208
209        Returns:
210            a dictionary with parsed table configurations.
211        """
212        tables = {}
213        if dataset_config.get(KEY_TABLES) is not None:
214            if assert_is_type(
215                dataset_config[KEY_TABLES],
216                dict,
217                self.event_dispatcher,
218                'PARSE WARNING: dataset tables invalid value'
219            ):
220                for table_name, table_config in dataset_config[KEY_TABLES].items():
221                    config = self.parse_dataset_table_config(data_dir, table_config)
222                    if config is None:
223                        continue
224
225                    tables[table_name] = config
226
227        return tables
228
229    def parse_file_options_config(
230            self,
231            file_config: Dict[str, Any]) -> Optional[FileOptionsConfig]:
232        """Parse a dataset file configuration.
233
234        Args:
235            file_config: the dataset file configuration.
236
237        Returns:
238            the parsed configuration or None on failure.
239        """
240        # attempt to parse the optional separator string
241        success, file_sep = parse_optional_string(
242            file_config,
243            TABLE_SEP,
244            VALID_SEPARATORS,
245            self.event_dispatcher
246        )
247        if not success:
248            return None
249
250        # attempt to parse the optional compression string
251        success, file_compression = parse_optional_string(
252            file_config,
253            TABLE_COMPRESSION,
254            VALID_COMPRESSIONS,
255            self.event_dispatcher
256        )
257        if not success:
258            return None
259
260        # attempt to parse the optional encoding string
261        success, file_encoding = parse_optional_string(
262            file_config,
263            TABLE_ENCODING,
264            VALID_ENCODINGS,
265            self.event_dispatcher
266        )
267        if not success:
268            return None
269
270        # attempt to parse the optional header boolean
271        success, file_header = parse_optional_bool(
272            file_config,
273            TABLE_HEADER,
274            self.event_dispatcher
275        )
276        if not success:
277            return None
278
279        return FileOptionsConfig(
280            file_sep,
281            file_compression,
282            file_encoding,
283            file_header
284        )
285
286    def parse_dataset_file_config(
287            self,
288            data_dir: str,
289            file_config: Dict[str, Any]) -> Optional[DatasetFileConfig]:
290        """Parse a dataset file configuration.
291
292        Args:
293            data_dir: the directory where the file is stored.
294            file_config: the dataset file configuration.
295
296        Returns:
297            the parsed configuration or None on failure.
298        """
299        # attempt to parse the (required) file name
300        success, file_name = parse_file_name(
301            data_dir,
302            file_config,
303            KEY_NAME,
304            self.event_dispatcher
305        )
306        if not success:
307            return None
308
309        # attempt to parse the file options
310        file_options = self.parse_file_options_config(file_config)
311        if file_options is None:
312            return None
313
314        return DatasetFileConfig(file_name, file_options)
315
316    def parse_dataset_index_config(
317            self,
318            data_dir: str,
319            index_config: Dict[str, Any]) -> Optional[DatasetIndexConfig]:
320        """Parse a dataset matrix' user/item index configuration.
321
322        Args:
323            data_dir: the directory where the file is stored.
324            index_config: the dataset matrix index configuration.
325
326        Returns:
327            the parsed configuration or None on failure.
328        """
329        # attempt to parse (optional) file name
330        success, file_name = parse_file_name(
331            data_dir,
332            index_config,
333            TABLE_FILE,
334            self.event_dispatcher,
335            required=False
336        )
337        if not success:
338            return None
339
340        # attempt to parse the key that is associated with the index
341        file_key = parse_string(
342            index_config,
343            TABLE_KEY,
344            self.event_dispatcher
345        )
346        if file_key is None:
347            return None
348
349        # attempt to parse the number of records in the file
350        num_records = parse_int(
351            index_config,
352            TABLE_NUM_RECORDS,
353            self.event_dispatcher
354        )
355        if num_records is None:
356            return None
357
358        return DatasetIndexConfig(file_name, file_key, num_records)
359
360    def parse_dataset_matrix_config(
361            self,
362            data_dir: str,
363            matrix_config: Dict[str, Any]) -> Optional[DatasetMatrixConfig]:
364        """Parse a dataset matrix configuration.
365
366        Args:
367            data_dir: the directory where the dataset matrix is stored.
368            matrix_config: the dataset matrix configuration.
369
370        Returns:
371            the parsed configuration or None on failure.
372        """
373        # attempt to parse the matrix table
374        matrix_table = self.parse_dataset_table_config(data_dir,
375                                                       matrix_config.get(KEY_MATRIX, {}))
376        if matrix_table is None:
377            return None
378
379        # attempt to parse the matrix users
380        matrix_users = self.parse_dataset_index_config(data_dir,
381                                                       matrix_config.get(KEY_IDX_USER, {}))
382        if matrix_users is None:
383            return None
384
385        # attempt to parse the matrix items
386        matrix_items = self.parse_dataset_index_config(data_dir,
387                                                       matrix_config.get(KEY_IDX_ITEM, {}))
388        if matrix_items is None:
389            return None
390
391        # attempt to parse the matrix ratings
392        matrix_ratings = parse_rating_matrix(
393            matrix_config,
394            self.event_dispatcher
395        )
396        if matrix_ratings is None:
397            return None
398
399        return DatasetMatrixConfig(
400            matrix_table,
401            matrix_ratings,
402            matrix_users,
403            matrix_items
404        )
405
406    def parse_dataset_table_config(
407            self,
408            data_dir: str,
409            table_config: Dict[str, Any]) -> Optional[DatasetTableConfig]:
410        """Parse a dataset table configuration.
411
412        Args:
413            data_dir: the directory where the table is stored.
414            table_config: the dataset table configuration.
415
416        Returns:
417            the parsed configuration or None on failure.
418        """
419        file_config = self.parse_dataset_file_config(data_dir, table_config.get(TABLE_FILE, {}))
420        if file_config is None:
421            return None
422
423        table_primary_key = parse_string_list(
424            table_config,
425            TABLE_PRIMARY_KEY,
426            1,
427            self.event_dispatcher
428        )
429        if table_primary_key is None:
430            return None
431
432        table_foreign_keys = None
433        if TABLE_FOREIGN_KEYS in table_config:
434            table_foreign_keys = parse_string_list(
435                table_config,
436                TABLE_FOREIGN_KEYS,
437                0,
438                self.event_dispatcher
439            )
440
441        table_columns = parse_string_list(
442            table_config,
443            TABLE_COLUMNS,
444            1,
445            self.event_dispatcher
446        )
447        if table_columns is None:
448            return None
449
450        table_num_records = parse_int(
451            table_config,
452            TABLE_NUM_RECORDS,
453            self.event_dispatcher
454        )
455        if table_num_records is None:
456            return None
457
458        return DatasetTableConfig(
459            table_primary_key,
460            table_foreign_keys,
461            table_columns,
462            table_num_records,
463            file_config
464        )
465
466
def parse_file_name(
        data_dir: str,
        file_config: Dict[str, Any],
        file_key: str,
        event_dispatcher: EventDispatcher,
        *,
        required: bool=True) -> Tuple[bool, Optional[str]]:
    """Parse a file name from the configuration and check that the file exists.

    A file name that is present must be a string and must refer to an existing
    file inside the data directory. When the file name is not required, a
    missing key or empty value succeeds with None as the file name.

    Args:
        data_dir: the directory where the file is stored.
        file_config: the configuration dictionary to parse from.
        file_key: the key in the configuration that contains the file name.
        event_dispatcher: to dispatch the parse event on failure.
        required: whether the parsing is required to succeed.

    Returns:
        whether the parsing succeeded and the parsed file name or None on failure.
    """
    failure = (False, None)

    if required:
        has_key = assert_is_key_in_dict(
            file_key,
            file_config,
            event_dispatcher,
            "PARSE ERROR: file configuration missing key '" + file_key + "'"
        )
        if not has_key:
            return failure

    name = file_config.get(file_key)
    if name is None:
        if not required:
            # nothing configured and nothing needed
            return True, name

        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            "PARSE ERROR: file configuration missing value for '" + file_key + "'"
        ))
        return failure

    is_string = assert_is_type(
        name,
        str,
        event_dispatcher,
        'PARSE ERROR: file configuration contains invalid name'
    )
    if not is_string:
        return failure

    # the file name is relative to the data directory
    full_path = os.path.join(data_dir, name)
    if os.path.isfile(full_path):
        return True, name

    event_dispatcher.dispatch(ParseEventArgs(
        ON_PARSE,
        'PARSE ERROR: file configuration file name does not exist: ' + full_path
    ))
    return failure
521
522
def parse_float(
        config: Dict[str, Any],
        float_key: str,
        event_dispatcher: EventDispatcher) -> Optional[float]:
    """Parse a required floating-point value from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        float_key: the key in the configuration that contains the floating-point value.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed floating-point value or None on failure.
    """
    has_key = assert_is_key_in_dict(
        float_key,
        config,
        event_dispatcher,
        "PARSE ERROR: configuration contains invalid '" + float_key + "' value"
    )
    if not has_key:
        return None

    candidate = config[float_key]
    is_float = assert_is_type(
        candidate,
        float,
        event_dispatcher,
        "PARSE ERROR: configuration contains invalid '" + float_key + "'"
    )
    if is_float:
        return candidate

    return None
554
555
def parse_int(
        config: Dict[str, Any],
        int_key: str,
        event_dispatcher: EventDispatcher) -> Optional[int]:
    """Parse a required, strictly positive integer from the configuration.

    Booleans are rejected explicitly, because bool is a subclass of int in
    Python and would otherwise be able to pass as an integer.

    Args:
        config: the configuration dictionary to parse from.
        int_key: the key in the configuration that contains the integer value.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed integer value or None on failure.
    """
    invalid_message = "PARSE ERROR: configuration contains invalid '" + int_key + "'"

    has_key = assert_is_key_in_dict(
        int_key,
        config,
        event_dispatcher,
        invalid_message + ' value'
    )
    if not has_key:
        return None

    candidate = config[int_key]

    if isinstance(candidate, bool):
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            invalid_message,
            expected_type=int,
            actual_type=bool
        ))
        return None

    is_int = assert_is_type(
        candidate,
        int,
        event_dispatcher,
        invalid_message
    )
    if not is_int:
        return None

    if candidate > 0:
        return candidate

    event_dispatcher.dispatch(ParseEventArgs(
        ON_PARSE,
        invalid_message + ' less than or equal to zero'
    ))
    return None
605
606
def parse_optional_bool(
        config: Dict[str, Any],
        bool_key: str,
        event_dispatcher: EventDispatcher) -> Tuple[bool, Optional[bool]]:
    """Parse an optional boolean from the configuration.

    A missing key or empty value succeeds and yields False.

    Args:
        config: the configuration dictionary to parse from.
        bool_key: the key in the configuration that contains the boolean.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        whether the parsing succeeded and the optional boolean value.
    """
    flag = config.get(bool_key)
    if flag is None:
        # not configured: fall back to False
        return True, False

    is_bool = assert_is_type(
        flag,
        bool,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid ' + bool_key + ' value'
    )
    if is_bool:
        return True, flag

    return False, None
633
634
def parse_optional_string(
        config: Dict[str, Any],
        string_key: str,
        string_options: List[str],
        event_dispatcher: EventDispatcher) -> Tuple[bool, Optional[str]]:
    """Parse an optional string that must be one of a list of valid values.

    A missing key or empty value succeeds and yields None.

    Args:
        config: the configuration dictionary to parse from.
        string_key: the key in the configuration that contains the string.
        string_options: the options that are available for the string that is being parsed.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        whether the parsing succeeded and the optional string value.
    """
    text = config.get(string_key)
    if text is None:
        # not configured: nothing to validate
        return True, text

    invalid_message = "PARSE ERROR: configuration contains invalid '" + string_key + "'"

    is_string = assert_is_type(
        text,
        str,
        event_dispatcher,
        invalid_message + ' value'
    )
    if not is_string:
        return False, None

    is_option = assert_is_one_of_list(
        text,
        string_options,
        event_dispatcher,
        invalid_message
    )
    if not is_option:
        return False, None

    return True, text
668
669
def parse_rating_matrix(
        matrix_config: Dict[str, Any],
        event_dispatcher: EventDispatcher) -> Optional[RatingMatrixConfig]:
    """Parse the rating settings of a matrix from the configuration.

    The minimum rating must be greater than zero, the maximum rating must not be
    less than the minimum rating, and the rating type must be either explicit
    or implicit.

    Args:
        matrix_config: the matrix configuration dictionary to parse from.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed rating matrix configuration or None on failure.
    """
    rating_min = parse_float(
        matrix_config,
        KEY_RATING_MIN,
        event_dispatcher
    )
    if rating_min is None:
        return None

    if rating_min <= 0.0:
        # the message describes the offending value, matching the style of the
        # maximum rating error below
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: matrix configuration contains minimum rating less than or equal to zero'
        ))
        return None

    rating_max = parse_float(
        matrix_config,
        KEY_RATING_MAX,
        event_dispatcher
    )
    if rating_max is None:
        return None

    if rating_max < rating_min:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: matrix configuration contains maximum rating less than minimum rating'
        ))
        return None

    rating_type = parse_string(
        matrix_config,
        KEY_RATING_TYPE,
        event_dispatcher,
        one_of_list=[DATASET_RATINGS_EXPLICIT, DATASET_RATINGS_IMPLICIT]
    )
    if rating_type is None:
        return None

    return RatingMatrixConfig(rating_min, rating_max, rating_type)
723
724
def parse_string(
        config: Dict[str, Any],
        string_key: str,
        event_dispatcher: EventDispatcher,
        *,
        one_of_list: Optional[List[str]]=None) -> Optional[str]:
    """Parse a required string from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        string_key: the key in the configuration that contains the string.
        event_dispatcher: to dispatch the parse event on failure.
        one_of_list: when not None the string is expected to be one of the specified list.

    Returns:
        the parsed string or None on failure.
    """
    if not assert_is_key_in_dict(
        string_key,
        config,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\' value'
    ):
        return None

    string_value = config[string_key]

    if not assert_is_type(
        string_value,
        str,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\''
    ):
        return None

    # only restrict the value when the caller supplied the allowed options
    if one_of_list is not None and not assert_is_one_of_list(
        string_value,
        one_of_list,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\''
    ):
        return None

    return string_value
767
768
def parse_string_list(
        config: Dict[str, Any],
        string_list_key: str,
        min_list_length: int,
        event_dispatcher: EventDispatcher) -> Optional[List[str]]:
    """Parse a required list of strings from the configuration.

    Every element is checked to be a string before the length of the list is
    compared to the minimum length.

    Args:
        config: the configuration dictionary to parse from.
        string_list_key: the key in the configuration that contains the string list.
        min_list_length: the minimum length of the list to succeed.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed string list or None on failure.
    """
    has_key = assert_is_key_in_dict(
        string_list_key,
        config,
        event_dispatcher,
        "PARSE ERROR: configuration contains invalid '" + string_list_key + "' value"
    )
    if not has_key:
        return None

    raw_list = config[string_list_key]
    is_list = assert_is_type(
        raw_list,
        list,
        event_dispatcher,
        "PARSE ERROR: configuration contains invalid '" + string_list_key + "'"
    )
    if not is_list:
        return None

    # stop at the first element that is not a string
    for element in raw_list:
        is_string = assert_is_type(
            element,
            str,
            event_dispatcher,
            "PARSE ERROR: configuration list '" + string_list_key + "' contains invalid value"
        )
        if not is_string:
            return None

    # return a copy so the configuration list itself is not handed out
    strings = list(raw_list)
    if len(strings) >= min_list_length:
        return strings

    event_dispatcher.dispatch(ParseEventArgs(
        ON_PARSE,
        "PARSE ERROR: configuration list '" + string_list_key + "' contains too few values"
    ))
    return None
class DatasetConfigParser:
 46class DatasetConfigParser:
 47    """Dataset Configuration Parser.
 48
 49    Public methods:
 50
 51    parse_dataset_config
 52    parse_dataset_config_from_yml
 53    """
 54
 55    def __init__(self, verbose: bool):
 56        """Construct the DatasetConfigParser.
 57
 58        Args:
 59            verbose: whether the parser should give verbose output.
 60        """
 61        self.verbose = verbose
 62        handle_parse_event = lambda parser, args: \
 63            print_parse_event(args) if parser.verbose else None
 64
 65        self.event_dispatcher = EventDispatcher()
 66        self.event_dispatcher.add_listener(ON_PARSE, self, (handle_parse_event, None))
 67
 68    def parse_dataset_config(
 69            self,
 70            data_dir: str,
 71            dataset_config: Dict[str, Any],
 72            available_datasets: List[str]) -> Optional[DatasetConfig]:
 73        """Parse a dataset configuration.
 74
 75        Args:
 76            data_dir: the directory where the dataset is stored.
 77            dataset_config: the dataset configuration.
 78            available_datasets: a list of already available datasets.
 79
 80        Returns:
 81            the parsed configuration or None on failure.
 82        """
 83        # attempt to parse the name of the dataset
 84        dataset_name = parse_string(
 85            dataset_config,
 86            KEY_DATASET,
 87            self.event_dispatcher
 88        )
 89        if dataset_name is None:
 90            return None
 91
 92        # verify that the dataset name is not already present
 93        if dataset_name in available_datasets:
 94            self.event_dispatcher.dispatch(ParseEventArgs(
 95                ON_PARSE,
 96                'PARSE ERROR: dataset already exists: ' + dataset_name
 97            ))
 98            return None
 99
100        # attempt to parse the dataset (event) tables
101        events = self.parse_dataset_events(data_dir, dataset_config)
102
103        # attempt to parse the dataset (matrix) tables
104        matrices = self.parse_dataset_matrices(data_dir, dataset_config)
105
106        # attempt to parse the dataset (other) tables
107        tables = self.parse_dataset_tables(data_dir, dataset_config)
108
109        return DatasetConfig(
110            dataset_name,
111            events,
112            matrices,
113            tables
114        )
115
116    def parse_dataset_config_from_yml(
117            self,
118            data_dir: str,
119            file_name: str,
120            available_datasets: List[str]) -> Optional[DatasetConfig]:
121        """Parse a dataset configuration.
122
123        Args:
124            data_dir: the directory where the dataset is stored.
125            file_name: the name of the yml file with extension.
126            available_datasets: a list of already available datasets.
127
128        Returns:
129            the parsed configuration or None on failure.
130        """
131        return self.parse_dataset_config(
132            data_dir,
133            load_yml(os.path.join(data_dir, file_name)),
134            available_datasets
135        )
136
137    def parse_dataset_events(
138            self,
139            data_dir: str,
140            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
141        """Parse dataset event tables from the configuration.
142
143        Args:
144            data_dir: the directory where the dataset is stored.
145            dataset_config: the dataset configuration.
146
147        Returns:
148            a dictionary with parsed event table configurations.
149        """
150        events = {}
151        if dataset_config.get(KEY_EVENTS) is not None:
152            if assert_is_type(
153                dataset_config[KEY_EVENTS],
154                dict,
155                self.event_dispatcher,
156                'PARSE WARNING: dataset events invalid value'
157            ):
158                for table_name, table_config in dataset_config[KEY_EVENTS].items():
159                    config = self.parse_dataset_table_config(data_dir, table_config)
160                    if config is None:
161                        continue
162
163                    events[table_name] = config
164
165        return events
166
167    def parse_dataset_matrices(
168            self,
169            data_dir: str,
170            dataset_config: Dict[str, Any]) -> Dict[str, DatasetMatrixConfig]:
171        """Parse dataset matrices from the configuration.
172
173        Args:
174            data_dir: the directory where the dataset is stored.
175            dataset_config: the dataset configuration.
176
177        Returns:
178            a dictionary with parsed matrix configurations.
179        """
180        matrices = {}
181        if dataset_config.get(KEY_MATRICES) is not None:
182            if assert_is_type(
183                dataset_config[KEY_MATRICES],
184                dict,
185                self.event_dispatcher,
186                'PARSE WARNING: dataset matrices invalid value'
187            ):
188                for matrix_name, matrix_config in dataset_config[KEY_MATRICES].items():
189                    config = self.parse_dataset_matrix_config(
190                        data_dir,
191                        matrix_config
192                    )
193                    if config is None:
194                        continue
195
196                    matrices[matrix_name] = config
197
198        return matrices
199
200    def parse_dataset_tables(
201            self,
202            data_dir: str,
203            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
204        """Parse dataset tables from the configuration.
205
206        Args:
207            data_dir: the directory where the dataset is stored.
208            dataset_config: the dataset configuration.
209
210        Returns:
211            a dictionary with parsed table configurations.
212        """
213        tables = {}
214        if dataset_config.get(KEY_TABLES) is not None:
215            if assert_is_type(
216                dataset_config[KEY_TABLES],
217                dict,
218                self.event_dispatcher,
219                'PARSE WARNING: dataset tables invalid value'
220            ):
221                for table_name, table_config in dataset_config[KEY_TABLES].items():
222                    config = self.parse_dataset_table_config(data_dir, table_config)
223                    if config is None:
224                        continue
225
226                    tables[table_name] = config
227
228        return tables
229
230    def parse_file_options_config(
231            self,
232            file_config: Dict[str, Any]) -> Optional[FileOptionsConfig]:
233        """Parse a dataset file configuration.
234
235        Args:
236            file_config: the dataset file configuration.
237
238        Returns:
239            the parsed configuration or None on failure.
240        """
241        # attempt to parse the optional separator string
242        success, file_sep = parse_optional_string(
243            file_config,
244            TABLE_SEP,
245            VALID_SEPARATORS,
246            self.event_dispatcher
247        )
248        if not success:
249            return None
250
251        # attempt to parse the optional compression string
252        success, file_compression = parse_optional_string(
253            file_config,
254            TABLE_COMPRESSION,
255            VALID_COMPRESSIONS,
256            self.event_dispatcher
257        )
258        if not success:
259            return None
260
261        # attempt to parse the optional encoding string
262        success, file_encoding = parse_optional_string(
263            file_config,
264            TABLE_ENCODING,
265            VALID_ENCODINGS,
266            self.event_dispatcher
267        )
268        if not success:
269            return None
270
271        # attempt to parse the optional header boolean
272        success, file_header = parse_optional_bool(
273            file_config,
274            TABLE_HEADER,
275            self.event_dispatcher
276        )
277        if not success:
278            return None
279
280        return FileOptionsConfig(
281            file_sep,
282            file_compression,
283            file_encoding,
284            file_header
285        )
286
287    def parse_dataset_file_config(
288            self,
289            data_dir: str,
290            file_config: Dict[str, Any]) -> Optional[DatasetFileConfig]:
291        """Parse a dataset file configuration.
292
293        Args:
294            data_dir: the directory where the file is stored.
295            file_config: the dataset file configuration.
296
297        Returns:
298            the parsed configuration or None on failure.
299        """
300        # attempt to parse the (required) file name
301        success, file_name = parse_file_name(
302            data_dir,
303            file_config,
304            KEY_NAME,
305            self.event_dispatcher
306        )
307        if not success:
308            return None
309
310        # attempt to parse the file options
311        file_options = self.parse_file_options_config(file_config)
312        if file_options is None:
313            return None
314
315        return DatasetFileConfig(file_name, file_options)
316
317    def parse_dataset_index_config(
318            self,
319            data_dir: str,
320            index_config: Dict[str, Any]) -> Optional[DatasetIndexConfig]:
321        """Parse a dataset matrix' user/item index configuration.
322
323        Args:
324            data_dir: the directory where the file is stored.
325            index_config: the dataset matrix index configuration.
326
327        Returns:
328            the parsed configuration or None on failure.
329        """
330        # attempt to parse (optional) file name
331        success, file_name = parse_file_name(
332            data_dir,
333            index_config,
334            TABLE_FILE,
335            self.event_dispatcher,
336            required=False
337        )
338        if not success:
339            return None
340
341        # attempt to parse the key that is associated with the index
342        file_key = parse_string(
343            index_config,
344            TABLE_KEY,
345            self.event_dispatcher
346        )
347        if file_key is None:
348            return None
349
350        # attempt to parse the number of records in the file
351        num_records = parse_int(
352            index_config,
353            TABLE_NUM_RECORDS,
354            self.event_dispatcher
355        )
356        if num_records is None:
357            return None
358
359        return DatasetIndexConfig(file_name, file_key, num_records)
360
361    def parse_dataset_matrix_config(
362            self,
363            data_dir: str,
364            matrix_config: Dict[str, Any]) -> Optional[DatasetMatrixConfig]:
365        """Parse a dataset matrix configuration.
366
367        Args:
368            data_dir: the directory where the dataset matrix is stored.
369            matrix_config: the dataset matrix configuration.
370
371        Returns:
372            the parsed configuration or None on failure.
373        """
374        # attempt to parse the matrix table
375        matrix_table = self.parse_dataset_table_config(data_dir,
376                                                       matrix_config.get(KEY_MATRIX, {}))
377        if matrix_table is None:
378            return None
379
380        # attempt to parse the matrix users
381        matrix_users = self.parse_dataset_index_config(data_dir,
382                                                       matrix_config.get(KEY_IDX_USER, {}))
383        if matrix_users is None:
384            return None
385
386        # attempt to parse the matrix items
387        matrix_items = self.parse_dataset_index_config(data_dir,
388                                                       matrix_config.get(KEY_IDX_ITEM, {}))
389        if matrix_items is None:
390            return None
391
392        # attempt to parse the matrix ratings
393        matrix_ratings = parse_rating_matrix(
394            matrix_config,
395            self.event_dispatcher
396        )
397        if matrix_ratings is None:
398            return None
399
400        return DatasetMatrixConfig(
401            matrix_table,
402            matrix_ratings,
403            matrix_users,
404            matrix_items
405        )
406
407    def parse_dataset_table_config(
408            self,
409            data_dir: str,
410            table_config: Dict[str, Any]) -> Optional[DatasetTableConfig]:
411        """Parse a dataset table configuration.
412
413        Args:
414            data_dir: the directory where the table is stored.
415            table_config: the dataset table configuration.
416
417        Returns:
418            the parsed configuration or None on failure.
419        """
420        file_config = self.parse_dataset_file_config(data_dir, table_config.get(TABLE_FILE, {}))
421        if file_config is None:
422            return None
423
424        table_primary_key = parse_string_list(
425            table_config,
426            TABLE_PRIMARY_KEY,
427            1,
428            self.event_dispatcher
429        )
430        if table_primary_key is None:
431            return None
432
433        table_foreign_keys = None
434        if TABLE_FOREIGN_KEYS in table_config:
435            table_foreign_keys = parse_string_list(
436                table_config,
437                TABLE_FOREIGN_KEYS,
438                0,
439                self.event_dispatcher
440            )
441
442        table_columns = parse_string_list(
443            table_config,
444            TABLE_COLUMNS,
445            1,
446            self.event_dispatcher
447        )
448        if table_columns is None:
449            return None
450
451        table_num_records = parse_int(
452            table_config,
453            TABLE_NUM_RECORDS,
454            self.event_dispatcher
455        )
456        if table_num_records is None:
457            return None
458
459        return DatasetTableConfig(
460            table_primary_key,
461            table_foreign_keys,
462            table_columns,
463            table_num_records,
464            file_config
465        )

Dataset Configuration Parser.

Public methods:

parse_dataset_config, parse_dataset_config_from_yml

DatasetConfigParser(verbose: bool)
55    def __init__(self, verbose: bool):
56        """Construct the DatasetConfigParser.
57
58        Args:
59            verbose: whether the parser should give verbose output.
60        """
61        self.verbose = verbose
62        handle_parse_event = lambda parser, args: \
63            print_parse_event(args) if parser.verbose else None
64
65        self.event_dispatcher = EventDispatcher()
66        self.event_dispatcher.add_listener(ON_PARSE, self, (handle_parse_event, None))

Construct the DatasetConfigParser.

Args: verbose: whether the parser should give verbose output.

def parse_dataset_config( self, data_dir: str, dataset_config: Dict[str, Any], available_datasets: List[str]) -> Optional[src.fairreckitlib.data.set.dataset_config.DatasetConfig]:
 68    def parse_dataset_config(
 69            self,
 70            data_dir: str,
 71            dataset_config: Dict[str, Any],
 72            available_datasets: List[str]) -> Optional[DatasetConfig]:
 73        """Parse a dataset configuration.
 74
 75        Args:
 76            data_dir: the directory where the dataset is stored.
 77            dataset_config: the dataset configuration.
 78            available_datasets: a list of already available datasets.
 79
 80        Returns:
 81            the parsed configuration or None on failure.
 82        """
 83        # attempt to parse the name of the dataset
 84        dataset_name = parse_string(
 85            dataset_config,
 86            KEY_DATASET,
 87            self.event_dispatcher
 88        )
 89        if dataset_name is None:
 90            return None
 91
 92        # verify that the dataset name is not already present
 93        if dataset_name in available_datasets:
 94            self.event_dispatcher.dispatch(ParseEventArgs(
 95                ON_PARSE,
 96                'PARSE ERROR: dataset already exists: ' + dataset_name
 97            ))
 98            return None
 99
100        # attempt to parse the dataset (event) tables
101        events = self.parse_dataset_events(data_dir, dataset_config)
102
103        # attempt to parse the dataset (matrix) tables
104        matrices = self.parse_dataset_matrices(data_dir, dataset_config)
105
106        # attempt to parse the dataset (other) tables
107        tables = self.parse_dataset_tables(data_dir, dataset_config)
108
109        return DatasetConfig(
110            dataset_name,
111            events,
112            matrices,
113            tables
114        )

Parse a dataset configuration.

Args: data_dir: the directory where the dataset is stored. dataset_config: the dataset configuration. available_datasets: a list of already available datasets.

Returns: the parsed configuration or None on failure.

def parse_dataset_config_from_yml( self, data_dir: str, file_name: str, available_datasets: List[str]) -> Optional[src.fairreckitlib.data.set.dataset_config.DatasetConfig]:
116    def parse_dataset_config_from_yml(
117            self,
118            data_dir: str,
119            file_name: str,
120            available_datasets: List[str]) -> Optional[DatasetConfig]:
121        """Parse a dataset configuration.
122
123        Args:
124            data_dir: the directory where the dataset is stored.
125            file_name: the name of the yml file with extension.
126            available_datasets: a list of already available datasets.
127
128        Returns:
129            the parsed configuration or None on failure.
130        """
131        return self.parse_dataset_config(
132            data_dir,
133            load_yml(os.path.join(data_dir, file_name)),
134            available_datasets
135        )

Parse a dataset configuration from a yml file.

Args: data_dir: the directory where the dataset is stored. file_name: the name of the yml file with extension. available_datasets: a list of already available datasets.

Returns: the parsed configuration or None on failure.

def parse_dataset_events( self, data_dir: str, dataset_config: Dict[str, Any]) -> Dict[str, src.fairreckitlib.data.set.dataset_config.DatasetTableConfig]:
137    def parse_dataset_events(
138            self,
139            data_dir: str,
140            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
141        """Parse dataset event tables from the configuration.
142
143        Args:
144            data_dir: the directory where the dataset is stored.
145            dataset_config: the dataset configuration.
146
147        Returns:
148            a dictionary with parsed event table configurations.
149        """
150        events = {}
151        if dataset_config.get(KEY_EVENTS) is not None:
152            if assert_is_type(
153                dataset_config[KEY_EVENTS],
154                dict,
155                self.event_dispatcher,
156                'PARSE WARNING: dataset events invalid value'
157            ):
158                for table_name, table_config in dataset_config[KEY_EVENTS].items():
159                    config = self.parse_dataset_table_config(data_dir, table_config)
160                    if config is None:
161                        continue
162
163                    events[table_name] = config
164
165        return events

Parse dataset event tables from the configuration.

Args: data_dir: the directory where the dataset is stored. dataset_config: the dataset configuration.

Returns: a dictionary with parsed event table configurations.

def parse_dataset_matrices( self, data_dir: str, dataset_config: Dict[str, Any]) -> Dict[str, src.fairreckitlib.data.set.dataset_config.DatasetMatrixConfig]:
167    def parse_dataset_matrices(
168            self,
169            data_dir: str,
170            dataset_config: Dict[str, Any]) -> Dict[str, DatasetMatrixConfig]:
171        """Parse dataset matrices from the configuration.
172
173        Args:
174            data_dir: the directory where the dataset is stored.
175            dataset_config: the dataset configuration.
176
177        Returns:
178            a dictionary with parsed matrix configurations.
179        """
180        matrices = {}
181        if dataset_config.get(KEY_MATRICES) is not None:
182            if assert_is_type(
183                dataset_config[KEY_MATRICES],
184                dict,
185                self.event_dispatcher,
186                'PARSE WARNING: dataset matrices invalid value'
187            ):
188                for matrix_name, matrix_config in dataset_config[KEY_MATRICES].items():
189                    config = self.parse_dataset_matrix_config(
190                        data_dir,
191                        matrix_config
192                    )
193                    if config is None:
194                        continue
195
196                    matrices[matrix_name] = config
197
198        return matrices

Parse dataset matrices from the configuration.

Args: data_dir: the directory where the dataset is stored. dataset_config: the dataset configuration.

Returns: a dictionary with parsed matrix configurations.

def parse_dataset_tables( self, data_dir: str, dataset_config: Dict[str, Any]) -> Dict[str, src.fairreckitlib.data.set.dataset_config.DatasetTableConfig]:
200    def parse_dataset_tables(
201            self,
202            data_dir: str,
203            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
204        """Parse dataset tables from the configuration.
205
206        Args:
207            data_dir: the directory where the dataset is stored.
208            dataset_config: the dataset configuration.
209
210        Returns:
211            a dictionary with parsed table configurations.
212        """
213        tables = {}
214        if dataset_config.get(KEY_TABLES) is not None:
215            if assert_is_type(
216                dataset_config[KEY_TABLES],
217                dict,
218                self.event_dispatcher,
219                'PARSE WARNING: dataset tables invalid value'
220            ):
221                for table_name, table_config in dataset_config[KEY_TABLES].items():
222                    config = self.parse_dataset_table_config(data_dir, table_config)
223                    if config is None:
224                        continue
225
226                    tables[table_name] = config
227
228        return tables

Parse dataset tables from the configuration.

Args: data_dir: the directory where the dataset is stored. dataset_config: the dataset configuration.

Returns: a dictionary with parsed table configurations.

def parse_file_options_config( self, file_config: Dict[str, Any]) -> Optional[src.fairreckitlib.data.set.dataset_config.FileOptionsConfig]:
230    def parse_file_options_config(
231            self,
232            file_config: Dict[str, Any]) -> Optional[FileOptionsConfig]:
233        """Parse a dataset file configuration.
234
235        Args:
236            file_config: the dataset file configuration.
237
238        Returns:
239            the parsed configuration or None on failure.
240        """
241        # attempt to parse the optional separator string
242        success, file_sep = parse_optional_string(
243            file_config,
244            TABLE_SEP,
245            VALID_SEPARATORS,
246            self.event_dispatcher
247        )
248        if not success:
249            return None
250
251        # attempt to parse the optional compression string
252        success, file_compression = parse_optional_string(
253            file_config,
254            TABLE_COMPRESSION,
255            VALID_COMPRESSIONS,
256            self.event_dispatcher
257        )
258        if not success:
259            return None
260
261        # attempt to parse the optional encoding string
262        success, file_encoding = parse_optional_string(
263            file_config,
264            TABLE_ENCODING,
265            VALID_ENCODINGS,
266            self.event_dispatcher
267        )
268        if not success:
269            return None
270
271        # attempt to parse the optional header boolean
272        success, file_header = parse_optional_bool(
273            file_config,
274            TABLE_HEADER,
275            self.event_dispatcher
276        )
277        if not success:
278            return None
279
280        return FileOptionsConfig(
281            file_sep,
282            file_compression,
283            file_encoding,
284            file_header
285        )

Parse the file options of a dataset file configuration.

Args: file_config: the dataset file configuration.

Returns: the parsed configuration or None on failure.

def parse_dataset_file_config( self, data_dir: str, file_config: Dict[str, Any]) -> Optional[src.fairreckitlib.data.set.dataset_config.DatasetFileConfig]:
287    def parse_dataset_file_config(
288            self,
289            data_dir: str,
290            file_config: Dict[str, Any]) -> Optional[DatasetFileConfig]:
291        """Parse a dataset file configuration.
292
293        Args:
294            data_dir: the directory where the file is stored.
295            file_config: the dataset file configuration.
296
297        Returns:
298            the parsed configuration or None on failure.
299        """
300        # attempt to parse the (required) file name
301        success, file_name = parse_file_name(
302            data_dir,
303            file_config,
304            KEY_NAME,
305            self.event_dispatcher
306        )
307        if not success:
308            return None
309
310        # attempt to parse the file options
311        file_options = self.parse_file_options_config(file_config)
312        if file_options is None:
313            return None
314
315        return DatasetFileConfig(file_name, file_options)

Parse a dataset file configuration.

Args: data_dir: the directory where the file is stored. file_config: the dataset file configuration.

Returns: the parsed configuration or None on failure.

def parse_dataset_index_config( self, data_dir: str, index_config: Dict[str, Any]) -> Optional[src.fairreckitlib.data.set.dataset_config.DatasetIndexConfig]:
317    def parse_dataset_index_config(
318            self,
319            data_dir: str,
320            index_config: Dict[str, Any]) -> Optional[DatasetIndexConfig]:
321        """Parse a dataset matrix' user/item index configuration.
322
323        Args:
324            data_dir: the directory where the file is stored.
325            index_config: the dataset matrix index configuration.
326
327        Returns:
328            the parsed configuration or None on failure.
329        """
330        # attempt to parse (optional) file name
331        success, file_name = parse_file_name(
332            data_dir,
333            index_config,
334            TABLE_FILE,
335            self.event_dispatcher,
336            required=False
337        )
338        if not success:
339            return None
340
341        # attempt to parse the key that is associated with the index
342        file_key = parse_string(
343            index_config,
344            TABLE_KEY,
345            self.event_dispatcher
346        )
347        if file_key is None:
348            return None
349
350        # attempt to parse the number of records in the file
351        num_records = parse_int(
352            index_config,
353            TABLE_NUM_RECORDS,
354            self.event_dispatcher
355        )
356        if num_records is None:
357            return None
358
359        return DatasetIndexConfig(file_name, file_key, num_records)

Parse a dataset matrix's user/item index configuration.

Args: data_dir: the directory where the file is stored. index_config: the dataset matrix index configuration.

Returns: the parsed configuration or None on failure.

def parse_dataset_matrix_config( self, data_dir: str, matrix_config: Dict[str, Any]) -> Optional[src.fairreckitlib.data.set.dataset_config.DatasetMatrixConfig]:
361    def parse_dataset_matrix_config(
362            self,
363            data_dir: str,
364            matrix_config: Dict[str, Any]) -> Optional[DatasetMatrixConfig]:
365        """Parse a dataset matrix configuration.
366
367        Args:
368            data_dir: the directory where the dataset matrix is stored.
369            matrix_config: the dataset matrix configuration.
370
371        Returns:
372            the parsed configuration or None on failure.
373        """
374        # attempt to parse the matrix table
375        matrix_table = self.parse_dataset_table_config(data_dir,
376                                                       matrix_config.get(KEY_MATRIX, {}))
377        if matrix_table is None:
378            return None
379
380        # attempt to parse the matrix users
381        matrix_users = self.parse_dataset_index_config(data_dir,
382                                                       matrix_config.get(KEY_IDX_USER, {}))
383        if matrix_users is None:
384            return None
385
386        # attempt to parse the matrix items
387        matrix_items = self.parse_dataset_index_config(data_dir,
388                                                       matrix_config.get(KEY_IDX_ITEM, {}))
389        if matrix_items is None:
390            return None
391
392        # attempt to parse the matrix ratings
393        matrix_ratings = parse_rating_matrix(
394            matrix_config,
395            self.event_dispatcher
396        )
397        if matrix_ratings is None:
398            return None
399
400        return DatasetMatrixConfig(
401            matrix_table,
402            matrix_ratings,
403            matrix_users,
404            matrix_items
405        )

Parse a dataset matrix configuration.

Args: data_dir: the directory where the dataset matrix is stored. matrix_config: the dataset matrix configuration.

Returns: the parsed configuration or None on failure.

def parse_dataset_table_config( self, data_dir: str, table_config: Dict[str, Any]) -> Optional[src.fairreckitlib.data.set.dataset_config.DatasetTableConfig]:
407    def parse_dataset_table_config(
408            self,
409            data_dir: str,
410            table_config: Dict[str, Any]) -> Optional[DatasetTableConfig]:
411        """Parse a dataset table configuration.
412
413        Args:
414            data_dir: the directory where the table is stored.
415            table_config: the dataset table configuration.
416
417        Returns:
418            the parsed configuration or None on failure.
419        """
420        file_config = self.parse_dataset_file_config(data_dir, table_config.get(TABLE_FILE, {}))
421        if file_config is None:
422            return None
423
424        table_primary_key = parse_string_list(
425            table_config,
426            TABLE_PRIMARY_KEY,
427            1,
428            self.event_dispatcher
429        )
430        if table_primary_key is None:
431            return None
432
433        table_foreign_keys = None
434        if TABLE_FOREIGN_KEYS in table_config:
435            table_foreign_keys = parse_string_list(
436                table_config,
437                TABLE_FOREIGN_KEYS,
438                0,
439                self.event_dispatcher
440            )
441
442        table_columns = parse_string_list(
443            table_config,
444            TABLE_COLUMNS,
445            1,
446            self.event_dispatcher
447        )
448        if table_columns is None:
449            return None
450
451        table_num_records = parse_int(
452            table_config,
453            TABLE_NUM_RECORDS,
454            self.event_dispatcher
455        )
456        if table_num_records is None:
457            return None
458
459        return DatasetTableConfig(
460            table_primary_key,
461            table_foreign_keys,
462            table_columns,
463            table_num_records,
464            file_config
465        )

Parse a dataset table configuration.

Args: data_dir: the directory where the table is stored. table_config: the dataset table configuration.

Returns: the parsed configuration or None on failure.

def parse_file_name( data_dir: str, file_config: Dict[str, Any], file_key: str, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher, *, required: bool = True) -> Tuple[bool, Optional[str]]:
def parse_file_name(
        data_dir: str,
        file_config: Dict[str, Any],
        file_key: str,
        event_dispatcher: EventDispatcher,
        *,
        required: bool=True) -> Tuple[bool, Optional[str]]:
    """Retrieve a file name from the configuration and verify the file exists.

    A file name that is present must be a string and must refer to an
    existing file inside the specified data directory. When the file name
    is not required, a missing key or a None value is accepted as is.

    Args:
        data_dir: the directory where the file is stored.
        file_config: the configuration dictionary to parse from.
        file_key: the key in the configuration that contains the file name.
        event_dispatcher: to dispatch the parse event on failure.
        required: whether the parsing is required to succeed.

    Returns:
        a tuple of the success flag and the file name; the name is None on
        failure or when an optional file name is absent.
    """
    failure = (False, None)

    # Only a required file name has to be present as a key.
    if required:
        key_present = assert_is_key_in_dict(
            file_key,
            file_config,
            event_dispatcher,
            'PARSE ERROR: file configuration missing key \'' + file_key + '\''
        )
        if not key_present:
            return failure

    parsed_name = file_config.get(file_key)
    if parsed_name is None:
        # An absent optional file name is a success without a value.
        if not required:
            return True, None

        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: file configuration missing value for \'' + file_key + '\''
        ))
        return failure

    name_is_string = assert_is_type(
        parsed_name,
        str,
        event_dispatcher,
        'PARSE ERROR: file configuration contains invalid name'
    )
    if not name_is_string:
        return failure

    # The name is only accepted when it refers to an existing file on disk.
    full_path = os.path.join(data_dir, parsed_name)
    if os.path.isfile(full_path):
        return True, parsed_name

    event_dispatcher.dispatch(ParseEventArgs(
        ON_PARSE,
        'PARSE ERROR: file configuration file name does not exist: ' + full_path
    ))
    return failure

Parse the file name from the configuration.

In addition, when the file name is parsed correctly it is checked for existence in the specified data directory.

Args: data_dir: the directory where the file is stored. file_config: the configuration dictionary to parse from. file_key: the key in the configuration that contains the file name. event_dispatcher: to dispatch the parse event on failure. required: whether the parsing is required to succeed.

Returns: whether the parsing succeeded and the parsed file name or None on failure.

def parse_float( config: Dict[str, Any], float_key: str, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher) -> Optional[float]:
def parse_float(
        config: Dict[str, Any],
        float_key: str,
        event_dispatcher: EventDispatcher) -> Optional[float]:
    """Retrieve a floating-point value from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        float_key: the key in the configuration that contains the floating-point value.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the floating-point value, or None when the key is missing or the
        value does not pass the float type assertion.
    """
    key_present = assert_is_key_in_dict(
        float_key,
        config,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + float_key + '\' value'
    )
    if not key_present:
        return None

    candidate = config[float_key]
    is_float = assert_is_type(
        candidate,
        float,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + float_key + '\''
    )

    return candidate if is_float else None

Parse a floating-point value from the configuration.

Args: config: the configuration dictionary to parse from. float_key: the key in the configuration that contains the floating-point value. event_dispatcher: to dispatch the parse event on failure.

Returns: the parsed floating-point value or None on failure.

def parse_int( config: Dict[str, Any], int_key: str, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher) -> Optional[int]:
def parse_int(
        config: Dict[str, Any],
        int_key: str,
        event_dispatcher: EventDispatcher) -> Optional[int]:
    """Retrieve a strictly positive integer value from the configuration.

    Parsing only succeeds for an integer that is greater than zero;
    boolean values are rejected even though they are integers in Python.

    Args:
        config: the configuration dictionary to parse from.
        int_key: the key in the configuration that contains the integer value.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the integer value, or None when the key is missing, the value is
        not an integer or the value is less than or equal to zero.
    """
    key_present = assert_is_key_in_dict(
        int_key,
        config,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + int_key + '\' value'
    )
    if not key_present:
        return None

    candidate = config[int_key]

    # bool is a subclass of int, so it is rejected before the int type check.
    if isinstance(candidate, bool):
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: configuration contains invalid \'' + int_key + '\'',
            expected_type=int,
            actual_type=bool
        ))
        return None

    is_int = assert_is_type(
        candidate,
        int,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + int_key + '\''
    )
    if not is_int:
        return None

    if candidate > 0:
        return candidate

    event_dispatcher.dispatch(ParseEventArgs(
        ON_PARSE,
        'PARSE ERROR: configuration contains invalid \'' + int_key + '\' less than or equal to zero'
    ))
    return None

Parse an integer value from the configuration.

The integer is expected to be greater than zero to be parsed successfully.

Args: config: the configuration dictionary to parse from. int_key: the key in the configuration that contains the integer value. event_dispatcher: to dispatch the parse event on failure.

Returns: the parsed integer value or None on failure.

def parse_optional_bool( config: Dict[str, Any], bool_key: str, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher) -> Tuple[bool, Optional[bool]]:
def parse_optional_bool(
        config: Dict[str, Any],
        bool_key: str,
        event_dispatcher: EventDispatcher) -> Tuple[bool, Optional[bool]]:
    """Retrieve an optional boolean from the configuration.

    A key that is missing, or that maps to None, resolves to False.

    Args:
        config: the configuration dictionary to parse from.
        bool_key: the key in the configuration that contains the boolean.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        a tuple of the success flag and the boolean value; the value is
        None when a present value does not pass the bool type assertion.
    """
    flag = config.get(bool_key)

    # Nothing to validate when the boolean is absent: fall back to False.
    if flag is None:
        return True, False

    is_bool = assert_is_type(
        flag,
        bool,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid ' + bool_key + ' value'
    )
    if not is_bool:
        return False, None

    return True, flag

Parse an optional boolean from the configuration.

Args: config: the configuration dictionary to parse from. bool_key: the key in the configuration that contains the boolean. event_dispatcher: to dispatch the parse event on failure.

Returns: whether the parsing succeeded and the optional boolean value.

def parse_optional_string( config: Dict[str, Any], string_key: str, string_options: List[str], event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher) -> Tuple[bool, Optional[str]]:
def parse_optional_string(
        config: Dict[str, Any],
        string_key: str,
        string_options: List[str],
        event_dispatcher: EventDispatcher) -> Tuple[bool, Optional[str]]:
    """Retrieve an optional string, restricted to a list of options, from the configuration.

    A key that is missing, or that maps to None, is a success without a value.

    Args:
        config: the configuration dictionary to parse from.
        string_key: the key in the configuration that contains the string.
        string_options: the options that are available for the string that is being parsed.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        a tuple of the success flag and the string value; the value is None
        when the string is absent or when a present value is rejected.
    """
    option = config.get(string_key)
    if option is None:
        return True, None

    # The option membership is only verified after the type check passed.
    is_valid = assert_is_type(
        option,
        str,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\' value'
    ) and assert_is_one_of_list(
        option,
        string_options,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\''
    )

    if is_valid:
        return True, option

    return False, None

Parse an optional string from a list of valid values from the configuration.

Args: config: the configuration dictionary to parse from. string_key: the key in the configuration that contains the string. string_options: the options that are available for the string that is being parsed. event_dispatcher: to dispatch the parse event on failure.

Returns: whether the parsing succeeded and the optional string value.

def parse_rating_matrix( matrix_config: Dict[str, Any], event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher) -> Optional[src.fairreckitlib.data.set.dataset_config.RatingMatrixConfig]:
def parse_rating_matrix(
        matrix_config: Dict[str, Any],
        event_dispatcher: EventDispatcher) -> Optional[RatingMatrixConfig]:
    """Parse a rating matrix from the configuration.

    The minimum rating is expected to be greater than zero, the maximum rating
    is expected to be at least the minimum rating and the rating type is
    expected to be either explicit or implicit.

    Args:
        matrix_config: the matrix configuration dictionary to parse from.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed rating matrix configuration or None on failure.
    """
    rating_min = parse_float(
        matrix_config,
        KEY_RATING_MIN,
        event_dispatcher
    )
    if rating_min is None:
        return None

    # A minimum rating of exactly zero is rejected as well.
    if rating_min <= 0.0:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: matrix configuration contains minimum rating less than or equal to zero'
        ))
        return None

    rating_max = parse_float(
        matrix_config,
        KEY_RATING_MAX,
        event_dispatcher
    )
    if rating_max is None:
        return None

    # The maximum rating is allowed to be equal to the minimum rating.
    if rating_max < rating_min:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: matrix configuration contains maximum rating less than minimum rating'
        ))
        return None

    rating_type = parse_string(
        matrix_config,
        KEY_RATING_TYPE,
        event_dispatcher,
        one_of_list=[DATASET_RATINGS_EXPLICIT, DATASET_RATINGS_IMPLICIT]
    )
    if rating_type is None:
        return None

    return RatingMatrixConfig(rating_min, rating_max, rating_type)

Parse a rating matrix from the configuration.

Args: matrix_config: the matrix configuration dictionary to parse from. event_dispatcher: to dispatch the parse event on failure.

Returns: the parsed rating matrix configuration or None on failure.

def parse_string( config: Dict[str, Any], string_key: str, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher, *, one_of_list: List[str] = None) -> Optional[str]:
def parse_string(
        config: Dict[str, Any],
        string_key: str,
        event_dispatcher: EventDispatcher,
        *,
        one_of_list: List[str]=None) -> Optional[str]:
    """Retrieve a string from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        string_key: the key in the configuration that contains the string.
        event_dispatcher: to dispatch the parse event on failure.
        one_of_list: when not None the string is expected to be one of the specified list.

    Returns:
        the string value, or None when the key is missing, the value is not
        a string or the value is not one of the specified list.
    """
    key_present = assert_is_key_in_dict(
        string_key,
        config,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\' value'
    )
    if not key_present:
        return None

    text = config[string_key]
    is_string = assert_is_type(
        text,
        str,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\''
    )
    if not is_string:
        return None

    # Without a list of options any string is accepted.
    if one_of_list is None:
        return text

    is_allowed = assert_is_one_of_list(
        text,
        one_of_list,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\''
    )

    return text if is_allowed else None

Parse a string from the configuration.

Args: config: the configuration dictionary to parse from. string_key: the key in the configuration that contains the string. event_dispatcher: to dispatch the parse event on failure. one_of_list: when not None the string is to be expected one of the specified list.

Returns: the parsed string or None on failure.

def parse_string_list( config: Dict[str, Any], string_list_key: str, min_list_length: int, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher) -> Optional[List[str]]:
def parse_string_list(
        config: Dict[str, Any],
        string_list_key: str,
        min_list_length: int,
        event_dispatcher: EventDispatcher) -> Optional[List[str]]:
    """Retrieve a list of strings from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        string_list_key: the key in the configuration that contains the string list.
        min_list_length: the minimum length of the list to succeed.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        a new list with the strings, or None when the key is missing, the
        value is not a list, an entry is not a string or the list is too short.
    """
    key_present = assert_is_key_in_dict(
        string_list_key,
        config,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_list_key + '\' value'
    )
    if not key_present:
        return None

    raw_list = config[string_list_key]
    is_list = assert_is_type(
        raw_list,
        list,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_list_key + '\''
    )
    if not is_list:
        return None

    # all() stops at the first entry that fails the assertion, so no entries
    # after an invalid one are checked.
    only_strings = all(
        assert_is_type(
            entry,
            str,
            event_dispatcher,
            'PARSE ERROR: configuration list \'' + string_list_key + '\' contains invalid value'
        )
        for entry in raw_list
    )
    if not only_strings:
        return None

    # The length is verified last, after every entry passed the type check.
    parsed_strings = list(raw_list)
    if len(parsed_strings) >= min_list_length:
        return parsed_strings

    event_dispatcher.dispatch(ParseEventArgs(
        ON_PARSE,
        'PARSE ERROR: configuration list \'' + string_list_key + '\' contains too few values'
    ))
    return None

Parse a list of strings from the configuration.

Args: config: the configuration dictionary to parse from. string_list_key: the key in the configuration that contains the string list. min_list_length: the minimum length of the list to succeed. event_dispatcher: to dispatch the parse event on failure.

Returns: the parsed string list or None on failure.