src.fairreckitlib.data.set.dataset_config_parser
This module contains the parser for the dataset configuration and parser utility functions.
Classes:
DatasetConfigParser: dataset configuration parser.
Functions:
parse_file_name: parse a file name from a configuration and verify existence on disk.
parse_float: parse floating-point value from a configuration.
parse_int: parse integer value from a configuration.
parse_optional_bool: parse optional boolean value from a configuration.
parse_optional_string: parse optional string value from a configuration.
parse_rating_matrix: parse rating matrix configuration.
parse_string: parse a string value from a configuration.
parse_string_list: parse a list of strings from a configuration.
This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)
"""This module contains the parser for the dataset configuration and parser utility functions.

Classes:

    DatasetConfigParser: dataset configuration parser.

Functions:

    parse_file_name: parse a file name from a configuration and verify existence on disk.
    parse_float: parse floating-point value from a configuration.
    parse_int: parse integer value from a configuration.
    parse_optional_bool: parse optional boolean value from a configuration.
    parse_optional_string: parse optional string value from a configuration.
    parse_rating_matrix: parse rating matrix configuration.
    parse_string: parse a string value from a configuration.
    parse_string_list: parse a list of strings from a configuration.

This program has been developed by students from the bachelor Computer Science at
Utrecht University within the Software Project course.
© Copyright Utrecht University (Department of Information and Computing Sciences)
"""
import os.path
from typing import Any, Dict, List, Optional, Tuple

from ...core.core_constants import KEY_NAME
from ...core.events.event_dispatcher import EventDispatcher
from ...core.io.io_utility import load_yml
from ...core.parsing.parse_assert import \
    assert_is_type, assert_is_key_in_dict, assert_is_one_of_list
from ...core.parsing.parse_event import ON_PARSE, ParseEventArgs, print_parse_event
from .dataset_constants import KEY_DATASET, KEY_EVENTS, KEY_MATRICES, KEY_TABLES
from .dataset_constants import KEY_MATRIX, KEY_IDX_ITEM, KEY_IDX_USER
from .dataset_constants import KEY_RATING_MIN, KEY_RATING_MAX, KEY_RATING_TYPE
from .dataset_constants import TABLE_KEY, TABLE_PRIMARY_KEY, TABLE_FOREIGN_KEYS, TABLE_COLUMNS
from .dataset_constants import TABLE_FILE, TABLE_COMPRESSION, TABLE_ENCODING
from .dataset_constants import TABLE_HEADER, TABLE_NUM_RECORDS, TABLE_SEP
from .dataset_config import DatasetIndexConfig, DatasetMatrixConfig, RatingMatrixConfig
from .dataset_config import DatasetConfig, DatasetFileConfig, DatasetTableConfig, FileOptionsConfig
from .dataset_config import DATASET_RATINGS_EXPLICIT, DATASET_RATINGS_IMPLICIT

VALID_SEPARATORS = [',', '|']
VALID_COMPRESSIONS = ['bz2']
VALID_ENCODINGS = ['utf-8', 'ISO-8859-1']


class DatasetConfigParser:
    """Dataset Configuration Parser.

    Parse failures are reported by dispatching an ON_PARSE event on the
    parser's event dispatcher; the event is only printed when the parser
    is verbose.

    Public methods:

    parse_dataset_config
    parse_dataset_config_from_yml
    """

    def __init__(self, verbose: bool):
        """Construct the DatasetConfigParser.

        Args:
            verbose: whether the parser should give verbose output.
        """
        self.verbose = verbose
        # the listener receives the parser itself, so toggling 'verbose'
        # after construction is respected
        handle_parse_event = lambda parser, args: \
            print_parse_event(args) if parser.verbose else None

        self.event_dispatcher = EventDispatcher()
        self.event_dispatcher.add_listener(ON_PARSE, self, (handle_parse_event, None))

    def parse_dataset_config(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any],
            available_datasets: List[str]) -> Optional[DatasetConfig]:
        """Parse a dataset configuration.

        Event, matrix and table entries that fail to parse are left out of the
        result; only a missing/invalid or duplicate dataset name (or a
        configuration that is not a dictionary) fails the whole parse.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.
            available_datasets: a list of already available datasets.

        Returns:
            the parsed configuration or None on failure.
        """
        # a configuration that is not a mapping cannot be parsed at all
        if not assert_is_type(
            dataset_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset configuration'
        ): return None

        # attempt to parse the name of the dataset
        dataset_name = parse_string(dataset_config, KEY_DATASET, self.event_dispatcher)
        if dataset_name is None:
            return None

        # verify that the dataset name is not already present
        if dataset_name in available_datasets:
            self.event_dispatcher.dispatch(ParseEventArgs(
                ON_PARSE,
                'PARSE ERROR: dataset already exists: ' + dataset_name
            ))
            return None

        # attempt to parse the dataset (event) tables
        events = self.parse_dataset_events(data_dir, dataset_config)
        # attempt to parse the dataset (matrix) tables
        matrices = self.parse_dataset_matrices(data_dir, dataset_config)
        # attempt to parse the dataset (other) tables
        tables = self.parse_dataset_tables(data_dir, dataset_config)

        return DatasetConfig(dataset_name, events, matrices, tables)

    def parse_dataset_config_from_yml(
            self,
            data_dir: str,
            file_name: str,
            available_datasets: List[str]) -> Optional[DatasetConfig]:
        """Parse a dataset configuration from a yml file in the data directory.

        Args:
            data_dir: the directory where the dataset is stored.
            file_name: the name of the yml file with extension.
            available_datasets: a list of already available datasets.

        Returns:
            the parsed configuration or None on failure.
        """
        dataset_config = load_yml(os.path.join(data_dir, file_name))
        return self.parse_dataset_config(data_dir, dataset_config, available_datasets)

    def _parse_named_configs(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any],
            config_key: str,
            warning_msg: str,
            parse_entry: Any) -> Dict[str, Any]:
        """Parse every named entry stored under a key of the dataset configuration.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.
            config_key: the key that contains the dictionary of named entries.
            warning_msg: the message to dispatch when the value is not a dictionary.
            parse_entry: callable that takes the data directory and an entry
                configuration and returns the parsed entry or None on failure.

        Returns:
            a dictionary with the successfully parsed entries, keyed by name.
        """
        parsed = {}
        named_configs = dataset_config.get(config_key)
        # a missing (or empty) section simply yields no entries
        if named_configs is None:
            return parsed

        if not assert_is_type(named_configs, dict, self.event_dispatcher, warning_msg):
            return parsed

        for name, entry_config in named_configs.items():
            config = parse_entry(data_dir, entry_config)
            # entries that fail to parse are skipped rather than failing the section
            if config is not None:
                parsed[name] = config

        return parsed

    def parse_dataset_events(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
        """Parse dataset event tables from the configuration.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.

        Returns:
            a dictionary with parsed event table configurations.
        """
        return self._parse_named_configs(
            data_dir,
            dataset_config,
            KEY_EVENTS,
            'PARSE WARNING: dataset events invalid value',
            self.parse_dataset_table_config
        )

    def parse_dataset_matrices(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any]) -> Dict[str, DatasetMatrixConfig]:
        """Parse dataset matrices from the configuration.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.

        Returns:
            a dictionary with parsed matrix configurations.
        """
        return self._parse_named_configs(
            data_dir,
            dataset_config,
            KEY_MATRICES,
            'PARSE WARNING: dataset matrices invalid value',
            self.parse_dataset_matrix_config
        )

    def parse_dataset_tables(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
        """Parse dataset tables from the configuration.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.

        Returns:
            a dictionary with parsed table configurations.
        """
        return self._parse_named_configs(
            data_dir,
            dataset_config,
            KEY_TABLES,
            'PARSE WARNING: dataset tables invalid value',
            self.parse_dataset_table_config
        )

    def parse_file_options_config(
            self,
            file_config: Dict[str, Any]) -> Optional[FileOptionsConfig]:
        """Parse the (optional) file options of a dataset file configuration.

        Args:
            file_config: the dataset file configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # attempt to parse the optional separator string
        success, file_sep = parse_optional_string(
            file_config, TABLE_SEP, VALID_SEPARATORS, self.event_dispatcher)
        if not success:
            return None

        # attempt to parse the optional compression string
        success, file_compression = parse_optional_string(
            file_config, TABLE_COMPRESSION, VALID_COMPRESSIONS, self.event_dispatcher)
        if not success:
            return None

        # attempt to parse the optional encoding string
        success, file_encoding = parse_optional_string(
            file_config, TABLE_ENCODING, VALID_ENCODINGS, self.event_dispatcher)
        if not success:
            return None

        # attempt to parse the optional header boolean
        success, file_header = parse_optional_bool(
            file_config, TABLE_HEADER, self.event_dispatcher)
        if not success:
            return None

        return FileOptionsConfig(file_sep, file_compression, file_encoding, file_header)

    def parse_dataset_file_config(
            self,
            data_dir: str,
            file_config: Dict[str, Any]) -> Optional[DatasetFileConfig]:
        """Parse a dataset file configuration.

        Args:
            data_dir: the directory where the file is stored.
            file_config: the dataset file configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # guard against e.g. an empty or scalar value instead of a mapping
        if not assert_is_type(
            file_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset file configuration'
        ): return None

        # attempt to parse the (required) file name
        success, file_name = parse_file_name(
            data_dir, file_config, KEY_NAME, self.event_dispatcher)
        if not success:
            return None

        # attempt to parse the file options
        file_options = self.parse_file_options_config(file_config)
        if file_options is None:
            return None

        return DatasetFileConfig(file_name, file_options)

    def parse_dataset_index_config(
            self,
            data_dir: str,
            index_config: Dict[str, Any]) -> Optional[DatasetIndexConfig]:
        """Parse a dataset matrix' user/item index configuration.

        Args:
            data_dir: the directory where the file is stored.
            index_config: the dataset matrix index configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # guard against e.g. an empty or scalar value instead of a mapping
        if not assert_is_type(
            index_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset matrix index configuration'
        ): return None

        # attempt to parse (optional) file name
        success, file_name = parse_file_name(
            data_dir, index_config, TABLE_FILE, self.event_dispatcher, required=False)
        if not success:
            return None

        # attempt to parse the key that is associated with the index
        file_key = parse_string(index_config, TABLE_KEY, self.event_dispatcher)
        if file_key is None:
            return None

        # attempt to parse the number of records in the file
        num_records = parse_int(index_config, TABLE_NUM_RECORDS, self.event_dispatcher)
        if num_records is None:
            return None

        return DatasetIndexConfig(file_name, file_key, num_records)

    def parse_dataset_matrix_config(
            self,
            data_dir: str,
            matrix_config: Dict[str, Any]) -> Optional[DatasetMatrixConfig]:
        """Parse a dataset matrix configuration.

        Args:
            data_dir: the directory where the dataset matrix is stored.
            matrix_config: the dataset matrix configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # guard against e.g. an empty or scalar value instead of a mapping
        if not assert_is_type(
            matrix_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset matrix configuration'
        ): return None

        # attempt to parse the matrix table
        matrix_table = self.parse_dataset_table_config(
            data_dir, matrix_config.get(KEY_MATRIX, {}))
        if matrix_table is None:
            return None

        # attempt to parse the matrix users
        matrix_users = self.parse_dataset_index_config(
            data_dir, matrix_config.get(KEY_IDX_USER, {}))
        if matrix_users is None:
            return None

        # attempt to parse the matrix items
        matrix_items = self.parse_dataset_index_config(
            data_dir, matrix_config.get(KEY_IDX_ITEM, {}))
        if matrix_items is None:
            return None

        # attempt to parse the matrix ratings
        matrix_ratings = parse_rating_matrix(matrix_config, self.event_dispatcher)
        if matrix_ratings is None:
            return None

        return DatasetMatrixConfig(matrix_table, matrix_ratings, matrix_users, matrix_items)

    def parse_dataset_table_config(
            self,
            data_dir: str,
            table_config: Dict[str, Any]) -> Optional[DatasetTableConfig]:
        """Parse a dataset table configuration.

        Args:
            data_dir: the directory where the table is stored.
            table_config: the dataset table configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # guard against e.g. an empty or scalar value instead of a mapping
        if not assert_is_type(
            table_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset table configuration'
        ): return None

        # a missing file section is parsed as empty so the required name is reported
        file_config = self.parse_dataset_file_config(
            data_dir, table_config.get(TABLE_FILE, {}))
        if file_config is None:
            return None

        table_primary_key = parse_string_list(
            table_config, TABLE_PRIMARY_KEY, 1, self.event_dispatcher)
        if table_primary_key is None:
            return None

        # foreign keys are optional and may be an empty list
        # NOTE(review): a failed parse leaves the foreign keys as None and the
        # table is still accepted - confirm this is intended.
        table_foreign_keys = None
        if TABLE_FOREIGN_KEYS in table_config:
            table_foreign_keys = parse_string_list(
                table_config, TABLE_FOREIGN_KEYS, 0, self.event_dispatcher)

        table_columns = parse_string_list(
            table_config, TABLE_COLUMNS, 1, self.event_dispatcher)
        if table_columns is None:
            return None

        table_num_records = parse_int(
            table_config, TABLE_NUM_RECORDS, self.event_dispatcher)
        if table_num_records is None:
            return None

        return DatasetTableConfig(
            table_primary_key,
            table_foreign_keys,
            table_columns,
            table_num_records,
            file_config
        )


def parse_file_name(
        data_dir: str,
        file_config: Dict[str, Any],
        file_key: str,
        event_dispatcher: EventDispatcher,
        *,
        required: bool=True) -> Tuple[bool, Optional[str]]:
    """Parse the file name from the configuration.

    In addition, when the file name is parsed correctly it is checked
    for existence in the specified data directory.

    Args:
        data_dir: the directory where the file is stored.
        file_config: the configuration dictionary to parse from.
        file_key: the key in the configuration that contains the file name.
        event_dispatcher: to dispatch the parse event on failure.
        required: whether the parsing is required to succeed.

    Returns:
        whether the parsing succeeded and the parsed file name or None on failure.
        An absent file name that is not required succeeds with None.
    """
    if required and not assert_is_key_in_dict(
        file_key,
        file_config,
        event_dispatcher,
        'PARSE ERROR: file configuration missing key \'' + file_key + '\''
    ): return False, None

    file_name = file_config.get(file_key)
    if file_name is None:
        if required:
            # the key is present but has no value
            event_dispatcher.dispatch(ParseEventArgs(
                ON_PARSE,
                'PARSE ERROR: file configuration missing value for \'' + file_key + '\''
            ))
            return False, None

        return True, None

    if not assert_is_type(
        file_name,
        str,
        event_dispatcher,
        'PARSE ERROR: file configuration contains invalid name'
    ): return False, None

    file_path = os.path.join(data_dir, file_name)
    if not os.path.isfile(file_path):
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: file configuration file name does not exist: ' + file_path
        ))
        return False, None

    return True, file_name


def parse_float(
        config: Dict[str, Any],
        float_key: str,
        event_dispatcher: EventDispatcher) -> Optional[float]:
    """Parse a floating-point value from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        float_key: the key in the configuration that contains the floating-point value.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed floating-point value or None on failure.
    """
    if not assert_is_key_in_dict(
        float_key,
        config,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + float_key + '\' value'
    ): return None

    float_value = config[float_key]

    # NOTE(review): presumably an integer value (e.g. 5 instead of 5.0) is
    # rejected by this type check - confirm whether ints should be accepted.
    if not assert_is_type(
        float_value,
        float,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + float_key + '\''
    ): return None

    return float_value


def parse_int(
        config: Dict[str, Any],
        int_key: str,
        event_dispatcher: EventDispatcher) -> Optional[int]:
    """Parse an integer value from the configuration.

    The integer is expected to be greater than zero to be parsed successfully.

    Args:
        config: the configuration dictionary to parse from.
        int_key: the key in the configuration that contains the integer value.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed integer value or None on failure.
    """
    if not assert_is_key_in_dict(
        int_key,
        config,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + int_key + '\' value'
    ): return None

    int_value = config[int_key]

    # bool is a subclass of int, so it has to be rejected explicitly
    if isinstance(int_value, bool):
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: configuration contains invalid \'' + int_key + '\'',
            expected_type=int,
            actual_type=bool
        ))
        return None

    if not assert_is_type(
        int_value,
        int,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + int_key + '\''
    ): return None

    if int_value <= 0:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: configuration contains invalid \'' + int_key +
            '\' less than or equal to zero'
        ))
        return None

    return int_value


def parse_optional_bool(
        config: Dict[str, Any],
        bool_key: str,
        event_dispatcher: EventDispatcher) -> Tuple[bool, Optional[bool]]:
    """Parse an optional boolean from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        bool_key: the key in the configuration that contains the boolean.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        whether the parsing succeeded and the optional boolean value.
        An absent value succeeds and defaults to False.
    """
    bool_value = config.get(bool_key)
    if bool_value is None:
        return True, False

    if not assert_is_type(
        bool_value,
        bool,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid ' + bool_key + ' value'
    ): return False, None

    return True, bool_value


def parse_optional_string(
        config: Dict[str, Any],
        string_key: str,
        string_options: List[str],
        event_dispatcher: EventDispatcher) -> Tuple[bool, Optional[str]]:
    """Parse an optional string from a list of valid values from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        string_key: the key in the configuration that contains the string.
        string_options: the options that are available for the string that is being parsed.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        whether the parsing succeeded and the optional string value.
        An absent value succeeds with None.
    """
    string_value = config.get(string_key)
    if string_value is None:
        return True, None

    if not assert_is_type(
        string_value,
        str,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\' value'
    ): return False, None

    if not assert_is_one_of_list(
        string_value,
        string_options,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\''
    ): return False, None

    return True, string_value


def parse_rating_matrix(
        matrix_config: Dict[str, Any],
        event_dispatcher: EventDispatcher) -> Optional[RatingMatrixConfig]:
    """Parse a rating matrix from the configuration.

    The minimum rating has to be greater than zero and the maximum rating
    may not be less than the minimum rating.

    Args:
        matrix_config: the matrix configuration dictionary to parse from.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed rating matrix configuration or None on failure.
    """
    rating_min = parse_float(matrix_config, KEY_RATING_MIN, event_dispatcher)
    if rating_min is None:
        return None

    if rating_min <= 0.0:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: matrix configuration contains minimum rating '
            'less than or equal to zero'
        ))
        return None

    rating_max = parse_float(matrix_config, KEY_RATING_MAX, event_dispatcher)
    if rating_max is None:
        return None

    if rating_max < rating_min:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: matrix configuration contains maximum rating less than minimum rating'
        ))
        return None

    rating_type = parse_string(
        matrix_config,
        KEY_RATING_TYPE,
        event_dispatcher,
        one_of_list=[DATASET_RATINGS_EXPLICIT, DATASET_RATINGS_IMPLICIT]
    )
    if rating_type is None:
        return None

    return RatingMatrixConfig(rating_min, rating_max, rating_type)


def parse_string(
        config: Dict[str, Any],
        string_key: str,
        event_dispatcher: EventDispatcher,
        *,
        one_of_list: Optional[List[str]]=None) -> Optional[str]:
    """Parse a string from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        string_key: the key in the configuration that contains the string.
        event_dispatcher: to dispatch the parse event on failure.
        one_of_list: when not None the string is expected to be one of the specified list.

    Returns:
        the parsed string or None on failure.
    """
    if not assert_is_key_in_dict(
        string_key,
        config,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\' value'
    ): return None

    string_value = config[string_key]

    if not assert_is_type(
        string_value,
        str,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\''
    ): return None

    if one_of_list is not None and not assert_is_one_of_list(
        string_value,
        one_of_list,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_key + '\''
    ): return None

    return string_value


def parse_string_list(
        config: Dict[str, Any],
        string_list_key: str,
        min_list_length: int,
        event_dispatcher: EventDispatcher) -> Optional[List[str]]:
    """Parse a list of strings from the configuration.

    Args:
        config: the configuration dictionary to parse from.
        string_list_key: the key in the configuration that contains the string list.
        min_list_length: the minimum length of the list to succeed.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed string list (a new list) or None on failure.
    """
    if not assert_is_key_in_dict(
        string_list_key,
        config,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_list_key + '\' value'
    ): return None

    string_list = config[string_list_key]

    if not assert_is_type(
        string_list,
        list,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid \'' + string_list_key + '\''
    ): return None

    # every element has to be a string; the first offender fails the parse
    for string in string_list:
        if not assert_is_type(
            string,
            str,
            event_dispatcher,
            'PARSE ERROR: configuration list \'' + string_list_key + '\' contains invalid value'
        ): return None

    if len(string_list) < min_list_length:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: configuration list \'' + string_list_key + '\' contains too few values'
        ))
        return None

    # return a copy so the configuration's own list is not shared
    return list(string_list)
class DatasetConfigParser:
    """Dataset Configuration Parser.

    Parse failures are reported by dispatching an ON_PARSE event on the
    parser's event dispatcher; the event is only printed when the parser
    is verbose.

    Public methods:

    parse_dataset_config
    parse_dataset_config_from_yml
    """

    def __init__(self, verbose: bool):
        """Construct the DatasetConfigParser.

        Args:
            verbose: whether the parser should give verbose output.
        """
        self.verbose = verbose
        # the listener receives the parser itself, so toggling 'verbose'
        # after construction is respected
        handle_parse_event = lambda parser, args: \
            print_parse_event(args) if parser.verbose else None

        self.event_dispatcher = EventDispatcher()
        self.event_dispatcher.add_listener(ON_PARSE, self, (handle_parse_event, None))

    def parse_dataset_config(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any],
            available_datasets: List[str]) -> Optional[DatasetConfig]:
        """Parse a dataset configuration.

        Event, matrix and table entries that fail to parse are left out of the
        result; only a missing/invalid or duplicate dataset name (or a
        configuration that is not a dictionary) fails the whole parse.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.
            available_datasets: a list of already available datasets.

        Returns:
            the parsed configuration or None on failure.
        """
        # a configuration that is not a mapping cannot be parsed at all
        if not assert_is_type(
            dataset_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset configuration'
        ): return None

        # attempt to parse the name of the dataset
        dataset_name = parse_string(dataset_config, KEY_DATASET, self.event_dispatcher)
        if dataset_name is None:
            return None

        # verify that the dataset name is not already present
        if dataset_name in available_datasets:
            self.event_dispatcher.dispatch(ParseEventArgs(
                ON_PARSE,
                'PARSE ERROR: dataset already exists: ' + dataset_name
            ))
            return None

        # attempt to parse the dataset (event) tables
        events = self.parse_dataset_events(data_dir, dataset_config)
        # attempt to parse the dataset (matrix) tables
        matrices = self.parse_dataset_matrices(data_dir, dataset_config)
        # attempt to parse the dataset (other) tables
        tables = self.parse_dataset_tables(data_dir, dataset_config)

        return DatasetConfig(dataset_name, events, matrices, tables)

    def parse_dataset_config_from_yml(
            self,
            data_dir: str,
            file_name: str,
            available_datasets: List[str]) -> Optional[DatasetConfig]:
        """Parse a dataset configuration from a yml file in the data directory.

        Args:
            data_dir: the directory where the dataset is stored.
            file_name: the name of the yml file with extension.
            available_datasets: a list of already available datasets.

        Returns:
            the parsed configuration or None on failure.
        """
        dataset_config = load_yml(os.path.join(data_dir, file_name))
        return self.parse_dataset_config(data_dir, dataset_config, available_datasets)

    def _parse_named_configs(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any],
            config_key: str,
            warning_msg: str,
            parse_entry: Any) -> Dict[str, Any]:
        """Parse every named entry stored under a key of the dataset configuration.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.
            config_key: the key that contains the dictionary of named entries.
            warning_msg: the message to dispatch when the value is not a dictionary.
            parse_entry: callable that takes the data directory and an entry
                configuration and returns the parsed entry or None on failure.

        Returns:
            a dictionary with the successfully parsed entries, keyed by name.
        """
        parsed = {}
        named_configs = dataset_config.get(config_key)
        # a missing (or empty) section simply yields no entries
        if named_configs is None:
            return parsed

        if not assert_is_type(named_configs, dict, self.event_dispatcher, warning_msg):
            return parsed

        for name, entry_config in named_configs.items():
            config = parse_entry(data_dir, entry_config)
            # entries that fail to parse are skipped rather than failing the section
            if config is not None:
                parsed[name] = config

        return parsed

    def parse_dataset_events(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
        """Parse dataset event tables from the configuration.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.

        Returns:
            a dictionary with parsed event table configurations.
        """
        return self._parse_named_configs(
            data_dir,
            dataset_config,
            KEY_EVENTS,
            'PARSE WARNING: dataset events invalid value',
            self.parse_dataset_table_config
        )

    def parse_dataset_matrices(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any]) -> Dict[str, DatasetMatrixConfig]:
        """Parse dataset matrices from the configuration.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.

        Returns:
            a dictionary with parsed matrix configurations.
        """
        return self._parse_named_configs(
            data_dir,
            dataset_config,
            KEY_MATRICES,
            'PARSE WARNING: dataset matrices invalid value',
            self.parse_dataset_matrix_config
        )

    def parse_dataset_tables(
            self,
            data_dir: str,
            dataset_config: Dict[str, Any],) -> Dict[str, DatasetTableConfig]:
        """Parse dataset tables from the configuration.

        Args:
            data_dir: the directory where the dataset is stored.
            dataset_config: the dataset configuration.

        Returns:
            a dictionary with parsed table configurations.
        """
        return self._parse_named_configs(
            data_dir,
            dataset_config,
            KEY_TABLES,
            'PARSE WARNING: dataset tables invalid value',
            self.parse_dataset_table_config
        )

    def parse_file_options_config(
            self,
            file_config: Dict[str, Any]) -> Optional[FileOptionsConfig]:
        """Parse the (optional) file options of a dataset file configuration.

        Args:
            file_config: the dataset file configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # attempt to parse the optional separator string
        success, file_sep = parse_optional_string(
            file_config, TABLE_SEP, VALID_SEPARATORS, self.event_dispatcher)
        if not success:
            return None

        # attempt to parse the optional compression string
        success, file_compression = parse_optional_string(
            file_config, TABLE_COMPRESSION, VALID_COMPRESSIONS, self.event_dispatcher)
        if not success:
            return None

        # attempt to parse the optional encoding string
        success, file_encoding = parse_optional_string(
            file_config, TABLE_ENCODING, VALID_ENCODINGS, self.event_dispatcher)
        if not success:
            return None

        # attempt to parse the optional header boolean
        success, file_header = parse_optional_bool(
            file_config, TABLE_HEADER, self.event_dispatcher)
        if not success:
            return None

        return FileOptionsConfig(file_sep, file_compression, file_encoding, file_header)

    def parse_dataset_file_config(
            self,
            data_dir: str,
            file_config: Dict[str, Any]) -> Optional[DatasetFileConfig]:
        """Parse a dataset file configuration.

        Args:
            data_dir: the directory where the file is stored.
            file_config: the dataset file configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # guard against e.g. an empty or scalar value instead of a mapping
        if not assert_is_type(
            file_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset file configuration'
        ): return None

        # attempt to parse the (required) file name
        success, file_name = parse_file_name(
            data_dir, file_config, KEY_NAME, self.event_dispatcher)
        if not success:
            return None

        # attempt to parse the file options
        file_options = self.parse_file_options_config(file_config)
        if file_options is None:
            return None

        return DatasetFileConfig(file_name, file_options)

    def parse_dataset_index_config(
            self,
            data_dir: str,
            index_config: Dict[str, Any]) -> Optional[DatasetIndexConfig]:
        """Parse a dataset matrix' user/item index configuration.

        Args:
            data_dir: the directory where the file is stored.
            index_config: the dataset matrix index configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # guard against e.g. an empty or scalar value instead of a mapping
        if not assert_is_type(
            index_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset matrix index configuration'
        ): return None

        # attempt to parse (optional) file name
        success, file_name = parse_file_name(
            data_dir, index_config, TABLE_FILE, self.event_dispatcher, required=False)
        if not success:
            return None

        # attempt to parse the key that is associated with the index
        file_key = parse_string(index_config, TABLE_KEY, self.event_dispatcher)
        if file_key is None:
            return None

        # attempt to parse the number of records in the file
        num_records = parse_int(index_config, TABLE_NUM_RECORDS, self.event_dispatcher)
        if num_records is None:
            return None

        return DatasetIndexConfig(file_name, file_key, num_records)

    def parse_dataset_matrix_config(
            self,
            data_dir: str,
            matrix_config: Dict[str, Any]) -> Optional[DatasetMatrixConfig]:
        """Parse a dataset matrix configuration.

        Args:
            data_dir: the directory where the dataset matrix is stored.
            matrix_config: the dataset matrix configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # guard against e.g. an empty or scalar value instead of a mapping
        if not assert_is_type(
            matrix_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset matrix configuration'
        ): return None

        # attempt to parse the matrix table
        matrix_table = self.parse_dataset_table_config(
            data_dir, matrix_config.get(KEY_MATRIX, {}))
        if matrix_table is None:
            return None

        # attempt to parse the matrix users
        matrix_users = self.parse_dataset_index_config(
            data_dir, matrix_config.get(KEY_IDX_USER, {}))
        if matrix_users is None:
            return None

        # attempt to parse the matrix items
        matrix_items = self.parse_dataset_index_config(
            data_dir, matrix_config.get(KEY_IDX_ITEM, {}))
        if matrix_items is None:
            return None

        # attempt to parse the matrix ratings
        matrix_ratings = parse_rating_matrix(matrix_config, self.event_dispatcher)
        if matrix_ratings is None:
            return None

        return DatasetMatrixConfig(matrix_table, matrix_ratings, matrix_users, matrix_items)

    def parse_dataset_table_config(
            self,
            data_dir: str,
            table_config: Dict[str, Any]) -> Optional[DatasetTableConfig]:
        """Parse a dataset table configuration.

        Args:
            data_dir: the directory where the table is stored.
            table_config: the dataset table configuration.

        Returns:
            the parsed configuration or None on failure.
        """
        # guard against e.g. an empty or scalar value instead of a mapping
        if not assert_is_type(
            table_config,
            dict,
            self.event_dispatcher,
            'PARSE ERROR: invalid dataset table configuration'
        ): return None

        # a missing file section is parsed as empty so the required name is reported
        file_config = self.parse_dataset_file_config(
            data_dir, table_config.get(TABLE_FILE, {}))
        if file_config is None:
            return None

        table_primary_key = parse_string_list(
            table_config, TABLE_PRIMARY_KEY, 1, self.event_dispatcher)
        if table_primary_key is None:
            return None

        # foreign keys are optional and may be an empty list
        # NOTE(review): a failed parse leaves the foreign keys as None and the
        # table is still accepted - confirm this is intended.
        table_foreign_keys = None
        if TABLE_FOREIGN_KEYS in table_config:
            table_foreign_keys = parse_string_list(
                table_config, TABLE_FOREIGN_KEYS, 0, self.event_dispatcher)

        table_columns = parse_string_list(
            table_config, TABLE_COLUMNS, 1, self.event_dispatcher)
        if table_columns is None:
            return None

        table_num_records = parse_int(
            table_config, TABLE_NUM_RECORDS, self.event_dispatcher)
        if table_num_records is None:
            return None

        return DatasetTableConfig(
            table_primary_key,
            table_foreign_keys,
            table_columns,
            table_num_records,
            file_config
        )
Dataset Configuration Parser.
Public methods:
parse_dataset_config, parse_dataset_config_from_yml
def __init__(self, verbose: bool):
    """Construct the DatasetConfigParser.

    An event dispatcher is created and a single ON_PARSE listener is
    registered on it that prints the parse event when verbose is enabled.

    Args:
        verbose: whether the parser should give verbose output.
    """
    def on_parse_event(parser, args):
        # the flag is read when the event fires rather than captured here
        if parser.verbose:
            return print_parse_event(args)
        return None

    self.verbose = verbose
    self.event_dispatcher = EventDispatcher()
    self.event_dispatcher.add_listener(ON_PARSE, self, (on_parse_event, None))
Construct the DatasetConfigParser.
Args: verbose: whether the parser should give verbose output.
def parse_dataset_config(
        self,
        data_dir: str,
        dataset_config: Dict[str, Any],
        available_datasets: List[str]) -> Optional[DatasetConfig]:
    """Parse a dataset configuration.

    The dataset name is required and must not be present in the available
    datasets. The event tables, matrices and other tables are parsed
    afterwards; entries that fail to parse are left out of the result
    rather than failing the dataset as a whole.

    Args:
        data_dir: the directory where the dataset is stored.
        dataset_config: the dataset configuration.
        available_datasets: a list of already available datasets.

    Returns:
        the parsed configuration or None on failure.
    """
    # a configuration that is not a dictionary (e.g. the result of loading an
    # empty file) is reported as a parse failure instead of raising on the
    # lookups below
    if not assert_is_type(
        dataset_config,
        dict,
        self.event_dispatcher,
        'PARSE ERROR: dataset configuration invalid value'
    ):
        return None

    # attempt to parse the name of the dataset
    dataset_name = parse_string(
        dataset_config,
        KEY_DATASET,
        self.event_dispatcher
    )
    if dataset_name is None:
        return None

    # verify that the dataset name is not already present
    if dataset_name in available_datasets:
        self.event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: dataset already exists: ' + dataset_name
        ))
        return None

    # attempt to parse the dataset (event) tables
    events = self.parse_dataset_events(data_dir, dataset_config)

    # attempt to parse the dataset (matrix) tables
    matrices = self.parse_dataset_matrices(data_dir, dataset_config)

    # attempt to parse the dataset (other) tables
    tables = self.parse_dataset_tables(data_dir, dataset_config)

    return DatasetConfig(
        dataset_name,
        events,
        matrices,
        tables
    )
Parse a dataset configuration.
Args: data_dir: the directory where the dataset is stored. dataset_config: the dataset configuration. available_datasets: a list of already available datasets.
Returns: the parsed configuration or None on failure.
def parse_dataset_config_from_yml(
        self,
        data_dir: str,
        file_name: str,
        available_datasets: List[str]) -> Optional[DatasetConfig]:
    """Parse a dataset configuration from a yml file.

    The yml file is loaded from the data directory and the result is
    handed to parse_dataset_config.

    Args:
        data_dir: the directory where the dataset is stored.
        file_name: the name of the yml file with extension.
        available_datasets: a list of already available datasets.

    Returns:
        the parsed configuration or None on failure.
    """
    yml_path = os.path.join(data_dir, file_name)
    yml_config = load_yml(yml_path)
    return self.parse_dataset_config(data_dir, yml_config, available_datasets)
Parse a dataset configuration from a yml file.
Args: data_dir: the directory where the dataset is stored. file_name: the name of the yml file with extension. available_datasets: a list of already available datasets.
Returns: the parsed configuration or None on failure.
def parse_dataset_events(
        self,
        data_dir: str,
        dataset_config: Dict[str, Any]) -> Dict[str, DatasetTableConfig]:
    """Parse the event tables of a dataset configuration.

    A missing (or None) events entry results in an empty dictionary, as
    does an entry that is not a dictionary. Event tables that fail to
    parse are skipped.

    Args:
        data_dir: the directory where the dataset is stored.
        dataset_config: the dataset configuration.

    Returns:
        a dictionary with parsed event table configurations.
    """
    event_configs = dataset_config.get(KEY_EVENTS)
    if event_configs is None:
        return {}

    is_dict = assert_is_type(
        event_configs,
        dict,
        self.event_dispatcher,
        'PARSE WARNING: dataset events invalid value'
    )
    if not is_dict:
        return {}

    parsed_events = {}
    for event_name, event_config in event_configs.items():
        parsed = self.parse_dataset_table_config(data_dir, event_config)
        if parsed is not None:
            parsed_events[event_name] = parsed

    return parsed_events
Parse dataset event tables from the configuration.
Args: data_dir: the directory where the dataset is stored. dataset_config: the dataset configuration.
Returns: a dictionary with parsed event table configurations.
def parse_dataset_matrices(
        self,
        data_dir: str,
        dataset_config: Dict[str, Any]) -> Dict[str, DatasetMatrixConfig]:
    """Parse the matrices of a dataset configuration.

    A missing (or None) matrices entry results in an empty dictionary, as
    does an entry that is not a dictionary. Matrices that fail to parse
    are skipped.

    Args:
        data_dir: the directory where the dataset is stored.
        dataset_config: the dataset configuration.

    Returns:
        a dictionary with parsed matrix configurations.
    """
    parsed_matrices = {}

    matrix_configs = dataset_config.get(KEY_MATRICES)
    if matrix_configs is None:
        return parsed_matrices

    if not assert_is_type(
        matrix_configs,
        dict,
        self.event_dispatcher,
        'PARSE WARNING: dataset matrices invalid value'
    ):
        return parsed_matrices

    for matrix_name in matrix_configs:
        parsed = self.parse_dataset_matrix_config(data_dir, matrix_configs[matrix_name])
        if parsed is None:
            # the failure is not fatal, only this matrix is left out
            continue

        parsed_matrices[matrix_name] = parsed

    return parsed_matrices
Parse dataset matrices from the configuration.
Args: data_dir: the directory where the dataset is stored. dataset_config: the dataset configuration.
Returns: a dictionary with parsed matrix configurations.
def parse_dataset_tables(
        self,
        data_dir: str,
        dataset_config: Dict[str, Any]) -> Dict[str, DatasetTableConfig]:
    """Parse the (other) tables of a dataset configuration.

    A missing (or None) tables entry results in an empty dictionary, as
    does an entry that is not a dictionary. Tables that fail to parse are
    skipped.

    Args:
        data_dir: the directory where the dataset is stored.
        dataset_config: the dataset configuration.

    Returns:
        a dictionary with parsed table configurations.
    """
    table_configs = dataset_config.get(KEY_TABLES)
    if table_configs is None or not assert_is_type(
        table_configs,
        dict,
        self.event_dispatcher,
        'PARSE WARNING: dataset tables invalid value'
    ):
        return {}

    # tables are parsed lazily, one by one, in configuration order
    parsed_pairs = (
        (table_name, self.parse_dataset_table_config(data_dir, table_config))
        for table_name, table_config in table_configs.items()
    )
    return {
        table_name: parsed
        for table_name, parsed in parsed_pairs
        if parsed is not None
    }
Parse dataset tables from the configuration.
Args: data_dir: the directory where the dataset is stored. dataset_config: the dataset configuration.
Returns: a dictionary with parsed table configurations.
def parse_file_options_config(
        self,
        file_config: Dict[str, Any]) -> Optional[FileOptionsConfig]:
    """Parse the file options of a dataset file configuration.

    The separator, compression and encoding are optional strings that are
    each restricted to a list of valid values, the header is an optional
    boolean. Parsing stops at the first option that fails.

    Args:
        file_config: the dataset file configuration.

    Returns:
        the parsed configuration or None on failure.
    """
    # the optional strings, in the order they are parsed and passed along
    string_options = (
        (TABLE_SEP, VALID_SEPARATORS),
        (TABLE_COMPRESSION, VALID_COMPRESSIONS),
        (TABLE_ENCODING, VALID_ENCODINGS),
    )

    parsed_strings = []
    for option_key, valid_values in string_options:
        parsed_ok, option_value = parse_optional_string(
            file_config,
            option_key,
            valid_values,
            self.event_dispatcher
        )
        if not parsed_ok:
            return None

        parsed_strings.append(option_value)

    # attempt to parse the optional header boolean
    parsed_ok, file_header = parse_optional_bool(
        file_config,
        TABLE_HEADER,
        self.event_dispatcher
    )
    if not parsed_ok:
        return None

    file_sep, file_compression, file_encoding = parsed_strings
    return FileOptionsConfig(file_sep, file_compression, file_encoding, file_header)
Parse the file options of a dataset file configuration.
Args: file_config: the dataset file configuration.
Returns: the parsed configuration or None on failure.
def parse_dataset_file_config(
        self,
        data_dir: str,
        file_config: Dict[str, Any]) -> Optional[DatasetFileConfig]:
    """Parse a dataset file configuration.

    The configuration consists of a required file name, which has to exist
    in the data directory, and the file options.

    Args:
        data_dir: the directory where the file is stored.
        file_config: the dataset file configuration.

    Returns:
        the parsed configuration or None on failure.
    """
    name_ok, file_name = parse_file_name(
        data_dir,
        file_config,
        KEY_NAME,
        self.event_dispatcher
    )

    # the file options are only parsed after the file name succeeded
    file_options = self.parse_file_options_config(file_config) if name_ok else None
    if file_options is None:
        return None

    return DatasetFileConfig(file_name, file_options)
Parse a dataset file configuration.
Args: data_dir: the directory where the file is stored. file_config: the dataset file configuration.
Returns: the parsed configuration or None on failure.
def parse_dataset_index_config(
        self,
        data_dir: str,
        index_config: Dict[str, Any]) -> Optional[DatasetIndexConfig]:
    """Parse the user/item index configuration of a dataset matrix.

    The index file name is optional, but when present it has to exist in
    the data directory. The key that is associated with the index and the
    number of records are required.

    Args:
        data_dir: the directory where the file is stored.
        index_config: the dataset matrix index configuration.

    Returns:
        the parsed configuration or None on failure.
    """
    # the optional file name is looked up directly on the configuration, so a
    # value that is not a dictionary is reported here instead of raising there
    if not assert_is_type(
        index_config,
        dict,
        self.event_dispatcher,
        'PARSE ERROR: dataset matrix index configuration invalid value'
    ):
        return None

    # attempt to parse (optional) file name
    success, file_name = parse_file_name(
        data_dir,
        index_config,
        TABLE_FILE,
        self.event_dispatcher,
        required=False
    )
    if not success:
        return None

    # attempt to parse the key that is associated with the index
    file_key = parse_string(
        index_config,
        TABLE_KEY,
        self.event_dispatcher
    )
    if file_key is None:
        return None

    # attempt to parse the number of records in the file
    num_records = parse_int(
        index_config,
        TABLE_NUM_RECORDS,
        self.event_dispatcher
    )
    if num_records is None:
        return None

    return DatasetIndexConfig(file_name, file_key, num_records)
Parse the user/item index configuration of a dataset matrix.
Args: data_dir: the directory where the file is stored. index_config: the dataset matrix index configuration.
Returns: the parsed configuration or None on failure.
def parse_dataset_matrix_config(
        self,
        data_dir: str,
        matrix_config: Dict[str, Any]) -> Optional[DatasetMatrixConfig]:
    """Parse a dataset matrix configuration.

    The matrix consists of a table, a user index, an item index and the
    rating information. All of them are required to parse successfully.

    Args:
        data_dir: the directory where the dataset matrix is stored.
        matrix_config: the dataset matrix configuration.

    Returns:
        the parsed configuration or None on failure.
    """
    # a matrix entry that is not a dictionary is reported as a parse failure
    # instead of raising on the lookups below
    if not assert_is_type(
        matrix_config,
        dict,
        self.event_dispatcher,
        'PARSE ERROR: dataset matrix configuration invalid value'
    ):
        return None

    # attempt to parse the matrix table
    matrix_table = self.parse_dataset_table_config(
        data_dir,
        matrix_config.get(KEY_MATRIX, {})
    )
    if matrix_table is None:
        return None

    # attempt to parse the matrix users
    matrix_users = self.parse_dataset_index_config(
        data_dir,
        matrix_config.get(KEY_IDX_USER, {})
    )
    if matrix_users is None:
        return None

    # attempt to parse the matrix items
    matrix_items = self.parse_dataset_index_config(
        data_dir,
        matrix_config.get(KEY_IDX_ITEM, {})
    )
    if matrix_items is None:
        return None

    # attempt to parse the matrix ratings
    matrix_ratings = parse_rating_matrix(
        matrix_config,
        self.event_dispatcher
    )
    if matrix_ratings is None:
        return None

    # note that the ratings are passed before the user and item indices
    return DatasetMatrixConfig(
        matrix_table,
        matrix_ratings,
        matrix_users,
        matrix_items
    )
Parse a dataset matrix configuration.
Args: data_dir: the directory where the dataset matrix is stored. matrix_config: the dataset matrix configuration.
Returns: the parsed configuration or None on failure.
def parse_dataset_table_config(
        self,
        data_dir: str,
        table_config: Dict[str, Any]) -> Optional[DatasetTableConfig]:
    """Parse a dataset table configuration.

    The file configuration, primary key (at least one name), columns (at
    least one name) and the number of records are required. The foreign
    keys are optional.

    Args:
        data_dir: the directory where the table is stored.
        table_config: the dataset table configuration.

    Returns:
        the parsed configuration or None on failure.
    """
    # a table entry that is not a dictionary is reported as a parse failure
    # instead of raising on the lookups below
    if not assert_is_type(
        table_config,
        dict,
        self.event_dispatcher,
        'PARSE ERROR: dataset table configuration invalid value'
    ):
        return None

    # attempt to parse the file that backs the table
    file_config = self.parse_dataset_file_config(
        data_dir,
        table_config.get(TABLE_FILE, {})
    )
    if file_config is None:
        return None

    # attempt to parse the primary key, at least one name is required
    table_primary_key = parse_string_list(
        table_config,
        TABLE_PRIMARY_KEY,
        1,
        self.event_dispatcher
    )
    if table_primary_key is None:
        return None

    # the foreign keys are optional; a failed parse is not checked and leaves
    # them as None without failing the table
    table_foreign_keys = None
    if TABLE_FOREIGN_KEYS in table_config:
        table_foreign_keys = parse_string_list(
            table_config,
            TABLE_FOREIGN_KEYS,
            0,
            self.event_dispatcher
        )

    # attempt to parse the column names, at least one name is required
    table_columns = parse_string_list(
        table_config,
        TABLE_COLUMNS,
        1,
        self.event_dispatcher
    )
    if table_columns is None:
        return None

    # attempt to parse the number of records in the table
    table_num_records = parse_int(
        table_config,
        TABLE_NUM_RECORDS,
        self.event_dispatcher
    )
    if table_num_records is None:
        return None

    return DatasetTableConfig(
        table_primary_key,
        table_foreign_keys,
        table_columns,
        table_num_records,
        file_config
    )
Parse a dataset table configuration.
Args: data_dir: the directory where the table is stored. table_config: the dataset table configuration.
Returns: the parsed configuration or None on failure.
def parse_file_name(
        data_dir: str,
        file_config: Dict[str, Any],
        file_key: str,
        event_dispatcher: EventDispatcher,
        *,
        required: bool=True) -> Tuple[bool, Optional[str]]:
    """Parse the file name from the configuration.

    A file name that is present has to be a string and the file has to
    exist in the specified data directory. When the file name is not
    required, a missing (or None) value succeeds with None as the name.

    Args:
        data_dir: the directory where the file is stored.
        file_config: the configuration dictionary to parse from.
        file_key: the key in the configuration that contains the file name.
        event_dispatcher: to dispatch the parse event on failure.
        required: whether the parsing is required to succeed.

    Returns:
        whether the parsing succeeded and the parsed file name or None on failure.
    """
    # the key is only asserted to be present for a required file name
    if required:
        key_present = assert_is_key_in_dict(
            file_key,
            file_config,
            event_dispatcher,
            'PARSE ERROR: file configuration missing key \'' + file_key + '\''
        )
        if not key_present:
            return False, None

    file_name = file_config.get(file_key)
    if file_name is None:
        if not required:
            return True, None

        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: file configuration missing value for \'' + file_key + '\''
        ))
        return False, None

    is_string = assert_is_type(
        file_name,
        str,
        event_dispatcher,
        'PARSE ERROR: file configuration contains invalid name'
    )
    if not is_string:
        return False, None

    file_path = os.path.join(data_dir, file_name)
    if os.path.isfile(file_path):
        return True, file_name

    event_dispatcher.dispatch(ParseEventArgs(
        ON_PARSE,
        'PARSE ERROR: file configuration file name does not exist: ' + file_path
    ))
    return False, None
Parse the file name from the configuration.
In addition, when the file name is parsed correctly it is checked for existence in the specified data directory.
Args: data_dir: the directory where the file is stored. file_config: the configuration dictionary to parse from. file_key: the key in the configuration that contains the file name. event_dispatcher: to dispatch the parse event on failure. required: whether the parsing is required to succeed.
Returns: whether the parsing succeeded and the parsed file name or None on failure.
def parse_float(
        config: Dict[str, Any],
        float_key: str,
        event_dispatcher: EventDispatcher) -> Optional[float]:
    """Parse a floating-point value from the configuration.

    The key is required to be present and the value is checked against the
    float type.

    Args:
        config: the configuration dictionary to parse from.
        float_key: the key in the configuration that contains the floating-point value.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed floating-point value or None on failure.
    """
    invalid_key_message = 'PARSE ERROR: configuration contains invalid \'' + float_key + '\''

    key_present = assert_is_key_in_dict(
        float_key,
        config,
        event_dispatcher,
        invalid_key_message + ' value'
    )
    if not key_present:
        return None

    parsed_value = config[float_key]
    is_float = assert_is_type(
        parsed_value,
        float,
        event_dispatcher,
        invalid_key_message
    )

    return parsed_value if is_float else None
Parse a floating-point value from the configuration.
Args: config: the configuration dictionary to parse from. float_key: the key in the configuration that contains the floating-point value. event_dispatcher: to dispatch the parse event on failure.
Returns: the parsed floating-point value or None on failure.
def parse_int(
        config: Dict[str, Any],
        int_key: str,
        event_dispatcher: EventDispatcher) -> Optional[int]:
    """Parse an integer value from the configuration.

    The key is required to be present and the integer is expected to be
    greater than zero. Boolean values are rejected explicitly.

    Args:
        config: the configuration dictionary to parse from.
        int_key: the key in the configuration that contains the integer value.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed integer value or None on failure.
    """
    invalid_key_message = 'PARSE ERROR: configuration contains invalid \'' + int_key + '\''

    if not assert_is_key_in_dict(
        int_key,
        config,
        event_dispatcher,
        invalid_key_message + ' value'
    ):
        return None

    parsed_value = config[int_key]

    # bool is a subclass of int, so it has to be rejected before the type check
    if isinstance(parsed_value, bool):
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            invalid_key_message,
            expected_type=int,
            actual_type=bool
        ))
        return None

    if not assert_is_type(
        parsed_value,
        int,
        event_dispatcher,
        invalid_key_message
    ):
        return None

    if parsed_value > 0:
        return parsed_value

    event_dispatcher.dispatch(ParseEventArgs(
        ON_PARSE,
        invalid_key_message + ' less than or equal to zero'
    ))
    return None
Parse an integer value from the configuration.
The integer is expected to be greater than zero to be parsed successfully.
Args: config: the configuration dictionary to parse from. int_key: the key in the configuration that contains the integer value. event_dispatcher: to dispatch the parse event on failure.
Returns: the parsed integer value or None on failure.
def parse_optional_bool(
        config: Dict[str, Any],
        bool_key: str,
        event_dispatcher: EventDispatcher) -> Tuple[bool, Optional[bool]]:
    """Parse an optional boolean from the configuration.

    A missing (or None) value succeeds and results in False.

    Args:
        config: the configuration dictionary to parse from.
        bool_key: the key in the configuration that contains the boolean.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        whether the parsing succeeded and the optional boolean value.
    """
    parsed_value = config.get(bool_key)
    if parsed_value is None:
        # the boolean is optional and is False when it is not specified
        return True, False

    is_bool = assert_is_type(
        parsed_value,
        bool,
        event_dispatcher,
        'PARSE ERROR: configuration contains invalid ' + bool_key + ' value'
    )
    if not is_bool:
        return False, None

    return True, parsed_value
Parse an optional boolean from the configuration.
Args: config: the configuration dictionary to parse from. bool_key: the key in the configuration that contains the boolean. event_dispatcher: to dispatch the parse event on failure.
Returns: whether the parsing succeeded and the optional boolean value (False when the key is absent).
def parse_optional_string(
        config: Dict[str, Any],
        string_key: str,
        string_options: List[str],
        event_dispatcher: EventDispatcher) -> Tuple[bool, Optional[str]]:
    """Parse an optional string from a list of valid values from the configuration.

    A missing (or None) value succeeds and results in None. A value that
    is present has to be a string and one of the specified options.

    Args:
        config: the configuration dictionary to parse from.
        string_key: the key in the configuration that contains the string.
        string_options: the options that are available for the string that is being parsed.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        whether the parsing succeeded and the optional string value.
    """
    parsed_value = config.get(string_key)
    if parsed_value is None:
        return True, None

    invalid_key_message = 'PARSE ERROR: configuration contains invalid \'' + string_key + '\''

    is_string = assert_is_type(
        parsed_value,
        str,
        event_dispatcher,
        invalid_key_message + ' value'
    )
    if not is_string:
        return False, None

    is_valid_option = assert_is_one_of_list(
        parsed_value,
        string_options,
        event_dispatcher,
        invalid_key_message
    )
    if not is_valid_option:
        return False, None

    return True, parsed_value
Parse an optional string from a list of valid values from the configuration.
Args: config: the configuration dictionary to parse from. string_key: the key in the configuration that contains the string. string_options: the options that are available for the string that is being parsed. event_dispatcher: to dispatch the parse event on failure.
Returns: whether the parsing succeeded and the optional string value.
def parse_rating_matrix(
        matrix_config: Dict[str, Any],
        event_dispatcher: EventDispatcher) -> Optional[RatingMatrixConfig]:
    """Parse the rating matrix configuration from the matrix configuration.

    The minimum rating has to be greater than zero, the maximum rating is
    not allowed to be less than the minimum rating and the rating type has
    to be either explicit or implicit.

    Args:
        matrix_config: the matrix configuration dictionary to parse from.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed rating matrix configuration or None on failure.
    """
    rating_min = parse_float(
        matrix_config,
        KEY_RATING_MIN,
        event_dispatcher
    )
    if rating_min is None:
        return None

    if rating_min <= 0.0:
        # the message describes the rejected value: a minimum that is not positive
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: matrix configuration contains minimum rating '
            'less than or equal to zero'
        ))
        return None

    rating_max = parse_float(
        matrix_config,
        KEY_RATING_MAX,
        event_dispatcher
    )
    if rating_max is None:
        return None

    # a maximum rating that is equal to the minimum rating is accepted
    if rating_max < rating_min:
        event_dispatcher.dispatch(ParseEventArgs(
            ON_PARSE,
            'PARSE ERROR: matrix configuration contains maximum rating less than minimum rating'
        ))
        return None

    rating_type = parse_string(
        matrix_config,
        KEY_RATING_TYPE,
        event_dispatcher,
        one_of_list=[DATASET_RATINGS_EXPLICIT, DATASET_RATINGS_IMPLICIT]
    )
    if rating_type is None:
        return None

    return RatingMatrixConfig(rating_min, rating_max, rating_type)
Parse a rating matrix from the configuration.
Args: matrix_config: the matrix configuration dictionary to parse from. event_dispatcher: to dispatch the parse event on failure.
Returns: the parsed rating matrix configuration or None on failure.
def parse_string(
        config: Dict[str, Any],
        string_key: str,
        event_dispatcher: EventDispatcher,
        *,
        one_of_list: List[str]=None) -> Optional[str]:
    """Parse a string from the configuration.

    The key is required to be present and the value has to be a string.

    Args:
        config: the configuration dictionary to parse from.
        string_key: the key in the configuration that contains the string.
        event_dispatcher: to dispatch the parse event on failure.
        one_of_list: when not None, the string is expected to be one of the
            values in the specified list.

    Returns:
        the parsed string or None on failure.
    """
    invalid_key_message = 'PARSE ERROR: configuration contains invalid \'' + string_key + '\''

    key_present = assert_is_key_in_dict(
        string_key,
        config,
        event_dispatcher,
        invalid_key_message + ' value'
    )
    if not key_present:
        return None

    parsed_value = config[string_key]

    is_string = assert_is_type(
        parsed_value,
        str,
        event_dispatcher,
        invalid_key_message
    )
    if not is_string:
        return None

    # the allowed values are only verified when they are specified
    if one_of_list is not None and not assert_is_one_of_list(
        parsed_value,
        one_of_list,
        event_dispatcher,
        invalid_key_message
    ):
        return None

    return parsed_value
Parse a string from the configuration.
Args: config: the configuration dictionary to parse from. string_key: the key in the configuration that contains the string. event_dispatcher: to dispatch the parse event on failure. one_of_list: when not None, the string is expected to be one of the values in the specified list.
Returns: the parsed string or None on failure.
def parse_string_list(
        config: Dict[str, Any],
        string_list_key: str,
        min_list_length: int,
        event_dispatcher: EventDispatcher) -> Optional[List[str]]:
    """Parse a list of strings from the configuration.

    The key is required to be present, the value has to be a list and all
    of the entries have to be strings. The length of the list is verified
    last.

    Args:
        config: the configuration dictionary to parse from.
        string_list_key: the key in the configuration that contains the string list.
        min_list_length: the minimum length of the list to succeed.
        event_dispatcher: to dispatch the parse event on failure.

    Returns:
        the parsed string list or None on failure.
    """
    invalid_key_message = \
        'PARSE ERROR: configuration contains invalid \'' + string_list_key + '\''
    invalid_list_message = 'PARSE ERROR: configuration list \'' + string_list_key + '\''

    key_present = assert_is_key_in_dict(
        string_list_key,
        config,
        event_dispatcher,
        invalid_key_message + ' value'
    )
    if not key_present:
        return None

    raw_list = config[string_list_key]

    is_list = assert_is_type(
        raw_list,
        list,
        event_dispatcher,
        invalid_key_message
    )
    if not is_list:
        return None

    # verification stops at the first entry that is not a string
    only_strings = all(
        assert_is_type(
            entry,
            str,
            event_dispatcher,
            invalid_list_message + ' contains invalid value'
        )
        for entry in raw_list
    )
    if not only_strings:
        return None

    # a copy is returned so that the configuration list itself is not shared
    parsed_strings = list(raw_list)
    if len(parsed_strings) >= min_list_length:
        return parsed_strings

    event_dispatcher.dispatch(ParseEventArgs(
        ON_PARSE,
        invalid_list_message + ' contains too few values'
    ))
    return None
Parse a list of strings from the configuration.
Args: config: the configuration dictionary to parse from. string_list_key: the key in the configuration that contains the string list. min_list_length: the minimum length of the list to succeed. event_dispatcher: to dispatch the parse event on failure.
Returns: the parsed string list or None on failure.