src.fairreckitlib.experiment.experiment_pipeline

This module contains functionality of the complete experiment pipeline.

Classes:

ExperimentPipeline: class that connects the data, model and evaluation pipelines.

Functions:

add_result_to_overview: add a computed result to the experiment result overview.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

  1"""This module contains functionality of the complete experiment pipeline.
  2
  3Classes:
  4
  5    ExperimentPipeline: class that connects the data, model and evaluation pipelines.
  6
  7Functions:
  8
  9    add_result_to_overview: add a computed result to the experiment result overview.
 10
 11This program has been developed by students from the bachelor Computer Science at
 12Utrecht University within the Software Project course.
 13© Copyright Utrecht University (Department of Information and Computing Sciences)
 14"""
 15
 16import os
 17import time
 18from typing import Dict, Callable, List, Tuple, Union
 19
 20from ..core.config.config_factories import GroupFactory
 21from ..core.events.event_dispatcher import EventDispatcher
 22from ..core.events.event_error import ON_FAILURE_ERROR, ErrorEventArgs
 23from ..core.io.io_create import create_dir, create_json
 24from ..data.filter.filter_factory import KEY_DATA_SUBSET
 25from ..data.data_factory import KEY_DATA
 26from ..data.pipeline.data_run import DataPipelineConfig, run_data_pipelines
 27from ..data.set.dataset_registry import DataRegistry
 28from ..evaluation.pipeline.evaluation_run import run_evaluation_pipelines, EvaluationPipelineConfig
 29from ..evaluation.evaluation_factory import KEY_EVALUATION
 30from ..model.pipeline.model_run import ModelPipelineConfig, run_model_pipelines
 31from ..model.model_factory import KEY_MODELS
 32from .experiment_config import ExperimentConfig
 33from .experiment_config import PredictorExperimentConfig, RecommenderExperimentConfig
 34from .experiment_event import ON_BEGIN_EXPERIMENT_PIPELINE, ExperimentEventArgs
 35from .experiment_event import ON_END_EXPERIMENT_PIPELINE
 36
 37
 38class ExperimentPipeline:
 39    """ExperimentPipeline that consists of the data, model and evaluation pipelines.
 40
 41    The experiment pipeline connects the three pipelines, by first running the data
 42    pipeline for all the specified dataset configurations. Each of the
 43    resulting data transitions is forwarded through the model pipelines where all the
 44    specified model configurations will compute rating results. These in turn are
 45    forwarded to the evaluation pipelines to compute the specified metric configurations
 46    of the performance of the models.
 47
 48    Public methods:
 49
 50    run
 51    """
 52
 53    def __init__(
 54            self,
 55            data_registry: DataRegistry,
 56            experiment_factory: GroupFactory,
 57            event_dispatcher: EventDispatcher):
 58        """Construct the ExperimentPipeline.
 59
 60        Args:
 61            data_registry: the registry with available datasets.
 62            experiment_factory: the factory containing all three pipeline factories.
 63            event_dispatcher: to dispatch the experiment events.
 64        """
 65        self.data_registry = data_registry
 66        self.experiment_factory = experiment_factory
 67        self.event_dispatcher = event_dispatcher
 68
 69    def run(self,
 70            output_dir: str,
 71            experiment_config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
 72            num_threads: int,
 73            is_running: Callable[[], bool]) -> None:
 74        """Run the experiment with the specified configuration.
 75
 76        Args:
 77            output_dir: the path of the directory to store the output.
 78            experiment_config: the configuration of the experiment.
 79            num_threads: the max number of threads the experiment can use.
 80            is_running: function that returns whether the experiment
 81                is still running. Stops early when False is returned.
 82
 83        Raises:
 84            IOError: when the specified output directory already exists.
 85            RuntimeError: when no data transitions are generated by the data or model pipelines.
 86        """
 87        if os.path.isdir(output_dir):
 88            raise IOError('Experiment pipeline output directory already exists')
 89
 90        # prepare experiment pipeline
 91        results, start_time = self.start_run(output_dir, experiment_config)
 92
 93        data_factory = self.experiment_factory.get_factory(KEY_DATA)
 94        # run all data pipelines
 95        data_result = run_data_pipelines(
 96            DataPipelineConfig(
 97                output_dir,
 98                self.data_registry,
 99                data_factory,
100                experiment_config.datasets
101            ),
102            self.event_dispatcher,
103            is_running
104        )
105
106        if len(data_result) == 0:
107            self.event_dispatcher.dispatch(ErrorEventArgs(
108                ON_FAILURE_ERROR,
109                'Failure: to generate experiment data transitions'
110            ))
111            raise RuntimeError('Experiment failed to generate data transitions')
112
113        kwargs = {'num_threads': num_threads}
114        if isinstance(experiment_config, RecommenderExperimentConfig):
115            kwargs['num_items'] = experiment_config.top_k
116            kwargs['rated_items_filter'] = experiment_config.rated_items_filter
117
118        computed_models = 0
119
120        # loop through each data transition result from the data pipeline
121        for data_transition in data_result:
122            if not is_running():
123                return
124
125            # run all model pipelines on the data transition
126            model_factory = self.experiment_factory.get_factory(KEY_MODELS)
127            model_dirs = run_model_pipelines(
128                ModelPipelineConfig(
129                    data_transition.output_dir,
130                    data_transition,
131                    model_factory.get_factory(experiment_config.get_type()),
132                    experiment_config.models
133                ),
134                self.event_dispatcher,
135                is_running,
136                **kwargs
137            )
138            if not is_running():
139                return
140
141            if len(model_dirs) == 0:
142                self.event_dispatcher.dispatch(ErrorEventArgs(
143                    ON_FAILURE_ERROR,
144                    'Failure: to compute experiment model ratings for data transition'
145                ))
146                continue
147
148            computed_models += len(model_dirs)
149
150            # run all evaluation pipelines on the computed model results
151            if len(experiment_config.evaluation) > 0:
152                evaluation_factory = self.experiment_factory.get_factory(KEY_EVALUATION)
153                run_evaluation_pipelines(
154                    EvaluationPipelineConfig(
155                        model_dirs,
156                        data_transition,
157                        data_factory.get_factory(KEY_DATA_SUBSET),
158                        evaluation_factory.get_factory(experiment_config.get_type()),
159                        experiment_config.evaluation
160                    ),
161                    self.event_dispatcher,
162                    is_running
163                )
164
165            # add overview of the data transition on the computed models/metrics
166            results = add_result_to_overview(results, model_dirs)
167
168        if computed_models == 0:
169            self.event_dispatcher.dispatch(ErrorEventArgs(
170                ON_FAILURE_ERROR,
171                'Failure: to compute any experiment model ratings'
172            ))
173            raise RuntimeError('Experiment failed to compute any models')
174
175        # finalize experiment pipeline
176        self.end_run(start_time, output_dir, experiment_config, results)
177
178    def start_run(
179            self,
180            output_dir: str,
181            experiment_config: ExperimentConfig) -> Tuple[List[Dict[str, str]], float]:
182        """Start the run, making the output dir and initializing the results' storage list.
183
184        Args:
185            output_dir: directory in which to store the run storage output.
186            experiment_config: the configuration of the experiment.
187
188        Returns:
189            the initial results list and the time the experiment started.
190        """
191        start_time = time.time()
192        self.event_dispatcher.dispatch(ExperimentEventArgs(
193            ON_BEGIN_EXPERIMENT_PIPELINE,
194            experiment_config.name
195        ))
196
197        create_dir(output_dir, self.event_dispatcher)
198
199        return [], start_time
200
201    def end_run(
202            self,
203            start_time: float,
204            output_dir: str,
205            experiment_config: ExperimentConfig,
206            results: List[Dict[str, str]]) -> None:
207        """End the run, writing the storage file and storing the results.
208
209        Args:
210            start_time: time the experiment started.
211            output_dir: directory in which to store the run storage output.
212            experiment_config: the configuration of the experiment.
213            results: the current results list.
214        """
215        self.write_storage_file(output_dir, results)
216
217        self.event_dispatcher.dispatch(ExperimentEventArgs(
218            ON_END_EXPERIMENT_PIPELINE,
219            experiment_config.name
220        ), elapsed_time=time.time() - start_time)
221
222    def write_storage_file(
223            self,
224            output_dir: str,
225            results: List[Dict[str, str]]) -> None:
226        """Write a JSON file with overview of the results file paths.
227
228        Args:
229            output_dir: path to the directory to store the result overview.
230            results: the result overview containing completed computations.
231        """
232        formatted_results = map(lambda result: {
233            'name': result['dataset'] + ' - ' + result['model'],
234            'dataset': result['dataset'],
235            'recommender_system': result['model'],
236            'evaluation_path': os.path.join(result['dir'], 'evaluations.json'),
237            'ratings_path': os.path.join(result['dir'], 'ratings.tsv'),
238            'ratings_settings_path': os.path.join(result['dir'], 'settings.json')
239        }, results)
240
241        create_json(
242            os.path.join(output_dir, 'overview.json'),
243            {'overview': list(formatted_results)},
244            self.event_dispatcher,
245            indent=4
246        )
247
248
def add_result_to_overview(
        results: List[Dict[str, str]],
        model_dirs: List[str]) -> List[Dict[str, str]]:
    """Add result to overview of results file paths.

    Args:
        results: the accumulated result overview.
        model_dirs: the completed computations to add to the overview.

    Returns:
        the result overview appended with the completed computations.
    """
    # The evaluations live in the same directory as the model ratings, so the
    # model directory path alone is enough to locate every artefact later on.
    # The parent directory name identifies the dataset the model ran against.
    results.extend(
        {
            'dataset': os.path.basename(os.path.dirname(model_dir)),
            'model': os.path.basename(model_dir),
            'dir': model_dir
        }
        for model_dir in model_dirs
    )
    return results
class ExperimentPipeline:
 39class ExperimentPipeline:
 40    """ExperimentPipeline that consists of the data, model and evaluation pipelines.
 41
 42    The experiment pipeline connects the three pipelines, by first running the data
 43    pipeline for all the specified dataset configurations. Each of the
 44    resulting data transitions is forwarded through the model pipelines where all the
 45    specified model configurations will compute rating results. These in turn are
 46    forwarded to the evaluation pipelines to compute the specified metric configurations
 47    of the performance of the models.
 48
 49    Public methods:
 50
 51    run
 52    """
 53
 54    def __init__(
 55            self,
 56            data_registry: DataRegistry,
 57            experiment_factory: GroupFactory,
 58            event_dispatcher: EventDispatcher):
 59        """Construct the ExperimentPipeline.
 60
 61        Args:
 62            data_registry: the registry with available datasets.
 63            experiment_factory: the factory containing all three pipeline factories.
 64            event_dispatcher: to dispatch the experiment events.
 65        """
 66        self.data_registry = data_registry
 67        self.experiment_factory = experiment_factory
 68        self.event_dispatcher = event_dispatcher
 69
 70    def run(self,
 71            output_dir: str,
 72            experiment_config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
 73            num_threads: int,
 74            is_running: Callable[[], bool]) -> None:
 75        """Run the experiment with the specified configuration.
 76
 77        Args:
 78            output_dir: the path of the directory to store the output.
 79            experiment_config: the configuration of the experiment.
 80            num_threads: the max number of threads the experiment can use.
 81            is_running: function that returns whether the experiment
 82                is still running. Stops early when False is returned.
 83
 84        Raises:
 85            IOError: when the specified output directory already exists.
 86            RuntimeError: when no data transitions are generated by the data or model pipelines.
 87        """
 88        if os.path.isdir(output_dir):
 89            raise IOError('Experiment pipeline output directory already exists')
 90
 91        # prepare experiment pipeline
 92        results, start_time = self.start_run(output_dir, experiment_config)
 93
 94        data_factory = self.experiment_factory.get_factory(KEY_DATA)
 95        # run all data pipelines
 96        data_result = run_data_pipelines(
 97            DataPipelineConfig(
 98                output_dir,
 99                self.data_registry,
100                data_factory,
101                experiment_config.datasets
102            ),
103            self.event_dispatcher,
104            is_running
105        )
106
107        if len(data_result) == 0:
108            self.event_dispatcher.dispatch(ErrorEventArgs(
109                ON_FAILURE_ERROR,
110                'Failure: to generate experiment data transitions'
111            ))
112            raise RuntimeError('Experiment failed to generate data transitions')
113
114        kwargs = {'num_threads': num_threads}
115        if isinstance(experiment_config, RecommenderExperimentConfig):
116            kwargs['num_items'] = experiment_config.top_k
117            kwargs['rated_items_filter'] = experiment_config.rated_items_filter
118
119        computed_models = 0
120
121        # loop through each data transition result from the data pipeline
122        for data_transition in data_result:
123            if not is_running():
124                return
125
126            # run all model pipelines on the data transition
127            model_factory = self.experiment_factory.get_factory(KEY_MODELS)
128            model_dirs = run_model_pipelines(
129                ModelPipelineConfig(
130                    data_transition.output_dir,
131                    data_transition,
132                    model_factory.get_factory(experiment_config.get_type()),
133                    experiment_config.models
134                ),
135                self.event_dispatcher,
136                is_running,
137                **kwargs
138            )
139            if not is_running():
140                return
141
142            if len(model_dirs) == 0:
143                self.event_dispatcher.dispatch(ErrorEventArgs(
144                    ON_FAILURE_ERROR,
145                    'Failure: to compute experiment model ratings for data transition'
146                ))
147                continue
148
149            computed_models += len(model_dirs)
150
151            # run all evaluation pipelines on the computed model results
152            if len(experiment_config.evaluation) > 0:
153                evaluation_factory = self.experiment_factory.get_factory(KEY_EVALUATION)
154                run_evaluation_pipelines(
155                    EvaluationPipelineConfig(
156                        model_dirs,
157                        data_transition,
158                        data_factory.get_factory(KEY_DATA_SUBSET),
159                        evaluation_factory.get_factory(experiment_config.get_type()),
160                        experiment_config.evaluation
161                    ),
162                    self.event_dispatcher,
163                    is_running
164                )
165
166            # add overview of the data transition on the computed models/metrics
167            results = add_result_to_overview(results, model_dirs)
168
169        if computed_models == 0:
170            self.event_dispatcher.dispatch(ErrorEventArgs(
171                ON_FAILURE_ERROR,
172                'Failure: to compute any experiment model ratings'
173            ))
174            raise RuntimeError('Experiment failed to compute any models')
175
176        # finalize experiment pipeline
177        self.end_run(start_time, output_dir, experiment_config, results)
178
179    def start_run(
180            self,
181            output_dir: str,
182            experiment_config: ExperimentConfig) -> Tuple[List[Dict[str, str]], float]:
183        """Start the run, making the output dir and initializing the results' storage list.
184
185        Args:
186            output_dir: directory in which to store the run storage output.
187            experiment_config: the configuration of the experiment.
188
189        Returns:
190            the initial results list and the time the experiment started.
191        """
192        start_time = time.time()
193        self.event_dispatcher.dispatch(ExperimentEventArgs(
194            ON_BEGIN_EXPERIMENT_PIPELINE,
195            experiment_config.name
196        ))
197
198        create_dir(output_dir, self.event_dispatcher)
199
200        return [], start_time
201
202    def end_run(
203            self,
204            start_time: float,
205            output_dir: str,
206            experiment_config: ExperimentConfig,
207            results: List[Dict[str, str]]) -> None:
208        """End the run, writing the storage file and storing the results.
209
210        Args:
211            start_time: time the experiment started.
212            output_dir: directory in which to store the run storage output.
213            experiment_config: the configuration of the experiment.
214            results: the current results list.
215        """
216        self.write_storage_file(output_dir, results)
217
218        self.event_dispatcher.dispatch(ExperimentEventArgs(
219            ON_END_EXPERIMENT_PIPELINE,
220            experiment_config.name
221        ), elapsed_time=time.time() - start_time)
222
223    def write_storage_file(
224            self,
225            output_dir: str,
226            results: List[Dict[str, str]]) -> None:
227        """Write a JSON file with overview of the results file paths.
228
229        Args:
230            output_dir: path to the directory to store the result overview.
231            results: the result overview containing completed computations.
232        """
233        formatted_results = map(lambda result: {
234            'name': result['dataset'] + ' - ' + result['model'],
235            'dataset': result['dataset'],
236            'recommender_system': result['model'],
237            'evaluation_path': os.path.join(result['dir'], 'evaluations.json'),
238            'ratings_path': os.path.join(result['dir'], 'ratings.tsv'),
239            'ratings_settings_path': os.path.join(result['dir'], 'settings.json')
240        }, results)
241
242        create_json(
243            os.path.join(output_dir, 'overview.json'),
244            {'overview': list(formatted_results)},
245            self.event_dispatcher,
246            indent=4
247        )

ExperimentPipeline that consists of the data, model and evaluation pipelines.

The experiment pipeline connects the three pipelines, by first running the data pipeline for all the specified dataset configurations. Each of the resulting data transitions is forwarded through the model pipelines where all the specified model configurations will compute rating results. These in turn are forwarded to the evaluation pipelines to compute the specified metric configurations of the performance of the models.

Public methods:

run

ExperimentPipeline( data_registry: src.fairreckitlib.data.set.dataset_registry.DataRegistry, experiment_factory: src.fairreckitlib.core.config.config_factories.GroupFactory, event_dispatcher: src.fairreckitlib.core.events.event_dispatcher.EventDispatcher)
54    def __init__(
55            self,
56            data_registry: DataRegistry,
57            experiment_factory: GroupFactory,
58            event_dispatcher: EventDispatcher):
59        """Construct the ExperimentPipeline.
60
61        Args:
62            data_registry: the registry with available datasets.
63            experiment_factory: the factory containing all three pipeline factories.
64            event_dispatcher: to dispatch the experiment events.
65        """
66        self.data_registry = data_registry
67        self.experiment_factory = experiment_factory
68        self.event_dispatcher = event_dispatcher

Construct the ExperimentPipeline.

Args: data_registry: the registry with available datasets. experiment_factory: the factory containing all three pipeline factories. event_dispatcher: to dispatch the experiment events.

def run( self, output_dir: str, experiment_config: Union[src.fairreckitlib.experiment.experiment_config.PredictorExperimentConfig, src.fairreckitlib.experiment.experiment_config.RecommenderExperimentConfig], num_threads: int, is_running: Callable[[], bool]) -> None:
 70    def run(self,
 71            output_dir: str,
 72            experiment_config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
 73            num_threads: int,
 74            is_running: Callable[[], bool]) -> None:
 75        """Run the experiment with the specified configuration.
 76
 77        Args:
 78            output_dir: the path of the directory to store the output.
 79            experiment_config: the configuration of the experiment.
 80            num_threads: the max number of threads the experiment can use.
 81            is_running: function that returns whether the experiment
 82                is still running. Stops early when False is returned.
 83
 84        Raises:
 85            IOError: when the specified output directory already exists.
 86            RuntimeError: when no data transitions are generated by the data or model pipelines.
 87        """
 88        if os.path.isdir(output_dir):
 89            raise IOError('Experiment pipeline output directory already exists')
 90
 91        # prepare experiment pipeline
 92        results, start_time = self.start_run(output_dir, experiment_config)
 93
 94        data_factory = self.experiment_factory.get_factory(KEY_DATA)
 95        # run all data pipelines
 96        data_result = run_data_pipelines(
 97            DataPipelineConfig(
 98                output_dir,
 99                self.data_registry,
100                data_factory,
101                experiment_config.datasets
102            ),
103            self.event_dispatcher,
104            is_running
105        )
106
107        if len(data_result) == 0:
108            self.event_dispatcher.dispatch(ErrorEventArgs(
109                ON_FAILURE_ERROR,
110                'Failure: to generate experiment data transitions'
111            ))
112            raise RuntimeError('Experiment failed to generate data transitions')
113
114        kwargs = {'num_threads': num_threads}
115        if isinstance(experiment_config, RecommenderExperimentConfig):
116            kwargs['num_items'] = experiment_config.top_k
117            kwargs['rated_items_filter'] = experiment_config.rated_items_filter
118
119        computed_models = 0
120
121        # loop through each data transition result from the data pipeline
122        for data_transition in data_result:
123            if not is_running():
124                return
125
126            # run all model pipelines on the data transition
127            model_factory = self.experiment_factory.get_factory(KEY_MODELS)
128            model_dirs = run_model_pipelines(
129                ModelPipelineConfig(
130                    data_transition.output_dir,
131                    data_transition,
132                    model_factory.get_factory(experiment_config.get_type()),
133                    experiment_config.models
134                ),
135                self.event_dispatcher,
136                is_running,
137                **kwargs
138            )
139            if not is_running():
140                return
141
142            if len(model_dirs) == 0:
143                self.event_dispatcher.dispatch(ErrorEventArgs(
144                    ON_FAILURE_ERROR,
145                    'Failure: to compute experiment model ratings for data transition'
146                ))
147                continue
148
149            computed_models += len(model_dirs)
150
151            # run all evaluation pipelines on the computed model results
152            if len(experiment_config.evaluation) > 0:
153                evaluation_factory = self.experiment_factory.get_factory(KEY_EVALUATION)
154                run_evaluation_pipelines(
155                    EvaluationPipelineConfig(
156                        model_dirs,
157                        data_transition,
158                        data_factory.get_factory(KEY_DATA_SUBSET),
159                        evaluation_factory.get_factory(experiment_config.get_type()),
160                        experiment_config.evaluation
161                    ),
162                    self.event_dispatcher,
163                    is_running
164                )
165
166            # add overview of the data transition on the computed models/metrics
167            results = add_result_to_overview(results, model_dirs)
168
169        if computed_models == 0:
170            self.event_dispatcher.dispatch(ErrorEventArgs(
171                ON_FAILURE_ERROR,
172                'Failure: to compute any experiment model ratings'
173            ))
174            raise RuntimeError('Experiment failed to compute any models')
175
176        # finalize experiment pipeline
177        self.end_run(start_time, output_dir, experiment_config, results)

Run the experiment with the specified configuration.

Args: output_dir: the path of the directory to store the output. experiment_config: the configuration of the experiment. num_threads: the max number of threads the experiment can use. is_running: function that returns whether the experiment is still running. Stops early when False is returned.

Raises: IOError: when the specified output directory already exists. RuntimeError: when no data transitions are generated by the data or model pipelines.

def start_run( self, output_dir: str, experiment_config: src.fairreckitlib.experiment.experiment_config.ExperimentConfig) -> Tuple[List[Dict[str, str]], float]:
179    def start_run(
180            self,
181            output_dir: str,
182            experiment_config: ExperimentConfig) -> Tuple[List[Dict[str, str]], float]:
183        """Start the run, making the output dir and initializing the results' storage list.
184
185        Args:
186            output_dir: directory in which to store the run storage output.
187            experiment_config: the configuration of the experiment.
188
189        Returns:
190            the initial results list and the time the experiment started.
191        """
192        start_time = time.time()
193        self.event_dispatcher.dispatch(ExperimentEventArgs(
194            ON_BEGIN_EXPERIMENT_PIPELINE,
195            experiment_config.name
196        ))
197
198        create_dir(output_dir, self.event_dispatcher)
199
200        return [], start_time

Start the run, making the output dir and initializing the results' storage list.

Args: output_dir: directory in which to store the run storage output. experiment_config: the configuration of the experiment.

Returns: the initial results list and the time the experiment started.

def end_run( self, start_time: float, output_dir: str, experiment_config: src.fairreckitlib.experiment.experiment_config.ExperimentConfig, results: List[Dict[str, str]]) -> None:
202    def end_run(
203            self,
204            start_time: float,
205            output_dir: str,
206            experiment_config: ExperimentConfig,
207            results: List[Dict[str, str]]) -> None:
208        """End the run, writing the storage file and storing the results.
209
210        Args:
211            start_time: time the experiment started.
212            output_dir: directory in which to store the run storage output.
213            experiment_config: the configuration of the experiment.
214            results: the current results list.
215        """
216        self.write_storage_file(output_dir, results)
217
218        self.event_dispatcher.dispatch(ExperimentEventArgs(
219            ON_END_EXPERIMENT_PIPELINE,
220            experiment_config.name
221        ), elapsed_time=time.time() - start_time)

End the run, writing the storage file and storing the results.

Args: start_time: time the experiment started. output_dir: directory in which to store the run storage output. experiment_config: the configuration of the experiment. results: the current results list.

def write_storage_file(self, output_dir: str, results: List[Dict[str, str]]) -> None:
223    def write_storage_file(
224            self,
225            output_dir: str,
226            results: List[Dict[str, str]]) -> None:
227        """Write a JSON file with overview of the results file paths.
228
229        Args:
230            output_dir: path to the directory to store the result overview.
231            results: the result overview containing completed computations.
232        """
233        formatted_results = map(lambda result: {
234            'name': result['dataset'] + ' - ' + result['model'],
235            'dataset': result['dataset'],
236            'recommender_system': result['model'],
237            'evaluation_path': os.path.join(result['dir'], 'evaluations.json'),
238            'ratings_path': os.path.join(result['dir'], 'ratings.tsv'),
239            'ratings_settings_path': os.path.join(result['dir'], 'settings.json')
240        }, results)
241
242        create_json(
243            os.path.join(output_dir, 'overview.json'),
244            {'overview': list(formatted_results)},
245            self.event_dispatcher,
246            indent=4
247        )

Write a JSON file with overview of the results file paths.

Args: output_dir: path to the directory to store the result overview. results: the result overview containing completed computations.

def add_result_to_overview( results: List[Dict[str, str]], model_dirs: List[str]) -> List[Dict[str, str]]:
250def add_result_to_overview(
251        results: List[Dict[str, str]],
252        model_dirs: List[str]) -> List[Dict[str, str]]:
253    """Add result to overview of results file paths.
254
255    Args:
256        results: the accumulated result overview.
257        model_dirs: the completed computations to add to the overview.
258
259    Returns:
260        the result overview appended with the completed computations.
261    """
262    for model_dir in model_dirs:
263        # Our evaluations are in the same directory as the model ratings
264        result = {
265            'dataset': os.path.basename(os.path.dirname(model_dir)),
266            'model': os.path.basename(model_dir),
267            'dir': model_dir
268        }
269        results.append(result)
270
271    return results

Add result to overview of results file paths.

Args: results: the accumulated result overview. model_dirs: the completed computations to add to the overview.

Returns: the result overview appended with the completed computations.