src.fairreckitlib.model.algorithms.base_predictor

This module contains the base class for predictors.

Classes:

BasePredictor: base class for predictors.
Predictor: implements basic shared functionality.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

  1"""This module contains the base class for predictors.
  2
  3Classes:
  4
  5    BasePredictor: base class for predictors.
  6    Predictor: implements basic shared functionality.
  7
  8This program has been developed by students from the bachelor Computer Science at
  9Utrecht University within the Software Project course.
 10© Copyright Utrecht University (Department of Information and Computing Sciences)
 11"""
 12
 13from abc import ABCMeta, abstractmethod
 14import math
 15from typing import Any, Dict
 16
 17import numpy as np
 18import pandas as pd
 19
 20from .base_algorithm import BaseAlgorithm
 21
 22
 23class BasePredictor(BaseAlgorithm, metaclass=ABCMeta):
 24    """Base class for FairRecKit predictors.
 25
 26    A predictor is used for prediction experiments. It computes predictions
 27    for any user and item that it was trained on.
 28    Derived predictors are expected to implement the abstract interface.
 29
 30    Abstract methods:
 31
 32    on_predict
 33    on_predict_batch (optional)
 34
 35    Public methods:
 36
 37    predict
 38    predict_batch
 39    """
 40
 41    def __init__(self):
 42        """Construct the predictor."""
 43        BaseAlgorithm.__init__(self)
 44
 45    def predict(self, user: int, item: int) -> float:
 46        """Compute a prediction for the specified user and item.
 47
 48        A prediction is impossible when the user and/or item is not
 49        present in the unique users and/or items it was trained on.
 50        Moreover, the prediction could also fail in the derived
 51        implementation of the predictor.
 52
 53        Args:
 54            user: the user ID.
 55            item: the item ID.
 56
 57        Raises:
 58            ArithmeticError: possibly raised by a predictor on testing.
 59            MemoryError: possibly raised by a predictor on testing.
 60            RuntimeError: when the predictor is not trained yet.
 61
 62        Returns:
 63            the predicted rating or NaN when impossible.
 64        """
 65        if self.train_set is None:
 66            raise RuntimeError('Predictor is not trained for predictions')
 67
 68        if self.train_set.knows_user(user) and self.train_set.knows_item(item):
 69            return self.on_predict(user, item)
 70
 71        return math.nan
 72
 73    @abstractmethod
 74    def on_predict(self, user: int, item: int) -> float:
 75        """Compute a prediction for the specified user and item.
 76
 77        The user and item are assumed to be present in the train
 78        set that the predictor was trained on.
 79        Derived implementations are allowed to return NaN when the
 80        prediction is impossible to compute.
 81
 82        Args:
 83            user: the user ID.
 84            item: the item ID.
 85
 86        Raises:
 87            ArithmeticError: possibly raised by a predictor on testing.
 88            MemoryError: possibly raised by a predictor on testing.
 89            RuntimeError: when the predictor is not trained yet.
 90
 91        Returns:
 92            the predicted rating or NaN when impossible.
 93        """
 94        raise NotImplementedError()
 95
 96    def predict_batch(self, user_item_pairs: pd.DataFrame) -> pd.DataFrame:
 97        """Compute the predictions for each of the specified user and item pairs.
 98
 99        All the users and items in the pairs that are not present in the train set that
100        the predictor was trained on are set to NaN after predictions are made.
101
102        Args:
103            user_item_pairs: with at least two columns: 'user' and 'item'.
104
105        Raises:
106            ArithmeticError: possibly raised by a predictor on testing.
107            MemoryError: possibly raised by a predictor on testing.
108            RuntimeError: when the predictor is not trained yet.
109
110        Returns:
111            a dataFrame with the columns: 'user', 'item', 'prediction'.
112        """
113        if self.train_set is None:
114            raise RuntimeError('Predictor is not trained for predictions')
115
116        user_item_pairs = user_item_pairs[['user', 'item']]
117        user_item_pairs = self.on_predict_batch(user_item_pairs)
118
119        # resolve the rows that contain unknown users and/or items
120        unknown_user_item = ~self.train_set.knows_user_list(user_item_pairs['user']) | \
121                            ~self.train_set.knows_item_list(user_item_pairs['item'])
122
123        # replace unknown user or item predictions with NaN
124        # these predictions are already NaN in most cases except for a few (stochastic) predictors
125        user_item_pairs.loc[unknown_user_item, 'prediction'] = math.nan
126
127        return user_item_pairs
128
129    def on_predict_batch(self, user_item_pairs: pd.DataFrame) -> pd.DataFrame:
130        """Compute the predictions for each of the specified user and item pairs.
131
132        A standard batch implementation is provided, but derived classes are
133        allowed to override batching with their own logic.
134
135        Args:
136            user_item_pairs: with two columns: 'user' and 'item'.
137
138        Raises:
139            ArithmeticError: possibly raised by a predictor on testing.
140            MemoryError: possibly raised by a predictor on testing.
141            RuntimeError: when the predictor is not trained yet.
142
143        Returns:
144            a dataFrame with the columns: 'user', 'item', 'prediction'.
145        """
146        user_item_pairs['prediction'] = np.zeros(len(user_item_pairs))
147        for i, row in user_item_pairs.iterrows():
148            user_item_pairs.at[i, 'prediction'] = self.predict(
149                row['user'],
150                row['item']
151            )
152
153        return user_item_pairs
154
155
class Predictor(BasePredictor, metaclass=ABCMeta):
    """Predictor that implements basic shared functionality.

    It stores the name, the parameters and the thread limit that are passed
    to the constructor and exposes them through accessor methods.
    """

    def __init__(self, name: str, params: Dict[str, Any], num_threads: int):
        """Construct the predictor and store its configuration.

        Args:
            name: the name of the predictor.
            params: the parameters of the predictor.
            num_threads: the max number of threads the predictor can use.
        """
        BasePredictor.__init__(self)
        self.predictor_name = name
        self.params = params
        self.num_threads = num_threads

    def get_name(self) -> str:
        """Retrieve the name that the predictor was constructed with.

        Returns:
            the predictor name.
        """
        name = self.predictor_name
        return name

    def get_num_threads(self) -> int:
        """Retrieve the max number of threads the predictor is allowed to use.

        Returns:
            the number of threads.
        """
        thread_limit = self.num_threads
        return thread_limit

    def get_params(self) -> Dict[str, Any]:
        """Retrieve the parameters that the predictor was constructed with.

        Returns:
            a new dictionary with the predictor parameters.
        """
        # a shallow copy, so adding or removing keys does not affect the stored parameters
        params_copy = dict(self.params)
        return params_copy
 24class BasePredictor(BaseAlgorithm, metaclass=ABCMeta):
 25    """Base class for FairRecKit predictors.
 26
 27    A predictor is used for prediction experiments. It computes predictions
 28    for any user and item that it was trained on.
 29    Derived predictors are expected to implement the abstract interface.
 30
 31    Abstract methods:
 32
 33    on_predict
 34    on_predict_batch (optional)
 35
 36    Public methods:
 37
 38    predict
 39    predict_batch
 40    """
 41
 42    def __init__(self):
 43        """Construct the predictor."""
 44        BaseAlgorithm.__init__(self)
 45
 46    def predict(self, user: int, item: int) -> float:
 47        """Compute a prediction for the specified user and item.
 48
 49        A prediction is impossible when the user and/or item is not
 50        present in the unique users and/or items it was trained on.
 51        Moreover, the prediction could also fail in the derived
 52        implementation of the predictor.
 53
 54        Args:
 55            user: the user ID.
 56            item: the item ID.
 57
 58        Raises:
 59            ArithmeticError: possibly raised by a predictor on testing.
 60            MemoryError: possibly raised by a predictor on testing.
 61            RuntimeError: when the predictor is not trained yet.
 62
 63        Returns:
 64            the predicted rating or NaN when impossible.
 65        """
 66        if self.train_set is None:
 67            raise RuntimeError('Predictor is not trained for predictions')
 68
 69        if self.train_set.knows_user(user) and self.train_set.knows_item(item):
 70            return self.on_predict(user, item)
 71
 72        return math.nan
 73
 74    @abstractmethod
 75    def on_predict(self, user: int, item: int) -> float:
 76        """Compute a prediction for the specified user and item.
 77
 78        The user and item are assumed to be present in the train
 79        set that the predictor was trained on.
 80        Derived implementations are allowed to return NaN when the
 81        prediction is impossible to compute.
 82
 83        Args:
 84            user: the user ID.
 85            item: the item ID.
 86
 87        Raises:
 88            ArithmeticError: possibly raised by a predictor on testing.
 89            MemoryError: possibly raised by a predictor on testing.
 90            RuntimeError: when the predictor is not trained yet.
 91
 92        Returns:
 93            the predicted rating or NaN when impossible.
 94        """
 95        raise NotImplementedError()
 96
 97    def predict_batch(self, user_item_pairs: pd.DataFrame) -> pd.DataFrame:
 98        """Compute the predictions for each of the specified user and item pairs.
 99
100        All the users and items in the pairs that are not present in the train set that
101        the predictor was trained on are set to NaN after predictions are made.
102
103        Args:
104            user_item_pairs: with at least two columns: 'user' and 'item'.
105
106        Raises:
107            ArithmeticError: possibly raised by a predictor on testing.
108            MemoryError: possibly raised by a predictor on testing.
109            RuntimeError: when the predictor is not trained yet.
110
111        Returns:
112            a dataFrame with the columns: 'user', 'item', 'prediction'.
113        """
114        if self.train_set is None:
115            raise RuntimeError('Predictor is not trained for predictions')
116
117        user_item_pairs = user_item_pairs[['user', 'item']]
118        user_item_pairs = self.on_predict_batch(user_item_pairs)
119
120        # resolve the rows that contain unknown users and/or items
121        unknown_user_item = ~self.train_set.knows_user_list(user_item_pairs['user']) | \
122                            ~self.train_set.knows_item_list(user_item_pairs['item'])
123
124        # replace unknown user or item predictions with NaN
125        # these predictions are already NaN in most cases except for a few (stochastic) predictors
126        user_item_pairs.loc[unknown_user_item, 'prediction'] = math.nan
127
128        return user_item_pairs
129
130    def on_predict_batch(self, user_item_pairs: pd.DataFrame) -> pd.DataFrame:
131        """Compute the predictions for each of the specified user and item pairs.
132
133        A standard batch implementation is provided, but derived classes are
134        allowed to override batching with their own logic.
135
136        Args:
137            user_item_pairs: with two columns: 'user' and 'item'.
138
139        Raises:
140            ArithmeticError: possibly raised by a predictor on testing.
141            MemoryError: possibly raised by a predictor on testing.
142            RuntimeError: when the predictor is not trained yet.
143
144        Returns:
145            a dataFrame with the columns: 'user', 'item', 'prediction'.
146        """
147        user_item_pairs['prediction'] = np.zeros(len(user_item_pairs))
148        for i, row in user_item_pairs.iterrows():
149            user_item_pairs.at[i, 'prediction'] = self.predict(
150                row['user'],
151                row['item']
152            )
153
154        return user_item_pairs

Base class for FairRecKit predictors.

A predictor is used for prediction experiments. It computes predictions for any user and item that it was trained on. Derived predictors are expected to implement the abstract interface.

Abstract methods:

on_predict, on_predict_batch (optional)

Public methods:

predict, predict_batch

BasePredictor()
42    def __init__(self):
43        """Construct the predictor."""
44        BaseAlgorithm.__init__(self)

Construct the predictor.

def predict(self, user: int, item: int) -> float:
46    def predict(self, user: int, item: int) -> float:
47        """Compute a prediction for the specified user and item.
48
49        A prediction is impossible when the user and/or item is not
50        present in the unique users and/or items it was trained on.
51        Moreover, the prediction could also fail in the derived
52        implementation of the predictor.
53
54        Args:
55            user: the user ID.
56            item: the item ID.
57
58        Raises:
59            ArithmeticError: possibly raised by a predictor on testing.
60            MemoryError: possibly raised by a predictor on testing.
61            RuntimeError: when the predictor is not trained yet.
62
63        Returns:
64            the predicted rating or NaN when impossible.
65        """
66        if self.train_set is None:
67            raise RuntimeError('Predictor is not trained for predictions')
68
69        if self.train_set.knows_user(user) and self.train_set.knows_item(item):
70            return self.on_predict(user, item)
71
72        return math.nan

Compute a prediction for the specified user and item.

A prediction is impossible when the user and/or item is not present in the unique users and/or items it was trained on. Moreover, the prediction could also fail in the derived implementation of the predictor.

Args: user: the user ID. item: the item ID.

Raises: ArithmeticError: possibly raised by a predictor on testing. MemoryError: possibly raised by a predictor on testing. RuntimeError: when the predictor is not trained yet.

Returns: the predicted rating or NaN when impossible.

@abstractmethod
def on_predict(self, user: int, item: int) -> float:
74    @abstractmethod
75    def on_predict(self, user: int, item: int) -> float:
76        """Compute a prediction for the specified user and item.
77
78        The user and item are assumed to be present in the train
79        set that the predictor was trained on.
80        Derived implementations are allowed to return NaN when the
81        prediction is impossible to compute.
82
83        Args:
84            user: the user ID.
85            item: the item ID.
86
87        Raises:
88            ArithmeticError: possibly raised by a predictor on testing.
89            MemoryError: possibly raised by a predictor on testing.
90            RuntimeError: when the predictor is not trained yet.
91
92        Returns:
93            the predicted rating or NaN when impossible.
94        """
95        raise NotImplementedError()

Compute a prediction for the specified user and item.

The user and item are assumed to be present in the train set that the predictor was trained on. Derived implementations are allowed to return NaN when the prediction is impossible to compute.

Args: user: the user ID. item: the item ID.

Raises: ArithmeticError: possibly raised by a predictor on testing. MemoryError: possibly raised by a predictor on testing. RuntimeError: when the predictor is not trained yet.

Returns: the predicted rating or NaN when impossible.

def predict_batch( self, user_item_pairs: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame:
 97    def predict_batch(self, user_item_pairs: pd.DataFrame) -> pd.DataFrame:
 98        """Compute the predictions for each of the specified user and item pairs.
 99
100        All the users and items in the pairs that are not present in the train set that
101        the predictor was trained on are set to NaN after predictions are made.
102
103        Args:
104            user_item_pairs: with at least two columns: 'user' and 'item'.
105
106        Raises:
107            ArithmeticError: possibly raised by a predictor on testing.
108            MemoryError: possibly raised by a predictor on testing.
109            RuntimeError: when the predictor is not trained yet.
110
111        Returns:
112            a dataFrame with the columns: 'user', 'item', 'prediction'.
113        """
114        if self.train_set is None:
115            raise RuntimeError('Predictor is not trained for predictions')
116
117        user_item_pairs = user_item_pairs[['user', 'item']]
118        user_item_pairs = self.on_predict_batch(user_item_pairs)
119
120        # resolve the rows that contain unknown users and/or items
121        unknown_user_item = ~self.train_set.knows_user_list(user_item_pairs['user']) | \
122                            ~self.train_set.knows_item_list(user_item_pairs['item'])
123
124        # replace unknown user or item predictions with NaN
125        # these predictions are already NaN in most cases except for a few (stochastic) predictors
126        user_item_pairs.loc[unknown_user_item, 'prediction'] = math.nan
127
128        return user_item_pairs

Compute the predictions for each of the specified user and item pairs.

Predictions for pairs whose user and/or item is not present in the train set that the predictor was trained on are set to NaN after predictions are made.

Args: user_item_pairs: with at least two columns: 'user' and 'item'.

Raises: ArithmeticError: possibly raised by a predictor on testing. MemoryError: possibly raised by a predictor on testing. RuntimeError: when the predictor is not trained yet.

Returns: a dataFrame with the columns: 'user', 'item', 'prediction'.

def on_predict_batch( self, user_item_pairs: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame:
130    def on_predict_batch(self, user_item_pairs: pd.DataFrame) -> pd.DataFrame:
131        """Compute the predictions for each of the specified user and item pairs.
132
133        A standard batch implementation is provided, but derived classes are
134        allowed to override batching with their own logic.
135
136        Args:
137            user_item_pairs: with two columns: 'user' and 'item'.
138
139        Raises:
140            ArithmeticError: possibly raised by a predictor on testing.
141            MemoryError: possibly raised by a predictor on testing.
142            RuntimeError: when the predictor is not trained yet.
143
144        Returns:
145            a dataFrame with the columns: 'user', 'item', 'prediction'.
146        """
147        user_item_pairs['prediction'] = np.zeros(len(user_item_pairs))
148        for i, row in user_item_pairs.iterrows():
149            user_item_pairs.at[i, 'prediction'] = self.predict(
150                row['user'],
151                row['item']
152            )
153
154        return user_item_pairs

Compute the predictions for each of the specified user and item pairs.

A standard batch implementation is provided, but derived classes are allowed to override batching with their own logic.

Args: user_item_pairs: with two columns: 'user' and 'item'.

Raises: ArithmeticError: possibly raised by a predictor on testing. MemoryError: possibly raised by a predictor on testing. RuntimeError: when the predictor is not trained yet.

Returns: a dataFrame with the columns: 'user', 'item', 'prediction'.

class Predictor(BasePredictor):
class Predictor(BasePredictor, metaclass=ABCMeta):
    """Predictor that implements basic shared functionality.

    It stores the name, the parameters and the thread limit that are passed
    to the constructor and exposes them through accessor methods.
    """

    def __init__(self, name: str, params: Dict[str, Any], num_threads: int):
        """Construct the predictor and store its configuration.

        Args:
            name: the name of the predictor.
            params: the parameters of the predictor.
            num_threads: the max number of threads the predictor can use.
        """
        BasePredictor.__init__(self)
        self.predictor_name = name
        self.params = params
        self.num_threads = num_threads

    def get_name(self) -> str:
        """Retrieve the name that the predictor was constructed with.

        Returns:
            the predictor name.
        """
        name = self.predictor_name
        return name

    def get_num_threads(self) -> int:
        """Retrieve the max number of threads the predictor is allowed to use.

        Returns:
            the number of threads.
        """
        thread_limit = self.num_threads
        return thread_limit

    def get_params(self) -> Dict[str, Any]:
        """Retrieve the parameters that the predictor was constructed with.

        Returns:
            a new dictionary with the predictor parameters.
        """
        # a shallow copy, so adding or removing keys does not affect the stored parameters
        params_copy = dict(self.params)
        return params_copy

Predictor that implements basic shared functionality.

Predictor(name: str, params: Dict[str, Any], num_threads: int)
160    def __init__(self, name: str, params: Dict[str, Any], num_threads: int):
161        """Construct the predictor.
162
163        Args:
164            name: the name of the predictor.
165            params: the parameters of the predictor.
166            num_threads: the max number of threads the predictor can use.
167        """
168        BasePredictor.__init__(self)
169        self.num_threads = num_threads
170        self.predictor_name = name
171        self.params = params

Construct the predictor.

Args: name: the name of the predictor. params: the parameters of the predictor. num_threads: the max number of threads the predictor can use.

def get_name(self) -> str:
173    def get_name(self) -> str:
174        """Get the name of the predictor.
175
176        Returns:
177            the predictor name.
178        """
179        return self.predictor_name

Get the name of the predictor.

Returns: the predictor name.

def get_num_threads(self) -> int:
181    def get_num_threads(self) -> int:
182        """Get the max number of threads the predictor can use.
183
184        Returns:
185            the number of threads.
186        """
187        return self.num_threads

Get the max number of threads the predictor can use.

Returns: the number of threads.

def get_params(self) -> Dict[str, Any]:
189    def get_params(self) -> Dict[str, Any]:
190        """Get the parameters of the predictor.
191
192        Returns:
193            the predictor parameters.
194        """
195        return dict(self.params)

Get the parameters of the predictor.

Returns: the predictor parameters.