src.fairreckitlib.model.algorithms.matrix

This module contains the matrix classes that can be used for algorithm training.

Classes:

Matrix: (base) matrix implementation for a pandas dataframe matrix.
MatrixCSR: matrix implementation that uses a sparse CSR matrix.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

  1"""This module contains the matrix classes that can be used for algorithm training.
  2
  3Classes:
  4
  5    Matrix: (base) matrix implementation for a pandas dataframe matrix.
  6    MatrixCSR: matrix implementation that uses a sparse CSR matrix.
  7
  8This program has been developed by students from the bachelor Computer Science at
  9Utrecht University within the Software Project course.
 10© Copyright Utrecht University (Department of Information and Computing Sciences)
 11"""
 12
 13import numpy as np
 14import pandas as pd
 15from scipy import sparse
 16
 17
 18class Matrix:
 19    """Base class for all train set matrices using a pandas dataframe.
 20
 21    The intended use of the matrix class is to add an extra layer of
 22    abstraction to load a matrix into a specific format depending on
 23    the algorithm implementation that is used. Effectively this will
 24    reduce the memory usage of algorithms by loading the matrix directly
 25    into the expected format.
 26
 27    Public methods:
 28
 29    get_matrix
 30    get_items
 31    get_users
 32    get_user_rated_items
 33    knows_item
 34    knows_item_list
 35    knows_user
 36    knows_user_list
 37    """
 38
 39    def __init__(self, file_path: str):
 40        """Construct the Matrix.
 41
 42        The matrix is expected to be stored in a tab separated file without header,
 43        with the 'user', 'item', 'rating' columns in this order.
 44
 45        Args:
 46            file_path: the file path to where the matrix is stored.
 47
 48        Raises:
 49            FileNotFoundError: when the matrix file is not found.
 50        """
 51        self.matrix = pd.read_csv(
 52            file_path,
 53            sep='\t',
 54            header=None,
 55            names=['user', 'item', 'rating']
 56        )
 57        self.users = self.matrix['user'].unique()
 58        self.items = self.matrix['item'].unique()
 59
 60    def get_matrix(self) -> pd.DataFrame:
 61        """Get the matrix.
 62
 63        Returns:
 64            the matrix dataframe.
 65        """
 66        return self.matrix
 67
 68    def get_items(self) -> np.ndarray:
 69        """Get the (unique) items of the matrix.
 70
 71        Returns:
 72            a list of unique item IDs.
 73        """
 74        return self.items
 75
 76    def get_users(self) -> np.ndarray:
 77        """Get the (unique) users of the matrix.
 78
 79        Returns:
 80            a list of unique user IDs.
 81        """
 82        return self.users
 83
 84    def get_user_rated_items(self, user: int) -> np.ndarray:
 85        """Get the rated items for the specified user.
 86
 87        Args:
 88            user: the user to get the rated items of.
 89
 90        Raises:
 91            KeyError: when the user is not part of the matrix.
 92
 93        Returns:
 94            a list of item IDs that are rated by the user.
 95        """
 96        if user not in self.users:
 97            raise KeyError('User is not part of the matrix')
 98
 99        return self._get_user_rated_items(user)
100
101    def _get_user_rated_items(self, user: int) -> np.ndarray:
102        """Get the rated items for the specified user.
103
104        Args:
105            user: the user to get the rated items of.
106
107        Returns:
108            a list of item IDs that are rated by the user.
109        """
110        is_user = self.matrix['user'] == user
111        return np.array(self.matrix.loc[is_user]['item'].tolist())
112
113    def knows_item(self, item: int) -> bool:
114        """Get if the specified item is known in the matrix.
115
116        Args:
117            item: the item ID to evaluate.
118
119        Returns:
120            whether the item ID is known.
121        """
122        return item in self.items
123
124    def knows_item_list(self, items: pd.Series) -> pd.Series:
125        """Get if the specified items are known in the matrix.
126
127        Args:
128            items: the item IDs to evaluate.
129
130        Returns:
131            a boolean series of the input showing whether each item is a known item ID.
132        """
133        return items.isin(self.items)
134
135    def knows_user(self, user: int) -> bool:
136        """Get if the specified user is known in the matrix.
137
138        Args:
139            user: the user ID to evaluate.
140
141        Returns:
142            whether the user ID is known.
143        """
144        return user in self.users
145
146    def knows_user_list(self, users: pd.Series) -> pd.Series:
147        """Get if the specified users are known in the matrix.
148
149        Args:
150            users: the user IDs to evaluate.
151
152        Returns:
153            a boolean series of the input showing whether each user is a known user ID.
154        """
155        return users.isin(self.users)
156
157
class MatrixCSR(Matrix):
    """Matrix implementation with a sparse CSR matrix.

    The user and item IDs of the loaded ratings are used directly as the
    row and column indices of the sparse matrix.
    """

    def __init__(self, file_path: str):
        """Construct the CSR Matrix.

        The csr matrix is expected to be stored in a tab separated file without header,
        with the 'user', 'item', 'rating' columns in this order.
        The matrix is loaded into a dataframe and converted to a CSR matrix.

        Args:
            file_path: the file path to where the matrix is stored.
        """
        super().__init__(file_path)
        # The base class leaves the loaded dataframe in self.matrix; it is only
        # needed here to build the CSR matrix that replaces it.
        frame = self.matrix
        self.matrix = sparse.csr_matrix(
            (frame['rating'], (frame['user'], frame['item']))
        )

    def get_matrix(self) -> sparse.csr_matrix:
        """Get the matrix.

        Returns:
            the csr matrix.
        """
        return self.matrix

    def _get_user_rated_items(self, user: int) -> np.ndarray:
        """Get the rated items for the specified user.

        Args:
            user: the user to get the rated items of.

        Returns:
            an array of item IDs that are rated by the user.
        """
        # Slice out the user's row and read the column indices of its stored
        # entries; the column index is the item ID.
        return self.matrix[user].tocoo().col
class Matrix:
 19class Matrix:
 20    """Base class for all train set matrices using a pandas dataframe.
 21
 22    The intended use of the matrix class is to add an extra layer of
 23    abstraction to load a matrix into a specific format depending on
 24    the algorithm implementation that is used. Effectively this will
 25    reduce the memory usage of algorithms by loading the matrix directly
 26    into the expected format.
 27
 28    Public methods:
 29
 30    get_matrix
 31    get_items
 32    get_users
 33    get_user_rated_items
 34    knows_item
 35    knows_item_list
 36    knows_user
 37    knows_user_list
 38    """
 39
 40    def __init__(self, file_path: str):
 41        """Construct the Matrix.
 42
 43        The matrix is expected to be stored in a tab separated file without header,
 44        with the 'user', 'item', 'rating' columns in this order.
 45
 46        Args:
 47            file_path: the file path to where the matrix is stored.
 48
 49        Raises:
 50            FileNotFoundError: when the matrix file is not found.
 51        """
 52        self.matrix = pd.read_csv(
 53            file_path,
 54            sep='\t',
 55            header=None,
 56            names=['user', 'item', 'rating']
 57        )
 58        self.users = self.matrix['user'].unique()
 59        self.items = self.matrix['item'].unique()
 60
 61    def get_matrix(self) -> pd.DataFrame:
 62        """Get the matrix.
 63
 64        Returns:
 65            the matrix dataframe.
 66        """
 67        return self.matrix
 68
 69    def get_items(self) -> np.ndarray:
 70        """Get the (unique) items of the matrix.
 71
 72        Returns:
 73            a list of unique item IDs.
 74        """
 75        return self.items
 76
 77    def get_users(self) -> np.ndarray:
 78        """Get the (unique) users of the matrix.
 79
 80        Returns:
 81            a list of unique user IDs.
 82        """
 83        return self.users
 84
 85    def get_user_rated_items(self, user: int) -> np.ndarray:
 86        """Get the rated items for the specified user.
 87
 88        Args:
 89            user: the user to get the rated items of.
 90
 91        Raises:
 92            KeyError: when the user is not part of the matrix.
 93
 94        Returns:
 95            a list of item IDs that are rated by the user.
 96        """
 97        if user not in self.users:
 98            raise KeyError('User is not part of the matrix')
 99
100        return self._get_user_rated_items(user)
101
102    def _get_user_rated_items(self, user: int) -> np.ndarray:
103        """Get the rated items for the specified user.
104
105        Args:
106            user: the user to get the rated items of.
107
108        Returns:
109            a list of item IDs that are rated by the user.
110        """
111        is_user = self.matrix['user'] == user
112        return np.array(self.matrix.loc[is_user]['item'].tolist())
113
114    def knows_item(self, item: int) -> bool:
115        """Get if the specified item is known in the matrix.
116
117        Args:
118            item: the item ID to evaluate.
119
120        Returns:
121            whether the item ID is known.
122        """
123        return item in self.items
124
125    def knows_item_list(self, items: pd.Series) -> pd.Series:
126        """Get if the specified items are known in the matrix.
127
128        Args:
129            items: the item IDs to evaluate.
130
131        Returns:
132            a boolean series of the input showing whether each item is a known item ID.
133        """
134        return items.isin(self.items)
135
136    def knows_user(self, user: int) -> bool:
137        """Get if the specified user is known in the matrix.
138
139        Args:
140            user: the user ID to evaluate.
141
142        Returns:
143            whether the user ID is known.
144        """
145        return user in self.users
146
147    def knows_user_list(self, users: pd.Series) -> pd.Series:
148        """Get if the specified users are known in the matrix.
149
150        Args:
151            users: the user IDs to evaluate.
152
153        Returns:
154            a boolean series of the input showing whether each user is a known user ID.
155        """
156        return users.isin(self.users)

Base class for all train set matrices using a pandas dataframe.

The intended use of the matrix class is to add an extra layer of abstraction to load a matrix into a specific format depending on the algorithm implementation that is used. Effectively this will reduce the memory usage of algorithms by loading the matrix directly into the expected format.

Public methods:

get_matrix, get_items, get_users, get_user_rated_items, knows_item, knows_item_list, knows_user, knows_user_list

Matrix(file_path: str)
40    def __init__(self, file_path: str):
41        """Construct the Matrix.
42
43        The matrix is expected to be stored in a tab separated file without header,
44        with the 'user', 'item', 'rating' columns in this order.
45
46        Args:
47            file_path: the file path to where the matrix is stored.
48
49        Raises:
50            FileNotFoundError: when the matrix file is not found.
51        """
52        self.matrix = pd.read_csv(
53            file_path,
54            sep='\t',
55            header=None,
56            names=['user', 'item', 'rating']
57        )
58        self.users = self.matrix['user'].unique()
59        self.items = self.matrix['item'].unique()

Construct the Matrix.

The matrix is expected to be stored in a tab separated file without header, with the 'user', 'item', 'rating' columns in this order.

Args: file_path: the file path to where the matrix is stored.

Raises: FileNotFoundError: when the matrix file is not found.

def get_matrix(self) -> pandas.core.frame.DataFrame:
61    def get_matrix(self) -> pd.DataFrame:
62        """Get the matrix.
63
64        Returns:
65            the matrix dataframe.
66        """
67        return self.matrix

Get the matrix.

Returns: the matrix dataframe.

def get_items(self) -> numpy.ndarray:
69    def get_items(self) -> np.ndarray:
70        """Get the (unique) items of the matrix.
71
72        Returns:
73            a list of unique item IDs.
74        """
75        return self.items

Get the (unique) items of the matrix.

Returns: a list of unique item IDs.

def get_users(self) -> numpy.ndarray:
77    def get_users(self) -> np.ndarray:
78        """Get the (unique) users of the matrix.
79
80        Returns:
81            a list of unique user IDs.
82        """
83        return self.users

Get the (unique) users of the matrix.

Returns: a list of unique user IDs.

def get_user_rated_items(self, user: int) -> numpy.ndarray:
 85    def get_user_rated_items(self, user: int) -> np.ndarray:
 86        """Get the rated items for the specified user.
 87
 88        Args:
 89            user: the user to get the rated items of.
 90
 91        Raises:
 92            KeyError: when the user is not part of the matrix.
 93
 94        Returns:
 95            a list of item IDs that are rated by the user.
 96        """
 97        if user not in self.users:
 98            raise KeyError('User is not part of the matrix')
 99
100        return self._get_user_rated_items(user)

Get the rated items for the specified user.

Args: user: the user to get the rated items of.

Raises: KeyError: when the user is not part of the matrix.

Returns: a list of item IDs that are rated by the user.

def knows_item(self, item: int) -> bool:
114    def knows_item(self, item: int) -> bool:
115        """Get if the specified item is known in the matrix.
116
117        Args:
118            item: the item ID to evaluate.
119
120        Returns:
121            whether the item ID is known.
122        """
123        return item in self.items

Get if the specified item is known in the matrix.

Args: item: the item ID to evaluate.

Returns: whether the item ID is known.

def knows_item_list(self, items: pandas.core.series.Series) -> pandas.core.series.Series:
125    def knows_item_list(self, items: pd.Series) -> pd.Series:
126        """Get if the specified items are known in the matrix.
127
128        Args:
129            items: the item IDs to evaluate.
130
131        Returns:
132            a boolean series of the input showing whether each item is a known item ID.
133        """
134        return items.isin(self.items)

Get if the specified items are known in the matrix.

Args: items: the item IDs to evaluate.

Returns: a boolean series of the input showing whether each item is a known item ID.

def knows_user(self, user: int) -> bool:
136    def knows_user(self, user: int) -> bool:
137        """Get if the specified user is known in the matrix.
138
139        Args:
140            user: the user ID to evaluate.
141
142        Returns:
143            whether the user ID is known.
144        """
145        return user in self.users

Get if the specified user is known in the matrix.

Args: user: the user ID to evaluate.

Returns: whether the user ID is known.

def knows_user_list(self, users: pandas.core.series.Series) -> pandas.core.series.Series:
147    def knows_user_list(self, users: pd.Series) -> pd.Series:
148        """Get if the specified users are known in the matrix.
149
150        Args:
151            users: the user IDs to evaluate.
152
153        Returns:
154            a boolean series of the input showing whether each user is a known user ID.
155        """
156        return users.isin(self.users)

Get if the specified users are known in the matrix.

Args: users: the user IDs to evaluate.

Returns: a boolean series of the input showing whether each user is a known user ID.

class MatrixCSR(Matrix):
class MatrixCSR(Matrix):
    """Matrix implementation with a sparse CSR matrix.

    The user and item IDs of the loaded ratings are used directly as the
    row and column indices of the sparse matrix.
    """

    def __init__(self, file_path: str):
        """Construct the CSR Matrix.

        The csr matrix is expected to be stored in a tab separated file without header,
        with the 'user', 'item', 'rating' columns in this order.
        The matrix is loaded into a dataframe and converted to a CSR matrix.

        Args:
            file_path: the file path to where the matrix is stored.
        """
        super().__init__(file_path)
        # The base class leaves the loaded dataframe in self.matrix; it is only
        # needed here to build the CSR matrix that replaces it.
        frame = self.matrix
        self.matrix = sparse.csr_matrix(
            (frame['rating'], (frame['user'], frame['item']))
        )

    def get_matrix(self) -> sparse.csr_matrix:
        """Get the matrix.

        Returns:
            the csr matrix.
        """
        return self.matrix

    def _get_user_rated_items(self, user: int) -> np.ndarray:
        """Get the rated items for the specified user.

        Args:
            user: the user to get the rated items of.

        Returns:
            an array of item IDs that are rated by the user.
        """
        # Slice out the user's row and read the column indices of its stored
        # entries; the column index is the item ID.
        return self.matrix[user].tocoo().col

Matrix implementation with a sparse CSR matrix.

MatrixCSR(file_path: str)
162    def __init__(self, file_path: str):
163        """Construct the CSR Matrix.
164
165        The csr matrix is expected to be stored in a tab separated file without header,
166        with the 'user', 'item', 'rating' columns in this order.
167        The matrix is loaded into a dataframe and converted to a CSR matrix.
168
169        Args:
170            file_path: the file path to where the matrix is stored.
171        """
172        Matrix.__init__(self, file_path)
173        self.matrix = sparse.csr_matrix(
174            (self.matrix['rating'], (self.matrix['user'], self.matrix['item']))
175        )

Construct the CSR Matrix.

The csr matrix is expected to be stored in a tab separated file without header, with the 'user', 'item', 'rating' columns in this order. The matrix is loaded into a dataframe and converted to a CSR matrix.

Args: file_path: the file path to where the matrix is stored.

def get_matrix(self) -> scipy.sparse._csr.csr_matrix:
177    def get_matrix(self) -> sparse.csr_matrix:
178        """Get the matrix.
179
180        Returns:
181            the csr matrix.
182        """
183        return self.matrix

Get the matrix.

Returns: the csr matrix.