src.fairreckitlib.data.split.random_splitter

This module contains random splitting functionality.

Classes:

RandomSplitter: can split randomly.

Functions:

create_random_splitter: create an instance of the class (factory creation compatible).

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

 1"""This module contains random splitting functionality.
 2
 3Classes:
 4
 5    RandomSplitter: can split randomly.
 6
 7Functions:
 8
 9    create_random_splitter: create an instance of the class (factory creation compatible).
10
11
12This program has been developed by students from the bachelor Computer Science at
13Utrecht University within the Software Project course.
14© Copyright Utrecht University (Department of Information and Computing Sciences)
15"""
16
17import time
18from typing import Any, Dict, Tuple
19
20import lenskit.crossfold as xf
21import pandas as pd
22from seedbank import numpy_rng
23
24from .base_splitter import DataSplitter
25
26
class RandomSplitter(DataSplitter):
    """Data splitter that divides a dataframe at random.

    The train and test set are produced by a random user-by-user split of the dataframe.
    """

    def run(self, dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Divide the dataframe into a train and a test set.

        The random generator is derived from the 'seed' entry of the splitter params and
        the size of the test sample is taken from the splitter's test ratio.

        Args:
            dataframe: with at least the 'user' column.

        Returns:
            the train and test set dataframes of the split.
        """
        generator = numpy_rng(spec=self.params['seed'])
        test_sample = xf.SampleFrac(self.test_ratio)
        # a single partition is requested, only the first yielded split is used
        partitions = xf.partition_users(dataframe, 1, test_sample, rng_spec=generator)
        for split in partitions:
            train_set, test_set = split
            return train_set, test_set
        # nothing was yielded
        return None
46
47
def create_random_splitter(name: str, params: Dict[str, Any], **kwargs) -> RandomSplitter:
    """Construct a Random Splitter (factory creation compatible).

    When the 'seed' entry of the params is None, it is replaced in place
    with the current time in whole seconds.

    Args:
        name: forwarded to the splitter constructor.
        params: the splitter parameters, containing at least the 'seed' key.

    Keyword Args:
        test_ratio: forwarded to the splitter constructor.

    Returns:
        the random data splitter.
    """
    seed = params['seed']
    if seed is None:
        seed = int(time.time())
        # store the generated seed in the params that are handed to the splitter
        params['seed'] = seed

    test_ratio = kwargs['test_ratio']
    return RandomSplitter(name, params, test_ratio)
class RandomSplitter(src.fairreckitlib.data.split.base_splitter.DataSplitter):
class RandomSplitter(DataSplitter):
    """Data splitter that divides a dataframe at random.

    The train and test set are produced by a random user-by-user split of the dataframe.
    """

    def run(self, dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Divide the dataframe into a train and a test set.

        The random generator is derived from the 'seed' entry of the splitter params and
        the size of the test sample is taken from the splitter's test ratio.

        Args:
            dataframe: with at least the 'user' column.

        Returns:
            the train and test set dataframes of the split.
        """
        generator = numpy_rng(spec=self.params['seed'])
        test_sample = xf.SampleFrac(self.test_ratio)
        # a single partition is requested, only the first yielded split is used
        partitions = xf.partition_users(dataframe, 1, test_sample, rng_spec=generator)
        for split in partitions:
            train_set, test_set = split
            return train_set, test_set
        # nothing was yielded
        return None

Random Splitter.

Splits the dataframe into a train and test set randomly user-by-user.

def run( self, dataframe: pandas.core.frame.DataFrame) -> Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
34    def run(self, dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
35        """Split the dataframe into a train and test set.
36
37        Args:
38            dataframe: with at least the 'user' column.
39
40        Returns:
41            the train and test set dataframes of the split.
42        """
43        rng_spec = numpy_rng(spec=self.params['seed'])
44        frac = xf.SampleFrac(self.test_ratio)
45        for train_set, test_set in xf.partition_users(dataframe, 1, frac, rng_spec=rng_spec):
46            return train_set, test_set

Split the dataframe into a train and test set.

Args: dataframe: with at least the 'user' column.

Returns: the train and test set dataframes of the split.

def create_random_splitter( name: str, params: Dict[str, Any], **kwargs) -> src.fairreckitlib.data.split.random_splitter.RandomSplitter:
def create_random_splitter(name: str, params: Dict[str, Any], **kwargs) -> RandomSplitter:
    """Construct a Random Splitter (factory creation compatible).

    When the 'seed' entry of the params is None, it is replaced in place
    with the current time in whole seconds.

    Args:
        name: forwarded to the splitter constructor.
        params: the splitter parameters, containing at least the 'seed' key.

    Keyword Args:
        test_ratio: forwarded to the splitter constructor.

    Returns:
        the random data splitter.
    """
    seed = params['seed']
    if seed is None:
        seed = int(time.time())
        # store the generated seed in the params that are handed to the splitter
        params['seed'] = seed

    test_ratio = kwargs['test_ratio']
    return RandomSplitter(name, params, test_ratio)

Create the Random Splitter.

Returns: the random data splitter.