src.fairreckitlib.data.split.temporal_splitter

This module contains time splitting functionality.

Classes:

TemporalSplitter: can split on timestamp.

Functions:

create_temporal_splitter: create an instance of the class (factory creation compatible).

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

 1"""This module contains time splitting functionality.
 2
 3Classes:
 4
 5    TemporalSplitter: can split on timestamp.
 6
 7Functions:
 8
 9    create_temporal_splitter: create an instance of the class (factory creation compatible).
10
11This program has been developed by students from the bachelor Computer Science at
12Utrecht University within the Software Project course.
13© Copyright Utrecht University (Department of Information and Computing Sciences)
14"""
15
16from typing import Any, Dict, Tuple
17
18import lenskit.crossfold as xf
19import pandas as pd
20
21from .base_splitter import DataSplitter
22
23
class TemporalSplitter(DataSplitter):
    """Temporal Splitter.

    Splits the dataframe into a train and test set based on time user-by-user.
    """

    def run(self, dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Split the dataframe into a train and test set.

        Per user, the rows are ordered on the 'timestamp' column and the last
        fraction of them (self.test_ratio) is selected for the test set.

        Args:
            dataframe: with at least the 'user' and 'timestamp' columns.

        Returns:
            the train and test set dataframes of the split.
        """
        frac = xf.LastFrac(self.test_ratio, col='timestamp')
        # Only one partition is requested, so the first (train, test) pair is the split.
        # Taking it explicitly avoids a silent fall-through that would return None.
        train_set, test_set = next(iter(xf.partition_users(dataframe, 1, frac)))
        return train_set, test_set
44
45
def create_temporal_splitter(name: str, params: Dict[str, Any], **kwargs) -> TemporalSplitter:
    """Build a TemporalSplitter instance (factory creation compatible).

    Args:
        name: passed to the TemporalSplitter constructor as first argument.
        params: passed to the TemporalSplitter constructor as second argument.

    Keyword Args:
        test_ratio: required; passed to the constructor as third argument.

    Raises:
        KeyError: when 'test_ratio' is not present in the keyword arguments.

    Returns:
        the temporal data splitter.
    """
    test_ratio = kwargs['test_ratio']
    splitter = TemporalSplitter(name, params, test_ratio)
    return splitter
class TemporalSplitter(src.fairreckitlib.data.split.base_splitter.DataSplitter):
class TemporalSplitter(DataSplitter):
    """Temporal Splitter.

    Splits the dataframe into a train and test set based on time user-by-user.
    """

    def run(self, dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Split the dataframe into a train and test set.

        Per user, the rows are ordered on the 'timestamp' column and the last
        fraction of them (self.test_ratio) is selected for the test set.

        Args:
            dataframe: with at least the 'user' and 'timestamp' columns.

        Returns:
            the train and test set dataframes of the split.
        """
        frac = xf.LastFrac(self.test_ratio, col='timestamp')
        # Only one partition is requested, so the first (train, test) pair is the split.
        # Taking it explicitly avoids a silent fall-through that would return None.
        train_set, test_set = next(iter(xf.partition_users(dataframe, 1, frac)))
        return train_set, test_set

Temporal Splitter.

Splits the dataframe into a train and test set based on time user-by-user.

def run( self, dataframe: pandas.core.frame.DataFrame) -> Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
31    def run(self, dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
32        """Split the dataframe into a train and test set.
33
34        For this function to work, it needs a 'user' and 'timestamp' column.
35
36        Args:
37            dataframe: with at least the 'user' column.
38
39        Returns:
40            the train and test set dataframes of the split.
41        """
42        frac = xf.LastFrac(self.test_ratio, col='timestamp')
43        for train_set, test_set in xf.partition_users(dataframe, 1, frac):
44            return train_set, test_set

Split the dataframe into a train and test set.

For this function to work, it needs a 'user' and 'timestamp' column.

Args: dataframe: with at least the 'user' and 'timestamp' columns.

Returns: the train and test set dataframes of the split.

def create_temporal_splitter( name: str, params: Dict[str, Any], **kwargs) -> src.fairreckitlib.data.split.temporal_splitter.TemporalSplitter:
def create_temporal_splitter(name: str, params: Dict[str, Any], **kwargs) -> TemporalSplitter:
    """Build a TemporalSplitter instance (factory creation compatible).

    Args:
        name: passed to the TemporalSplitter constructor as first argument.
        params: passed to the TemporalSplitter constructor as second argument.

    Keyword Args:
        test_ratio: required; passed to the constructor as third argument.

    Raises:
        KeyError: when 'test_ratio' is not present in the keyword arguments.

    Returns:
        the temporal data splitter.
    """
    test_ratio = kwargs['test_ratio']
    splitter = TemporalSplitter(name, params, test_ratio)
    return splitter

Create the Temporal Splitter.

Returns: the temporal data splitter.