src.fairreckitlib.data.split.base_splitter

This module contains the base class for data splitting.

Classes:

DataSplitter: the base class for data splitting.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

 1"""This module contains the base class for data splitting.
 2
 3Classes:
 4
 5    DataSplitter: the base class for data splitting.
 6
 7This program has been developed by students from the bachelor Computer Science at
 8Utrecht University within the Software Project course.
 9© Copyright Utrecht University (Department of Information and Computing Sciences)
10"""
11
12from typing import Any, Dict, Tuple
13
14import pandas as pd
15
16from ..data_modifier import DataModifier
17from .split_constants import MIN_TEST_RATIO, MAX_TEST_RATIO
18
19
class DataSplitter(DataModifier):
    """Base class for FairRecKit data splitters.

    A splitter is used to split a dataframe into a train and test set.
    This base class only stores and validates the test ratio; the actual
    splitting is left to 'run', which raises NotImplementedError here.

    Public methods:

    get_test_ratio
    """

    def __init__(self, name: str, params: Dict[str, Any], test_ratio: float):
        """Construct the base splitter.

        Args:
            name: the name of the splitter.
            params: a dictionary containing the parameters for the splitter.
            test_ratio: the fraction of users to use for testing.

        Raises:
            RuntimeError: when the test ratio is not within the accepted min and max range
                (both bounds are inclusive), or when it is NaN.
        """
        DataModifier.__init__(self, name, params)
        self.test_ratio = test_ratio

        # The negated chained comparison also rejects NaN, which would slip through
        # separate '<' and '>' checks because both of those evaluate to False.
        if not MIN_TEST_RATIO <= self.test_ratio <= MAX_TEST_RATIO:
            raise RuntimeError(
                f'Invalid test ratio {self.test_ratio}: expected a value between '
                f'{MIN_TEST_RATIO} and {MAX_TEST_RATIO}.'
            )

    def get_test_ratio(self) -> float:
        """Get the test ratio used by the splitter when run.

        Returns:
            the test ratio
        """
        return self.test_ratio

    def run(self, dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Run the splitter on the specified dataframe.

        Args:
            dataframe: the dataframe to split, with at least the 'user' column.

        Raises:
            NotImplementedError: always, the base class does not implement a split.

        Returns:
            the train and test set dataframes of the split.
        """
        raise NotImplementedError()
class DataSplitter(src.fairreckitlib.data.data_modifier.DataModifier):
class DataSplitter(DataModifier):
    """Base class for FairRecKit data splitters.

    A splitter is used to split a dataframe into a train and test set.
    This base class only stores and validates the test ratio; the actual
    splitting is left to 'run', which raises NotImplementedError here.

    Public methods:

    get_test_ratio
    """

    def __init__(self, name: str, params: Dict[str, Any], test_ratio: float):
        """Construct the base splitter.

        Args:
            name: the name of the splitter.
            params: a dictionary containing the parameters for the splitter.
            test_ratio: the fraction of users to use for testing.

        Raises:
            RuntimeError: when the test ratio is not within the accepted min and max range
                (both bounds are inclusive), or when it is NaN.
        """
        DataModifier.__init__(self, name, params)
        self.test_ratio = test_ratio

        # The negated chained comparison also rejects NaN, which would slip through
        # separate '<' and '>' checks because both of those evaluate to False.
        if not MIN_TEST_RATIO <= self.test_ratio <= MAX_TEST_RATIO:
            raise RuntimeError(
                f'Invalid test ratio {self.test_ratio}: expected a value between '
                f'{MIN_TEST_RATIO} and {MAX_TEST_RATIO}.'
            )

    def get_test_ratio(self) -> float:
        """Get the test ratio used by the splitter when run.

        Returns:
            the test ratio
        """
        return self.test_ratio

    def run(self, dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Run the splitter on the specified dataframe.

        Args:
            dataframe: the dataframe to split, with at least the 'user' column.

        Raises:
            NotImplementedError: always, the base class does not implement a split.

        Returns:
            the train and test set dataframes of the split.
        """
        raise NotImplementedError()

Base class for FairRecKit data splitters.

A splitter is used to split a dataframe into a train and test set.

Public methods:

get_test_ratio

DataSplitter(name: str, params: Dict[str, Any], test_ratio: float)
31    def __init__(self, name: str, params: Dict[str, Any], test_ratio: float):
32        """Construct the base splitter.
33
34        Args:
35            name: the name of the splitter.
36            params: a dictionary containing the parameters for the splitter.
37            test_ratio: the fraction of users to use for testing.
38
39        Raises:
40            RuntimeError: when the test ratio is not within the accepted min and max range.
41        """
42        DataModifier.__init__(self, name, params)
43        self.test_ratio = test_ratio
44
45        if self.test_ratio < MIN_TEST_RATIO or self.test_ratio > MAX_TEST_RATIO:
46            raise RuntimeError()

Construct the base splitter.

Args:

- name: the name of the splitter.
- params: a dictionary containing the parameters for the splitter.
- test_ratio: the fraction of users to use for testing.

Raises: RuntimeError: when the test ratio is not within the accepted min and max range.

def get_test_ratio(self) -> float:
48    def get_test_ratio(self) -> float:
49        """Get the test ratio used by the splitter when run.
50
51        Returns:
52            the test ratio
53        """
54        return self.test_ratio

Get the test ratio used by the splitter when run.

Returns: the test ratio

def run( self, dataframe: pandas.core.frame.DataFrame) -> Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]:
56    def run(self, dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
57        """Run the splitter on the specified dataframe.
58
59        Args:
60            dataframe: with at least the 'user' column.
61
62        Returns:
63            the train and test set dataframes of the split.
64        """
65        raise NotImplementedError()

Run the splitter on the specified dataframe.

Args: dataframe: the dataframe to split, with at least the 'user' column.

Returns: the train and test set dataframes of the split.