src.fairreckitlib.data.filter.numerical_filter

Module to filter on numerical data, like age or rating.

Classes:

NumericalFilter: Filter the dataframe on numerical data, such as age or rating.

Functions:

create_numerical_filter: Create an instance of NumericalFilter.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

 1"""Module to filter on numerical data, like age or rating.
 2
 3Classes:
 4
 5    NumericalFilter: Filter the dataframe on numerical data, such as age or rating.
 6
 7Functions:
 8
 9    create_numerical_filter: Create an instance of NumericalFilter.
10
11
12This program has been developed by students from the bachelor Computer Science at
13Utrecht University within the Software Project course.
14© Copyright Utrecht University (Department of Information and Computing Sciences)
15"""
16
17import math
18from typing import Any, Dict
19import pandas as pd
20from .base_filter import DataFilter
21from .filter_constants import FILTER_NUMERICAL
22
class NumericalFilter(DataFilter):
    """Filter the dataframe on numerical data, such as age or rating.

    Rows are kept when the value of the selected column lies inside a closed
    interval, i.e. both the minimum and the maximum are part of the range.

    Public method:
        filter
    """

    def get_type(self) -> str:
        """Get the type of the filter.

        Returns:
            The type name of the filter.
        """
        return FILTER_NUMERICAL

    def filter(self, dataframe: pd.DataFrame, column_name='',
               min_val=0, max_val=math.inf) -> pd.DataFrame:
        """Filter the dataframe on values in the range of min_val and max_val.

        Args:
            dataframe: Dataframe to be filtered on.
            column_name (str): Name of the column where the conditions need to be met.
            min_val (int | float): Minimal number, inclusive (default 0).
            max_val (int | float): Maximum number, inclusive (default infinite).

        Returns:
            A filtered dataframe with a fresh 0-based index. When column_name is
            not a column of the dataframe, the result of __empty_df__ is returned.
        """
        if column_name not in dataframe.columns:
            # NOTE(review): __empty_df__ is not defined in this class; presumably
            # it is inherited from DataFilter -- confirm.
            return self.__empty_df__(dataframe)
        # Boolean mask that is True for every row inside [min_val, max_val].
        df_filter = dataframe[column_name].between(min_val, max_val, inclusive="both")
        # Drop the old index so the result does not expose gaps left by removed rows.
        return dataframe[df_filter].reset_index(drop=True)

    def _filter(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Private filter used in run(). Requires configuration file.

        The column is the name of this filter and the bounds are read from
        the 'min' and 'max' entries of the 'range' parameter.
        """
        numerical_range = self.params['range']
        return self.filter(dataframe, self.get_name(),
                           numerical_range["min"], numerical_range["max"])

    def _filter_empty(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Filter only the empty value: -1."""
        # The original key 'colum_name' is kept for backward compatibility, but
        # when it is absent fall back to the filter name, which is the column
        # that _filter uses, instead of failing with a KeyError.
        try:
            column_name = self.params['colum_name']
        except KeyError:
            column_name = self.get_name()
        return self.filter(dataframe, column_name, -1, -1)
65
66
def create_numerical_filter(name: str,
                            params: Dict[str, Any],
                            **kwargs) -> DataFilter:
    """Build a NumericalFilter from a name and its configuration.

    Args:
        name: Name of the filter.
        params: Configuration file.
        **kwargs: Contains dataset and matrix_name.

    Returns:
        An instance of the NumericalFilter class.
    """
    # All arguments are forwarded untouched to the NumericalFilter constructor.
    numerical_filter = NumericalFilter(name, params, **kwargs)
    return numerical_filter
class NumericalFilter(src.fairreckitlib.data.filter.base_filter.DataFilter):
class NumericalFilter(DataFilter):
    """Filter the dataframe on numerical data, such as age or rating.

    Rows are kept when the value of the selected column lies inside a closed
    interval, i.e. both the minimum and the maximum are part of the range.

    Public method:
        filter
    """

    def get_type(self) -> str:
        """Get the type of the filter.

        Returns:
            The type name of the filter.
        """
        return FILTER_NUMERICAL

    def filter(self, dataframe: pd.DataFrame, column_name='',
               min_val=0, max_val=math.inf) -> pd.DataFrame:
        """Filter the dataframe on values in the range of min_val and max_val.

        Args:
            dataframe: Dataframe to be filtered on.
            column_name (str): Name of the column where the conditions need to be met.
            min_val (int | float): Minimal number, inclusive (default 0).
            max_val (int | float): Maximum number, inclusive (default infinite).

        Returns:
            A filtered dataframe with a fresh 0-based index. When column_name is
            not a column of the dataframe, the result of __empty_df__ is returned.
        """
        if column_name not in dataframe.columns:
            # NOTE(review): __empty_df__ is not defined in this class; presumably
            # it is inherited from DataFilter -- confirm.
            return self.__empty_df__(dataframe)
        # Boolean mask that is True for every row inside [min_val, max_val].
        df_filter = dataframe[column_name].between(min_val, max_val, inclusive="both")
        # Drop the old index so the result does not expose gaps left by removed rows.
        return dataframe[df_filter].reset_index(drop=True)

    def _filter(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Private filter used in run(). Requires configuration file.

        The column is the name of this filter and the bounds are read from
        the 'min' and 'max' entries of the 'range' parameter.
        """
        numerical_range = self.params['range']
        return self.filter(dataframe, self.get_name(),
                           numerical_range["min"], numerical_range["max"])

    def _filter_empty(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Filter only the empty value: -1."""
        # The original key 'colum_name' is kept for backward compatibility, but
        # when it is absent fall back to the filter name, which is the column
        # that _filter uses, instead of failing with a KeyError.
        try:
            column_name = self.params['colum_name']
        except KeyError:
            column_name = self.get_name()
        return self.filter(dataframe, column_name, -1, -1)

Filters the dataframe on numerical data, such as age or rating.

Public method: filter

def get_type(self) -> str:
31    def get_type(self) -> str:
32        """Get the type of the filter.
33
34        Returns:
35            The type name of the filter.
36        """
37        return FILTER_NUMERICAL

Get the type of the filter.

Returns: The type name of the filter.

def filter( self, dataframe: pandas.core.frame.DataFrame, column_name='', min_val=0, max_val=inf) -> pandas.core.frame.DataFrame:
39    def filter(self, dataframe: pd.DataFrame, column_name='',
40               min_val=0, max_val=math.inf) -> pd.DataFrame:
41        """Filter the dataframe on values in the range of min_val and max_val.
42
43        Args:
44            dataframe: Dataframe to be filtered on.
45            column_name (str): Name of the column where the conditions need to be met.
46            min_val (int | float): Minimal number (default 0).
47            max_val (int | float): Maximum number (default infinite).
48
49        Returns:
50            A filtered dataframe.
51        """
52        if column_name not in dataframe.columns:
53            return self.__empty_df__(dataframe)
54        df_filter = dataframe[column_name].between(min_val, max_val, inclusive="both")
55        return dataframe[df_filter].reset_index(drop=True)

Filter the dataframe on values in the range of min_val and max_val.

Args: dataframe: Dataframe to be filtered on. column_name (str): Name of the column where the conditions need to be met. min_val (int | float): Minimal number (default 0). max_val (int | float): Maximum number (default infinite).

Returns: A filtered dataframe.

def create_numerical_filter( name: str, params: Dict[str, Any], **kwargs) -> src.fairreckitlib.data.filter.base_filter.DataFilter:
def create_numerical_filter(name: str,
                            params: Dict[str, Any],
                            **kwargs) -> DataFilter:
    """Build a NumericalFilter from a name and its configuration.

    Args:
        name: Name of the filter.
        params: Configuration file.
        **kwargs: Contains dataset and matrix_name.

    Returns:
        An instance of the NumericalFilter class.
    """
    # All arguments are forwarded untouched to the NumericalFilter constructor.
    numerical_filter = NumericalFilter(name, params, **kwargs)
    return numerical_filter

Create an instance of the class NumericalFilter.

Args: name: Name of the filter. params: Configuration file. **kwargs: Contains dataset and matrix_name.

Returns: An instance of the NumericalFilter class.