src.fairreckitlib.data.filter.categorical_filter

Module to filter on categories, like country or gender.

Classes:

CategoricalFilter: Filter the dataframe on categorical data, such as country or gender.

Functions:

create_categorical_filter: Create an instance of CategoricalFilter.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

 1"""Module to filter on categories, like country or gender.
 2
 3Classes:
 4
 5    CategoricalFilter: Filter the dataframe on categorical data, such as country or gender.
 6
 7Functions:
 8
 9    create_categorical_filter: Create an instance of CategoricalFilter.
10
11This program has been developed by students from the bachelor Computer Science at
12Utrecht University within the Software Project course.
13© Copyright Utrecht University (Department of Information and Computing Sciences)
14"""
15
16from typing import Any, Dict, List
17import numpy
18import pandas as pd
19from .filter_constants import FILTER_CATEGORICAL
20from .base_filter import DataFilter
21
22
class CategoricalFilter(DataFilter):
    """Filter the dataframe on categorical data, such as country or gender.

    Rows are kept when the value in the chosen column is one of the given
    conditions.

    Public method:
        filter
    """

    def get_type(self) -> str:
        """Get the type of the filter.

        Returns:
            The type name of the filter.
        """
        return FILTER_CATEGORICAL

    def filter(self, dataframe: pd.DataFrame, column_name='',
               conditions: List[Any]=None) -> pd.DataFrame:
        """Filter on a list of categories.

        Args:
            dataframe: Dataframe to be filtered.
            column_name (str): Name of the column where the conditions need to be met.
            conditions (List[Any]): Values that are allowed in the column. None is
                treated as an empty list. The list that is passed in is never modified.

        Returns:
            A dataframe with a fresh 0..n-1 index that only contains the rows whose
            column_name value is one of the conditions. When column_name is not a
            column of the dataframe, the result of the inherited __empty_df__ helper
            is returned instead (presumably an empty dataframe; see the base class).
        """
        if column_name not in dataframe.columns:
            return self.__empty_df__(dataframe)
        conditions = self._handle_none_value(conditions)
        df_filter = dataframe[column_name].isin(conditions)
        return dataframe[df_filter].reset_index(drop=True)

    def _filter(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Private filter used in run(). Requires configuration file.

        The column name is taken from get_name() and the allowed values from
        params['values'].
        """
        return self.filter(dataframe, self.get_name(), self.params['values'])

    @staticmethod
    def _handle_none_value(conditions: List[Any]):
        """Normalise the conditions and pair a None entry with numpy.nan.

        Args:
            conditions: The allowed values, or None.

        Returns:
            An empty list when conditions is None. A new list with numpy.nan added
            at the end when None is one of the conditions (presumably so that
            missing values stored as NaN are matched as well). Otherwise the
            conditions object itself, untouched.
        """
        if conditions is None:
            return []
        if None in conditions:
            # Build a new list instead of appending in place: the conditions usually
            # come straight from the stored params, and mutating them would add one
            # more NaN on every run. numpy.nan is used because the numpy.NaN alias
            # no longer exists in NumPy 2.0.
            return [*conditions, numpy.nan]
        return conditions
69
70
def create_categorical_filter(name: str, params: Dict[str, Any], **kwargs) -> DataFilter:
    """Build a CategoricalFilter from its name and configuration.

    Args:
        name: Name of the filter.
        params: Configuration of the filter.
        **kwargs: Extra keyword arguments, forwarded unchanged to the
            CategoricalFilter constructor (documented as containing
            dataset and matrix_name).

    Returns:
        The newly constructed CategoricalFilter.
    """
    categorical_filter = CategoricalFilter(name, params, **kwargs)
    return categorical_filter
class CategoricalFilter(src.fairreckitlib.data.filter.base_filter.DataFilter):
class CategoricalFilter(DataFilter):
    """Filter the dataframe on categorical data, such as country or gender.

    Rows are kept when the value in the chosen column is one of the given
    conditions.

    Public method:
        filter
    """

    def get_type(self) -> str:
        """Get the type of the filter.

        Returns:
            The type name of the filter.
        """
        return FILTER_CATEGORICAL

    def filter(self, dataframe: pd.DataFrame, column_name='',
               conditions: List[Any]=None) -> pd.DataFrame:
        """Filter on a list of categories.

        Args:
            dataframe: Dataframe to be filtered.
            column_name (str): Name of the column where the conditions need to be met.
            conditions (List[Any]): Values that are allowed in the column. None is
                treated as an empty list. The list that is passed in is never modified.

        Returns:
            A dataframe with a fresh 0..n-1 index that only contains the rows whose
            column_name value is one of the conditions. When column_name is not a
            column of the dataframe, the result of the inherited __empty_df__ helper
            is returned instead (presumably an empty dataframe; see the base class).
        """
        if column_name not in dataframe.columns:
            return self.__empty_df__(dataframe)
        conditions = self._handle_none_value(conditions)
        df_filter = dataframe[column_name].isin(conditions)
        return dataframe[df_filter].reset_index(drop=True)

    def _filter(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Private filter used in run(). Requires configuration file.

        The column name is taken from get_name() and the allowed values from
        params['values'].
        """
        return self.filter(dataframe, self.get_name(), self.params['values'])

    @staticmethod
    def _handle_none_value(conditions: List[Any]):
        """Normalise the conditions and pair a None entry with numpy.nan.

        Args:
            conditions: The allowed values, or None.

        Returns:
            An empty list when conditions is None. A new list with numpy.nan added
            at the end when None is one of the conditions (presumably so that
            missing values stored as NaN are matched as well). Otherwise the
            conditions object itself, untouched.
        """
        if conditions is None:
            return []
        if None in conditions:
            # Build a new list instead of appending in place: the conditions usually
            # come straight from the stored params, and mutating them would add one
            # more NaN on every run. numpy.nan is used because the numpy.NaN alias
            # no longer exists in NumPy 2.0.
            return [*conditions, numpy.nan]
        return conditions

Filter the dataframe on categorical data, such as country or gender.

Public method: filter

def get_type(self) -> str:
31    def get_type(self) -> str:
32        """Get the type of the filter.
33
34        Returns:
35            The type name of the filter.
36        """
37        return FILTER_CATEGORICAL

Get the type of the filter.

Returns: The type name of the filter.

def filter( self, dataframe: pandas.core.frame.DataFrame, column_name='', conditions: List[Any] = None) -> pandas.core.frame.DataFrame:
39    def filter(self, dataframe: pd.DataFrame, column_name='',
40               conditions: List[Any]=None) -> pd.DataFrame:
41        """Filter on a list of categories.
42
43        Args:
44            dataframe: Dataframe to be filtered.
45            column_name (str): Name of the column where the conditions need to be met.
46            conditions (List[Any]): A list of values,
47                where values of the column_name in the resulting dataframe meet some condition.
48
49        Returns:
50            A filtered dataframe.
51        """
52        if column_name not in dataframe.columns:
53            return self.__empty_df__(dataframe)
54        conditions = self._handle_none_value(conditions)
55        df_filter = dataframe[column_name].isin(conditions)
56        return dataframe[df_filter].reset_index(drop=True)

Filter on a list of categories.

Args: dataframe: Dataframe to be filtered. column_name (str): Name of the column where the conditions need to be met. conditions (List[Any]): A list of values, where values of the column_name in the resulting dataframe meet some condition.

Returns: A filtered dataframe.

def create_categorical_filter( name: str, params: Dict[str, Any], **kwargs) -> src.fairreckitlib.data.filter.base_filter.DataFilter:
def create_categorical_filter(name: str, params: Dict[str, Any], **kwargs) -> DataFilter:
    """Build a CategoricalFilter from its name and configuration.

    Args:
        name: Name of the filter.
        params: Configuration of the filter.
        **kwargs: Extra keyword arguments, forwarded unchanged to the
            CategoricalFilter constructor (documented as containing
            dataset and matrix_name).

    Returns:
        The newly constructed CategoricalFilter.
    """
    categorical_filter = CategoricalFilter(name, params, **kwargs)
    return categorical_filter

Create an instance of the class CategoricalFilter.

Args: name: Name of the filter. params: Configuration file. **kwargs: Contains dataset and matrix_name.

Returns: An instance of the CategoricalFilter class.