src.fairreckitlib.data.filter.base_filter

Module that provides a base for all three types of filters: Numerical, Categorical, Count.

Classes:

DataFilter: Base filter class.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

  1"""Module that provides a base for all three types of filters: Numerical, Categorical, Count.
  2
  3Classes:
  4
  5    DataFilter: Base filter class.
  6
  7This program has been developed by students from the bachelor Computer Science at
  8Utrecht University within the Software Project course.
  9© Copyright Utrecht University (Department of Information and Computing Sciences)
 10"""
 11
 12from abc import ABCMeta, abstractmethod
 13from typing import Any, Dict
 14
 15import pandas as pd
 16
 17from ..set import dataset as ds
 18from ..data_modifier import DataModifier
 19
 20
 21class DataFilter(DataModifier, metaclass=ABCMeta):
 22    """Base class to filter a df (not a dataframe in particular).
 23
 24    Public method:
 25        run
 26    """
 27
 28    def __init__(self, name: str, params: Dict[str, Any], **kwargs):
 29        """Make Constructor of the class.
 30
 31        Uses optional arguments to enable sole use of subclass.filter().
 32
 33        Args:
 34            name: Configuration name of the filter.
 35            params: Configuration parameters.
 36        """
 37        DataModifier.__init__(self, name, params)
 38        self.dataset = kwargs['dataset']
 39        self.matrix_name = kwargs['matrix_name']
 40
 41    def run(self, dataframe: pd.DataFrame) -> pd.DataFrame:
 42        """Carry out the filtering.
 43
 44        Args:
 45            dataframe: Dataframe to be filtered on.
 46
 47        Return:
 48            The filtered dataframe.
 49        """
 50        # Filtering that requires external columns i.e., filter column not available in dataframe.
 51        return self._external_col_filter(dataframe)
 52
 53    @abstractmethod
 54    def get_type(self) -> str:
 55        """Get the type of the filter.
 56
 57        Returns:
 58            The type name of the filter.
 59        """
 60        raise NotImplementedError()
 61
 62    @abstractmethod
 63    def _filter(self, dataframe: pd.DataFrame) -> pd.DataFrame:
 64        """Sugar coats subclasses' filter() for run and _external_col_filter as sugar.
 65
 66        Raises:
 67            NotImplementedError: This method should be implemented in the subclasses.
 68        """
 69        raise NotImplementedError()
 70
 71    def _external_col_filter(self, dataframe: pd.DataFrame) -> pd.DataFrame:
 72        """When filter needs a column from some dataset table located elsewhere.
 73
 74        Args:
 75            dataframe: The dataframe to be filtered.
 76
 77        Returns:
 78            A filtered dataframe.
 79        """
 80        # Add required columns
 81        og_cols = dataframe.columns
 82        new_dataframe = ds.add_dataset_columns(
 83            self.dataset, self.matrix_name, dataframe, [self.get_name()])
 84        new_cols = new_dataframe.columns
 85
 86        new_dataframe = self._filter(new_dataframe)
 87
 88        # Remove columns not in original dataframe
 89        for new_col in new_cols:
 90            if new_col not in og_cols:
 91                new_dataframe = new_dataframe.drop([new_col], axis=1, errors='ignore')
 92        return new_dataframe
 93
 94    @staticmethod
 95    def __empty_df__(dataframe: pd.DataFrame) -> pd.DataFrame:
 96        """Return an empty dataframe with same columns."""
 97        return dataframe.iloc[:0,:].copy()
 98
 99    def __str__(self):
100        """To string.
101
102        Returns:
103            The name of the class.
104        """
105        return self.__class__.__name__
class DataFilter(src.fairreckitlib.data.data_modifier.DataModifier):
 22class DataFilter(DataModifier, metaclass=ABCMeta):
 23    """Base class to filter a df (not a dataframe in particular).
 24
 25    Public method:
 26        run
 27    """
 28
 29    def __init__(self, name: str, params: Dict[str, Any], **kwargs):
 30        """Make Constructor of the class.
 31
 32        Uses optional arguments to enable sole use of subclass.filter().
 33
 34        Args:
 35            name: Configuration name of the filter.
 36            params: Configuration parameters.
 37        """
 38        DataModifier.__init__(self, name, params)
 39        self.dataset = kwargs['dataset']
 40        self.matrix_name = kwargs['matrix_name']
 41
 42    def run(self, dataframe: pd.DataFrame) -> pd.DataFrame:
 43        """Carry out the filtering.
 44
 45        Args:
 46            dataframe: Dataframe to be filtered on.
 47
 48        Return:
 49            The filtered dataframe.
 50        """
 51        # Filtering that requires external columns i.e., filter column not available in dataframe.
 52        return self._external_col_filter(dataframe)
 53
 54    @abstractmethod
 55    def get_type(self) -> str:
 56        """Get the type of the filter.
 57
 58        Returns:
 59            The type name of the filter.
 60        """
 61        raise NotImplementedError()
 62
 63    @abstractmethod
 64    def _filter(self, dataframe: pd.DataFrame) -> pd.DataFrame:
 65        """Sugar coats subclasses' filter() for run and _external_col_filter as sugar.
 66
 67        Raises:
 68            NotImplementedError: This method should be implemented in the subclasses.
 69        """
 70        raise NotImplementedError()
 71
 72    def _external_col_filter(self, dataframe: pd.DataFrame) -> pd.DataFrame:
 73        """When filter needs a column from some dataset table located elsewhere.
 74
 75        Args:
 76            dataframe: The dataframe to be filtered.
 77
 78        Returns:
 79            A filtered dataframe.
 80        """
 81        # Add required columns
 82        og_cols = dataframe.columns
 83        new_dataframe = ds.add_dataset_columns(
 84            self.dataset, self.matrix_name, dataframe, [self.get_name()])
 85        new_cols = new_dataframe.columns
 86
 87        new_dataframe = self._filter(new_dataframe)
 88
 89        # Remove columns not in original dataframe
 90        for new_col in new_cols:
 91            if new_col not in og_cols:
 92                new_dataframe = new_dataframe.drop([new_col], axis=1, errors='ignore')
 93        return new_dataframe
 94
 95    @staticmethod
 96    def __empty_df__(dataframe: pd.DataFrame) -> pd.DataFrame:
 97        """Return an empty dataframe with same columns."""
 98        return dataframe.iloc[:0,:].copy()
 99
100    def __str__(self):
101        """To string.
102
103        Returns:
104            The name of the class.
105        """
106        return self.__class__.__name__

Base class to filter a df (not a dataframe in particular).

Public method: run

DataFilter(name: str, params: Dict[str, Any], **kwargs)
29    def __init__(self, name: str, params: Dict[str, Any], **kwargs):
30        """Make Constructor of the class.
31
32        Uses optional arguments to enable sole use of subclass.filter().
33
34        Args:
35            name: Configuration name of the filter.
36            params: Configuration parameters.
37        """
38        DataModifier.__init__(self, name, params)
39        self.dataset = kwargs['dataset']
40        self.matrix_name = kwargs['matrix_name']

Construct the filter.

Uses optional arguments to enable sole use of subclass.filter().

Args: name: Configuration name of the filter. params: Configuration parameters.

def run( self, dataframe: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame:
42    def run(self, dataframe: pd.DataFrame) -> pd.DataFrame:
43        """Carry out the filtering.
44
45        Args:
46            dataframe: Dataframe to be filtered on.
47
48        Return:
49            The filtered dataframe.
50        """
51        # Filtering that requires external columns i.e., filter column not available in dataframe.
52        return self._external_col_filter(dataframe)

Carry out the filtering.

Args: dataframe: Dataframe to be filtered on.

Returns: The filtered dataframe.

@abstractmethod
def get_type(self) -> str:
54    @abstractmethod
55    def get_type(self) -> str:
56        """Get the type of the filter.
57
58        Returns:
59            The type name of the filter.
60        """
61        raise NotImplementedError()

Get the type of the filter.

Returns: The type name of the filter.