Module secfsdstools.f_standardize.base_prepivot_rules
Contains PrePivotRules Definitions.
Expand source code
"""
Contains PrePivotRules Definitions.
"""
import pandas as pd
import pandera as pa
from secfsdstools.f_standardize.base_rule_framework import PrePivotRule
class PrePivotDeduplicate(PrePivotRule):
    """
    Deduplicates the dataset based on the index_cols that are defined in the base class.

    Sometimes only single tags are duplicated; however, there are also reports
    where all tags of a report are duplicated.
    Without deduplication, the pivot command will fail.
    """

    def __init__(self):
        super().__init__("DeDup")

    def mask(self, data_df: pd.DataFrame) -> pa.typing.Series[bool]:
        """
        Returns a Series[bool] which defines the rows to which this rule has to be applied.

        A row is marked when an earlier row carries the same values in all
        index_cols (pandas' default keep='first'), so the first occurrence of
        every key combination stays unmarked.

        Args:
            data_df: dataframe on which the rules should be applied

        Returns:
            pa.typing.Series[bool]: a boolean Series that marks the duplicated rows
        """
        return data_df.duplicated(self.index_cols)

    def apply(self, data_df: pd.DataFrame, mask: pa.typing.Series[bool]):
        """
        Applies the rule on the provided dataframe by dropping the rows marked by the mask.

        Important: the rule is applied "in-place", so no new dataframe is produced.

        Args:
            data_df: dataframe on which the rule has to be applied
            mask: a Series marking the rows in the dataframe on which the rule has to be applied
        """
        # NOTE(review): the drop is label based. If the index of data_df is not
        # unique, every row sharing a label with a marked row would be removed as
        # well - confirm that callers always pass a dataframe with a unique index.
        data_df.drop(data_df[mask].index, inplace=True)

    def get_description(self) -> str:
        """
        Returns the description String.

        Returns:
            str: description
        """
        # Report exactly the columns that mask() uses to detect duplicates;
        # the 'value' column is not part of the duplicate check.
        return f"Deduplicates the dataframe based on the columns {self.index_cols}"
- class PrePivotDeduplicate
- 
Deduplicates the dataset based on the index_cols that are defined in the base class. Sometimes only single tags are duplicated; however, there are also reports where all tags of a report are duplicated. Without deduplication, the pivot command will fail.
Expand source code
class PrePivotDeduplicate(PrePivotRule):
    """
    Deduplicates the dataset based on the index_cols that are defined in the base class.
    sometimes, only single tags are duplicated, however, there are also reports
    where all tags of a report are duplicated.
    without deduplication, the pivot command will fail.
    """
    def __init__(self):
        super().__init__("DeDup")
    def mask(self, data_df: pd.DataFrame) -> pa.typing.Series[bool]:
        """
            returns a Series[bool] which defines the rows to which this rule has to be applied.
        Args:
            data_df: dataframe on which the rules should be applied
        Returns:
            pa.typing.Series[bool]: a boolean Series that marks which rows have to be calculated
        """
        return data_df.duplicated(self.index_cols)
    def apply(self, data_df: pd.DataFrame, mask: pa.typing.Series[bool]):
        """
        apply the rule on the provided dataframe. the rows, on which the rule has to be applied
        is defined by the provide mask Series.
        Important, the rules have to be applied "in-place", so no new dataframe is produced.
        Args:
            df: dataframe on which the rule has to be applied
            mask: a Series marking the rows in the dataframe on which the rule has to be applied
        """
        data_df.drop(data_df[mask].index, inplace=True)
    def get_description(self) -> str:
        """
        Returns the description String
        Returns:
            str: description
        """
        return f"Deduplicates the dataframe based on the columns {self.index_cols + ['value']}"
Ancestors
- PrePivotRule
- AbstractRule
- RuleEntity
- abc.ABC
Class variables
- var identifier : str
Methods
- def apply(self, data_df: pandas.core.frame.DataFrame, mask: pandera.typing.pandas.Series[bool])
- 
Apply the rule on the provided dataframe. The rows on which the rule has to be applied are defined by the provided mask Series. Important: the rules have to be applied "in-place", so no new dataframe is produced.
Args
- data_df
- dataframe on which the rule has to be applied
- mask
- a Series marking the rows in the dataframe on which the rule has to be applied
Expand source code
def apply(self, data_df: pd.DataFrame, mask: pa.typing.Series[bool]):
    """
    apply the rule on the provided dataframe. the rows, on which the rule has to be applied
    is defined by the provide mask Series.
    Important, the rules have to be applied "in-place", so no new dataframe is produced.
    Args:
        df: dataframe on which the rule has to be applied
        mask: a Series marking the rows in the dataframe on which the rule has to be applied
    """
    data_df.drop(data_df[mask].index, inplace=True)
 Inherited members