Module `secfsdstools.e_filter.rawfiltering`

This module contains some basic filter implementations on the RawDataBag.

Note: the filters don't create new copies of the pandas dataset

Expand source code

"""
This module contains some basic filter implementations on the RawDataBag.

Note: the filters don't create new copies of the pandas dataset
"""
from typing import List

from secfsdstools.a_utils.basic import calculate_previous_period
from secfsdstools.d_container.databagmodel import RawDataBag
from secfsdstools.d_container.filter import FilterBase


class AdshRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data by a list of adshs. This filter operates on the sub, pre_df and the num_df.
    """

    def __init__(self, adshs: List[str]):
        self.adshs = adshs

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that only datapoints of reports defined by the adshs list
        are contained.

        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        sub_filtered_for_adshs = databag.sub_df[databag.sub_df.adsh.isin(self.adshs)]
        pre_filtered_for_adshs = databag.pre_df[databag.pre_df.adsh.isin(self.adshs)]
        num_filtered_for_adshs = databag.num_df[databag.num_df.adsh.isin(self.adshs)]

        return RawDataBag.create(sub_df=sub_filtered_for_adshs,
                                 pre_df=pre_filtered_for_adshs,
                                 num_df=num_filtered_for_adshs)


class StmtRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data by a list of statement type (BS, IS, CF, ...).
    This filter operates on the pre_df.
    """

    def __init__(self, stmts: List[str]):
        self.stmts = stmts

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that only datapoints of reports defined by the adshs list
        are contained.

        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        pre_filtered_for_stmts = databag.pre_df[databag.pre_df.stmt.isin(self.stmts)]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=pre_filtered_for_stmts,
                                 num_df=databag.num_df)


class ReportPeriodRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data so that only datapoints are contained which ddate-attribute equals the
    period date of the report. Therefore, the filter operates on the num_df dataframe.
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filter the databag so that only datapoints are contained which have a ddate-attribute
        that equals the period-attribute of the report.
        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """

        adsh_period_map = \
            databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period']

        mask = databag.num_df['adsh'].map(adsh_period_map) == databag.num_df['ddate']
        num_filtered_for_ddates = databag.num_df[mask]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=databag.pre_df,
                                 num_df=num_filtered_for_ddates)


class ReportPeriodAndPreviousPeriodRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data so that only datapoints are contained which ddate-attribute equals the
    period date of the report or the period date of the previous (a year ago) report.
    Therefore, the filter operates on the num_df dataframe.
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filter the databag so that only datapoints are contained which have a ddate-attribute
        that equals the period-attribute of the report or the period of the previous (a year ago)
        report.
        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """

        adsh_period_map = \
            databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period']

        # caculate the dates for the previous year
        adsh_previous_period_map = {adsh: calculate_previous_period(period)
                                    for adsh, period in adsh_period_map.items()}

        mask = (databag.num_df['adsh'].map(adsh_period_map) == databag.num_df['ddate']) | \
               (databag.num_df['adsh'].map(adsh_previous_period_map) == databag.num_df['ddate'])

        num_filtered_for_ddates = databag.num_df[mask]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=databag.pre_df,
                                 num_df=num_filtered_for_ddates)


class TagRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data by a list of tags. This filter operates on the pre_df and the num_df.
    """

    def __init__(self, tags: List[str]):
        self.tags = tags

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that only datapoints are contained which have a tag-attribute
        that is in the provided list.
        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        pre_filtered_for_tags = databag.pre_df[databag.pre_df.tag.isin(self.tags)]
        num_filtered_for_tags = databag.num_df[databag.num_df.tag.isin(self.tags)]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=pre_filtered_for_tags,
                                 num_df=num_filtered_for_tags)


class MainCoregRawFilter(FilterBase[RawDataBag]):
    """
    Filters only for the main coreg entries (coreg == '')
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that only the main coreg entries are contained
        (no data subsidiaries).
        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        num_filtered_for_main_coreg = databag.num_df[databag.num_df.coreg == '']

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=databag.pre_df,
                                 num_df=num_filtered_for_main_coreg)


class OfficialTagsOnlyRawFilter(FilterBase[RawDataBag]):
    """
    Filters only the official tags. These are the tags that contain an official XBRL version
    within the version column. "inofficial" (resp. company specific) tags are identified with
    the version column containing the value of the adsh.
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that official tags are contained.

        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        # using isin is performant, so we just make sure to filter the rows
        # which do not have an adsh as version
        pre_filtered_for_tags = databag.pre_df[~databag.pre_df.version.isin(databag.sub_df.adsh)]
        num_filtered_for_tags = databag.num_df[~databag.num_df.version.isin(databag.sub_df.adsh)]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=pre_filtered_for_tags,
                                 num_df=num_filtered_for_tags)


class USDOnlyRawFilter(FilterBase[RawDataBag]):
    """
    Removes all entries which have a currency in the column uom that is not USD.
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        Removes all currency entries in the uom colum of the num_df that are not USD.

        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data

        """

        # currency is always in uppercase, so if it is not all uppercase, it is not a currency
        mask_has_lower = ~databag.num_df.uom.str.isupper()

        # currency is always 3 letters
        mask_is_none_currency = databag.num_df.uom.str.len() != 3

        # keep USD
        mask_usd_only = databag.num_df.uom == "USD"

        num_filtered_for_usd = \
            databag.num_df[mask_has_lower | mask_is_none_currency | mask_usd_only]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=databag.pre_df,
                                 num_df=num_filtered_for_usd)

Classes

class AdshRawFilter (adshs: List[str])

Filters the data by a list of adshs. This filter operates on the sub, pre_df and the num_df.

Expand source code

class AdshRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data by a list of adshs. This filter operates on the sub, pre_df and the num_df.
    """

    def __init__(self, adshs: List[str]):
        self.adshs = adshs

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that only datapoints of reports defined by the adshs list
        are contained.

        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        sub_filtered_for_adshs = databag.sub_df[databag.sub_df.adsh.isin(self.adshs)]
        pre_filtered_for_adshs = databag.pre_df[databag.pre_df.adsh.isin(self.adshs)]
        num_filtered_for_adshs = databag.num_df[databag.num_df.adsh.isin(self.adshs)]

        return RawDataBag.create(sub_df=sub_filtered_for_adshs,
                                 pre_df=pre_filtered_for_adshs,
                                 num_df=num_filtered_for_adshs)

Ancestors

FilterBase
typing.Generic

Methods

def filter(self, databag: RawDataBag) ‑> RawDataBag

filters the databag so that only datapoints of reports defined by the adshs list are contained.

Args

databag(RawDataBag) : rawdatabag to apply the filter to

Returns

RawDataBag: the databag with the filtered data

Expand source code

def filter(self, databag: RawDataBag) -> RawDataBag:
    """
    filters the databag so that only datapoints of reports defined by the adshs list
    are contained.

    Args:
        databag(RawDataBag) : rawdatabag to apply the filter to

    Returns:
        RawDataBag: the databag with the filtered data
    """
    sub_filtered_for_adshs = databag.sub_df[databag.sub_df.adsh.isin(self.adshs)]
    pre_filtered_for_adshs = databag.pre_df[databag.pre_df.adsh.isin(self.adshs)]
    num_filtered_for_adshs = databag.num_df[databag.num_df.adsh.isin(self.adshs)]

    return RawDataBag.create(sub_df=sub_filtered_for_adshs,
                             pre_df=pre_filtered_for_adshs,
                             num_df=num_filtered_for_adshs)

class MainCoregRawFilter (*args, **kwds)

Filters only for the main coreg entries (coreg == '')

Expand source code

class MainCoregRawFilter(FilterBase[RawDataBag]):
    """
    Filters only for the main coreg entries (coreg == '')
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that only the main coreg entries are contained
        (no data subsidiaries).
        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        num_filtered_for_main_coreg = databag.num_df[databag.num_df.coreg == '']

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=databag.pre_df,
                                 num_df=num_filtered_for_main_coreg)

Ancestors

FilterBase
typing.Generic

Methods

def filter(self, databag: RawDataBag) ‑> RawDataBag

filters the databag so that only the main coreg entries are contained (no data subsidiaries).

Args

databag(RawDataBag) : rawdatabag to apply the filter to

Returns

RawDataBag: the databag with the filtered data

Expand source code

def filter(self, databag: RawDataBag) -> RawDataBag:
    """
    filters the databag so that only the main coreg entries are contained
    (no data subsidiaries).
    Args:
        databag(RawDataBag) : rawdatabag to apply the filter to

    Returns:
        RawDataBag: the databag with the filtered data
    """
    num_filtered_for_main_coreg = databag.num_df[databag.num_df.coreg == '']

    return RawDataBag.create(sub_df=databag.sub_df,
                             pre_df=databag.pre_df,
                             num_df=num_filtered_for_main_coreg)

class OfficialTagsOnlyRawFilter (*args, **kwds)

Filters only the official tags. These are the tags that contain an official XBRL version within the version column. "inofficial" (resp. company specific) tags are identified with the version column containing the value of the adsh.

Expand source code

class OfficialTagsOnlyRawFilter(FilterBase[RawDataBag]):
    """
    Filters only the official tags. These are the tags that contain an official XBRL version
    within the version column. "inofficial" (resp. company specific) tags are identified with
    the version column containing the value of the adsh.
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that official tags are contained.

        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        # using isin is performant, so we just make sure to filter the rows
        # which do not have an adsh as version
        pre_filtered_for_tags = databag.pre_df[~databag.pre_df.version.isin(databag.sub_df.adsh)]
        num_filtered_for_tags = databag.num_df[~databag.num_df.version.isin(databag.sub_df.adsh)]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=pre_filtered_for_tags,
                                 num_df=num_filtered_for_tags)

Ancestors

FilterBase
typing.Generic

Methods

def filter(self, databag: RawDataBag) ‑> RawDataBag

filters the databag so that official tags are contained.

Args

databag(RawDataBag) : rawdatabag to apply the filter to

Returns

RawDataBag: the databag with the filtered data

Expand source code

def filter(self, databag: RawDataBag) -> RawDataBag:
    """
    filters the databag so that official tags are contained.

    Args:
        databag(RawDataBag) : rawdatabag to apply the filter to

    Returns:
        RawDataBag: the databag with the filtered data
    """
    # using isin is performant, so we just make sure to filter the rows
    # which do not have an adsh as version
    pre_filtered_for_tags = databag.pre_df[~databag.pre_df.version.isin(databag.sub_df.adsh)]
    num_filtered_for_tags = databag.num_df[~databag.num_df.version.isin(databag.sub_df.adsh)]

    return RawDataBag.create(sub_df=databag.sub_df,
                             pre_df=pre_filtered_for_tags,
                             num_df=num_filtered_for_tags)

class ReportPeriodAndPreviousPeriodRawFilter (*args, **kwds)

Filters the data so that only datapoints are contained which ddate-attribute equals the period date of the report or the period date of the previous (a year ago) report. Therefore, the filter operates on the num_df dataframe.

Expand source code

class ReportPeriodAndPreviousPeriodRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data so that only datapoints are contained which ddate-attribute equals the
    period date of the report or the period date of the previous (a year ago) report.
    Therefore, the filter operates on the num_df dataframe.
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filter the databag so that only datapoints are contained which have a ddate-attribute
        that equals the period-attribute of the report or the period of the previous (a year ago)
        report.
        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """

        adsh_period_map = \
            databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period']

        # caculate the dates for the previous year
        adsh_previous_period_map = {adsh: calculate_previous_period(period)
                                    for adsh, period in adsh_period_map.items()}

        mask = (databag.num_df['adsh'].map(adsh_period_map) == databag.num_df['ddate']) | \
               (databag.num_df['adsh'].map(adsh_previous_period_map) == databag.num_df['ddate'])

        num_filtered_for_ddates = databag.num_df[mask]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=databag.pre_df,
                                 num_df=num_filtered_for_ddates)

Ancestors

FilterBase
typing.Generic

Methods

def filter(self, databag: RawDataBag) ‑> RawDataBag

filter the databag so that only datapoints are contained which have a ddate-attribute that equals the period-attribute of the report or the period of the previous (a year ago) report.

Args

databag(RawDataBag) : rawdatabag to apply the filter to

Returns

RawDataBag: the databag with the filtered data

Expand source code

def filter(self, databag: RawDataBag) -> RawDataBag:
    """
    filter the databag so that only datapoints are contained which have a ddate-attribute
    that equals the period-attribute of the report or the period of the previous (a year ago)
    report.
    Args:
        databag(RawDataBag) : rawdatabag to apply the filter to

    Returns:
        RawDataBag: the databag with the filtered data
    """

    adsh_period_map = \
        databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period']

    # caculate the dates for the previous year
    adsh_previous_period_map = {adsh: calculate_previous_period(period)
                                for adsh, period in adsh_period_map.items()}

    mask = (databag.num_df['adsh'].map(adsh_period_map) == databag.num_df['ddate']) | \
           (databag.num_df['adsh'].map(adsh_previous_period_map) == databag.num_df['ddate'])

    num_filtered_for_ddates = databag.num_df[mask]

    return RawDataBag.create(sub_df=databag.sub_df,
                             pre_df=databag.pre_df,
                             num_df=num_filtered_for_ddates)

class ReportPeriodRawFilter (*args, **kwds)

Filters the data so that only datapoints are contained which ddate-attribute equals the period date of the report. Therefore, the filter operates on the num_df dataframe.

Expand source code

class ReportPeriodRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data so that only datapoints are contained which ddate-attribute equals the
    period date of the report. Therefore, the filter operates on the num_df dataframe.
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filter the databag so that only datapoints are contained which have a ddate-attribute
        that equals the period-attribute of the report.
        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """

        adsh_period_map = \
            databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period']

        mask = databag.num_df['adsh'].map(adsh_period_map) == databag.num_df['ddate']
        num_filtered_for_ddates = databag.num_df[mask]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=databag.pre_df,
                                 num_df=num_filtered_for_ddates)

Ancestors

FilterBase
typing.Generic

Methods

def filter(self, databag: RawDataBag) ‑> RawDataBag

filter the databag so that only datapoints are contained which have a ddate-attribute that equals the period-attribute of the report.

Args

databag(RawDataBag) : rawdatabag to apply the filter to

Returns

RawDataBag: the databag with the filtered data

Expand source code

def filter(self, databag: RawDataBag) -> RawDataBag:
    """
    filter the databag so that only datapoints are contained which have a ddate-attribute
    that equals the period-attribute of the report.
    Args:
        databag(RawDataBag) : rawdatabag to apply the filter to

    Returns:
        RawDataBag: the databag with the filtered data
    """

    adsh_period_map = \
        databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period']

    mask = databag.num_df['adsh'].map(adsh_period_map) == databag.num_df['ddate']
    num_filtered_for_ddates = databag.num_df[mask]

    return RawDataBag.create(sub_df=databag.sub_df,
                             pre_df=databag.pre_df,
                             num_df=num_filtered_for_ddates)

class StmtRawFilter (stmts: List[str])

Filters the data by a list of statement type (BS, IS, CF, …). This filter operates on the pre_df.

Expand source code

class StmtRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data by a list of statement type (BS, IS, CF, ...).
    This filter operates on the pre_df.
    """

    def __init__(self, stmts: List[str]):
        self.stmts = stmts

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that only datapoints of reports defined by the adshs list
        are contained.

        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        pre_filtered_for_stmts = databag.pre_df[databag.pre_df.stmt.isin(self.stmts)]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=pre_filtered_for_stmts,
                                 num_df=databag.num_df)

Ancestors

FilterBase
typing.Generic

Methods

def filter(self, databag: RawDataBag) ‑> RawDataBag

filters the databag so that only datapoints of reports defined by the adshs list are contained.

Args

databag(RawDataBag) : rawdatabag to apply the filter to

Returns

RawDataBag: the databag with the filtered data

Expand source code

def filter(self, databag: RawDataBag) -> RawDataBag:
    """
    filters the databag so that only datapoints of reports defined by the adshs list
    are contained.

    Args:
        databag(RawDataBag) : rawdatabag to apply the filter to

    Returns:
        RawDataBag: the databag with the filtered data
    """
    pre_filtered_for_stmts = databag.pre_df[databag.pre_df.stmt.isin(self.stmts)]

    return RawDataBag.create(sub_df=databag.sub_df,
                             pre_df=pre_filtered_for_stmts,
                             num_df=databag.num_df)

class TagRawFilter (tags: List[str])

Filters the data by a list of tags. This filter operates on the pre_df and the num_df.

Expand source code

class TagRawFilter(FilterBase[RawDataBag]):
    """
    Filters the data by a list of tags. This filter operates on the pre_df and the num_df.
    """

    def __init__(self, tags: List[str]):
        self.tags = tags

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        filters the databag so that only datapoints are contained which have a tag-attribute
        that is in the provided list.
        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data
        """
        pre_filtered_for_tags = databag.pre_df[databag.pre_df.tag.isin(self.tags)]
        num_filtered_for_tags = databag.num_df[databag.num_df.tag.isin(self.tags)]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=pre_filtered_for_tags,
                                 num_df=num_filtered_for_tags)

Ancestors

FilterBase
typing.Generic

Methods

def filter(self, databag: RawDataBag) ‑> RawDataBag

filters the databag so that only datapoints are contained which have a tag-attribute that is in the provided list.

Args

databag(RawDataBag) : rawdatabag to apply the filter to

Returns

RawDataBag: the databag with the filtered data

Expand source code

def filter(self, databag: RawDataBag) -> RawDataBag:
    """
    filters the databag so that only datapoints are contained which have a tag-attribute
    that is in the provided list.
    Args:
        databag(RawDataBag) : rawdatabag to apply the filter to

    Returns:
        RawDataBag: the databag with the filtered data
    """
    pre_filtered_for_tags = databag.pre_df[databag.pre_df.tag.isin(self.tags)]
    num_filtered_for_tags = databag.num_df[databag.num_df.tag.isin(self.tags)]

    return RawDataBag.create(sub_df=databag.sub_df,
                             pre_df=pre_filtered_for_tags,
                             num_df=num_filtered_for_tags)

class USDOnlyRawFilter (*args, **kwds)

Removes all entries which have a currency in the column uom that is not USD.

Expand source code

class USDOnlyRawFilter(FilterBase[RawDataBag]):
    """
    Removes all entries which have a currency in the column uom that is not USD.
    """

    def filter(self, databag: RawDataBag) -> RawDataBag:
        """
        Removes all currency entries in the uom colum of the num_df that are not USD.

        Args:
            databag(RawDataBag) : rawdatabag to apply the filter to

        Returns:
            RawDataBag: the databag with the filtered data

        """

        # currency is always in uppercase, so if it is not all uppercase, it is not a currency
        mask_has_lower = ~databag.num_df.uom.str.isupper()

        # currency is always 3 letters
        mask_is_none_currency = databag.num_df.uom.str.len() != 3

        # keep USD
        mask_usd_only = databag.num_df.uom == "USD"

        num_filtered_for_usd = \
            databag.num_df[mask_has_lower | mask_is_none_currency | mask_usd_only]

        return RawDataBag.create(sub_df=databag.sub_df,
                                 pre_df=databag.pre_df,
                                 num_df=num_filtered_for_usd)

Ancestors

FilterBase
typing.Generic

Methods

def filter(self, databag: RawDataBag) ‑> RawDataBag

Removes all currency entries in the uom colum of the num_df that are not USD.

Args

databag(RawDataBag) : rawdatabag to apply the filter to

Returns

RawDataBag: the databag with the filtered data

Expand source code

def filter(self, databag: RawDataBag) -> RawDataBag:
    """
    Removes all currency entries in the uom colum of the num_df that are not USD.

    Args:
        databag(RawDataBag) : rawdatabag to apply the filter to

    Returns:
        RawDataBag: the databag with the filtered data

    """

    # currency is always in uppercase, so if it is not all uppercase, it is not a currency
    mask_has_lower = ~databag.num_df.uom.str.isupper()

    # currency is always 3 letters
    mask_is_none_currency = databag.num_df.uom.str.len() != 3

    # keep USD
    mask_usd_only = databag.num_df.uom == "USD"

    num_filtered_for_usd = \
        databag.num_df[mask_has_lower | mask_is_none_currency | mask_usd_only]

    return RawDataBag.create(sub_df=databag.sub_df,
                             pre_df=databag.pre_df,
                             num_df=num_filtered_for_usd)