Module secfsdstools.e_filter.joinedfiltering
This module contains some basic filter implementations for the JoinedDataBag.
Note: the filters don't create new copies of the pandas dataset
Expand source code
"""
This module contains some basic pathfilter implementations on the JoinedDataBag.
Note: the filters don't create new copies of the pandas dataset
"""
from typing import List
from secfsdstools.a_utils.basic import calculate_previous_period
from secfsdstools.d_container.databagmodel import JoinedDataBag
from secfsdstools.d_container.filter import FilterBase
class AdshJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Restricts a JoinedDataBag to the reports identified by a list of adshs.

    Both dataframes of the bag (sub_df and pre_num_df) are reduced to the rows
    whose adsh value appears in the configured list.
    """

    def __init__(self, adshs: List[str]):
        self.adshs = adshs

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Keep only the rows that belong to one of the configured adshs.

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the filtered dataframes
        """
        sub_df = databag.sub_df
        pre_num_df = databag.pre_num_df

        # one boolean mask per dataframe, both based on the same adsh list
        wanted_sub_rows = sub_df.adsh.isin(self.adshs)
        wanted_pre_num_rows = pre_num_df.adsh.isin(self.adshs)

        return JoinedDataBag.create(sub_df=sub_df[wanted_sub_rows],
                                    pre_num_df=pre_num_df[wanted_pre_num_rows])
class StmtJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Restricts a JoinedDataBag to a list of statement types (BS, IS, CF, ...).

    Only the pre_num_df is filtered; the sub_df is passed through unchanged.
    """

    def __init__(self, stmts: List[str]):
        self.stmts = stmts

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Keep only the pre_num_df rows whose stmt value is in the configured list.

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the original sub_df and
            the filtered pre_num_df
        """
        pre_num_df = databag.pre_num_df
        is_wanted_stmt = pre_num_df.stmt.isin(self.stmts)

        return JoinedDataBag.create(sub_df=databag.sub_df,
                                    pre_num_df=pre_num_df[is_wanted_stmt])
class ReportPeriodJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Keeps only the datapoints whose ddate equals the period of their report.

    The period of every report is read from sub_df; the filtering itself is
    applied to pre_num_df, while sub_df is passed through unchanged.
    """

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Keep only the pre_num_df rows whose ddate matches the period that
        sub_df lists for the row's adsh.

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the original sub_df and
            the filtered pre_num_df
        """
        pre_num_df = databag.pre_num_df

        # lookup table: adsh of a report -> period of that report
        report_periods = databag.sub_df[['adsh', 'period']].set_index('adsh')
        period_by_adsh = report_periods['period'].to_dict()

        # rows whose adsh has no entry in the lookup map to NaN and never match
        expected_ddate = pre_num_df['adsh'].map(period_by_adsh)
        is_report_period = expected_ddate == pre_num_df['ddate']

        return JoinedDataBag.create(sub_df=databag.sub_df,
                                    pre_num_df=pre_num_df[is_report_period])
class ReportPeriodAndPreviousPeriodJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Keeps only the datapoints whose ddate equals either the period of their
    report or the previous period of that report.

    The previous period is obtained from calculate_previous_period (described
    by the original documentation as the period one year earlier). The
    filtering is applied to pre_num_df; sub_df is passed through unchanged.
    """

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Keep only the pre_num_df rows whose ddate matches the report's period
        or the report's previous period.

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the original sub_df and
            the filtered pre_num_df
        """
        pre_num_df = databag.pre_num_df
        row_adsh = pre_num_df['adsh']
        row_ddate = pre_num_df['ddate']

        # lookup table: adsh of a report -> period of that report
        report_periods = databag.sub_df[['adsh', 'period']].set_index('adsh')
        period_by_adsh = report_periods['period'].to_dict()

        # lookup table: adsh of a report -> previous period of that report
        previous_period_by_adsh = {}
        for adsh, period in period_by_adsh.items():
            previous_period_by_adsh[adsh] = calculate_previous_period(period)

        matches_current = row_adsh.map(period_by_adsh) == row_ddate
        matches_previous = row_adsh.map(previous_period_by_adsh) == row_ddate

        return JoinedDataBag.create(
            sub_df=databag.sub_df,
            pre_num_df=pre_num_df[matches_current | matches_previous])
class TagJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Restricts a JoinedDataBag to a list of tags.

    Only the pre_num_df is filtered; the sub_df is passed through unchanged.
    """

    def __init__(self, tags: List[str]):
        self.tags = tags

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Keep only the pre_num_df rows whose tag value is in the configured list.

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the original sub_df and
            the filtered pre_num_df
        """
        pre_num_df = databag.pre_num_df
        has_wanted_tag = pre_num_df.tag.isin(self.tags)

        return JoinedDataBag.create(sub_df=databag.sub_df,
                                    pre_num_df=pre_num_df[has_wanted_tag])
class MainCoregJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Keeps only the main coreg entries, i.e. the rows with coreg == ''.
    """

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Keep only the pre_num_df rows whose coreg column is the empty string
        (the main entries, no data of subsidiaries).

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the original sub_df and
            the filtered pre_num_df
        """
        pre_num_df = databag.pre_num_df
        is_main_coreg = pre_num_df.coreg == ''

        return JoinedDataBag.create(sub_df=databag.sub_df,
                                    pre_num_df=pre_num_df[is_main_coreg])
class OfficialTagsOnlyJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Keeps only the official tags.

    Official tags carry an official XBRL version in the version column.
    Unofficial (company specific) tags are recognized by a version column that
    contains the adsh of a report instead.
    """

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Drop every pre_num_df row whose version value is one of the adshs
        listed in sub_df.

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the original sub_df and
            the filtered pre_num_df
        """
        pre_num_df = databag.pre_num_df

        # a version equal to an adsh marks a company specific tag
        is_company_specific = pre_num_df.version.isin(databag.sub_df.adsh)

        return JoinedDataBag.create(sub_df=databag.sub_df,
                                    pre_num_df=pre_num_df[~is_company_specific])
class USDOnlyJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Removes all entries which have a currency in the column uom that is not USD.

    A uom value is treated as a currency when it has exactly three characters
    and passes str.isupper(). Every other row (other units, USD itself, missing
    uom values) is kept.
    """

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Removes all currency entries in the uom column of the pre_num_df that
        are not USD.

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the original sub_df and
            the filtered pre_num_df
        """
        uom = databag.pre_num_df.uom

        # str.isupper() returns NaN for missing / non-string entries, which
        # turns the result into an object column that cannot be inverted with
        # `~` (TypeError). Comparing with True maps those entries to False, so
        # they count as "not a currency" and are kept - the same outcome the
        # length check already implies for them.
        # currency is always in uppercase, so if it is not all uppercase,
        # it is not a currency
        looks_like_currency = uom.str.isupper().eq(True) & uom.str.len().eq(3)
        is_foreign_currency = looks_like_currency & uom.ne("USD")

        # De Morgan form of: not upper | len != 3 | uom == "USD"
        prenum_filtered_for_usd = databag.pre_num_df[~is_foreign_currency]

        return JoinedDataBag.create(sub_df=databag.sub_df,
                                    pre_num_df=prenum_filtered_for_usd)
class NoSegmentInfoJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Keeps only the entries without segment information (segments == '').
    """

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Keep only the pre_num_df rows whose segments column is the empty string.

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the original sub_df and
            the filtered pre_num_df
        """
        pre_num_df = databag.pre_num_df
        has_no_segment_info = pre_num_df.segments == ''

        return JoinedDataBag.create(sub_df=databag.sub_df,
                                    pre_num_df=pre_num_df[has_no_segment_info])
class CIKJoinedFilter(FilterBase[JoinedDataBag]):
    """
    Restricts a JoinedDataBag to the reports of a list of ciks.

    The sub_df is filtered by cik; the pre_num_df is then reduced to the rows
    that belong to the adshs of the remaining reports.
    """

    def __init__(self, ciks: List[int]):
        self.ciks = ciks

    def filter(self, databag: JoinedDataBag) -> JoinedDataBag:
        """
        Keep only the rows that belong to reports of the configured ciks.

        Args:
            databag(JoinedDataBag) : databag to apply the filter to

        Returns:
            JoinedDataBag: the databag created from the filtered dataframes
        """
        sub_df = databag.sub_df
        pre_num_df = databag.pre_num_df

        selected_reports = sub_df[sub_df.cik.isin(self.ciks)]

        # pre_num_df has no cik filter of its own here, so it is selected
        # through the adshs of the selected reports
        selected_adshs = selected_reports.adsh.tolist()
        belongs_to_selected_report = pre_num_df.adsh.isin(selected_adshs)

        return JoinedDataBag.create(
            sub_df=selected_reports,
            pre_num_df=pre_num_df[belongs_to_selected_report])
Classes
class AdshJoinedFilter (adshs: List[str])
-
Filters the data by a list of adshs. This filter operates on the sub_df and the pre_num_df.
Expand source code
class AdshJoinedFilter(FilterBase[JoinedDataBag]): """ Filters the data by a list of adshs. This pathfilter operates on the sub, pre_df and the num_df. """ def __init__(self, adshs: List[str]): self.adshs = adshs def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only datapoints of reports defined by the adshs list are contained. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ sub_filtered_for_adshs = databag.sub_df[databag.sub_df.adsh.isin(self.adshs)] pre_num_filtered_for_adshs = databag.pre_num_df[databag.pre_num_df.adsh.isin(self.adshs)] return JoinedDataBag.create(sub_df=sub_filtered_for_adshs, pre_num_df=pre_num_filtered_for_adshs)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
filters the databag so that only datapoints of reports defined by the adshs list are contained.
Args
databag(JoinedDataBag) : databag to apply the pathfilter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only datapoints of reports defined by the adshs list are contained. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ sub_filtered_for_adshs = databag.sub_df[databag.sub_df.adsh.isin(self.adshs)] pre_num_filtered_for_adshs = databag.pre_num_df[databag.pre_num_df.adsh.isin(self.adshs)] return JoinedDataBag.create(sub_df=sub_filtered_for_adshs, pre_num_df=pre_num_filtered_for_adshs)
class CIKJoinedFilter (ciks: List[int])
-
Filters the data by a list of ciks. This filter operates on the sub_df and the pre_num_df.
Expand source code
class CIKJoinedFilter(FilterBase[JoinedDataBag]): """ Filters the data by a list of ciks. This filter operates on the sub, pre_df and the num_df. """ def __init__(self, ciks: List[int]): self.ciks = ciks def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only datapoints belonging to the provided ciks are contained. Args: databag(JoinedDataBag) : joineddatabag to apply the filter to Returns: JoinedDataBag: the databag with the filtered data """ sub_filtered_for_adshs = databag.sub_df[databag.sub_df.cik.isin(self.ciks)] adshs = sub_filtered_for_adshs.adsh.tolist() pre_num_filtered_for_adshs = databag.pre_num_df[databag.pre_num_df.adsh.isin(adshs)] return JoinedDataBag.create(sub_df=sub_filtered_for_adshs, pre_num_df=pre_num_filtered_for_adshs)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
filters the databag so that only datapoints belonging to the provided ciks are contained.
Args
databag(JoinedDataBag) : joineddatabag to apply the filter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only datapoints belonging to the provided ciks are contained. Args: databag(JoinedDataBag) : joineddatabag to apply the filter to Returns: JoinedDataBag: the databag with the filtered data """ sub_filtered_for_adshs = databag.sub_df[databag.sub_df.cik.isin(self.ciks)] adshs = sub_filtered_for_adshs.adsh.tolist() pre_num_filtered_for_adshs = databag.pre_num_df[databag.pre_num_df.adsh.isin(adshs)] return JoinedDataBag.create(sub_df=sub_filtered_for_adshs, pre_num_df=pre_num_filtered_for_adshs)
class MainCoregJoinedFilter (*args, **kwds)
-
Filters only for the main coreg entries (coreg == '')
Expand source code
class MainCoregJoinedFilter(FilterBase[JoinedDataBag]): """ Filters only for the main coreg entries (coreg == '') """ def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only the main coreg entries are contained (no data subsidiaries). Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_main_coreg = databag.pre_num_df[databag.pre_num_df.coreg == ''] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_main_coreg)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
filters the databag so that only the main coreg entries are contained (no data subsidiaries).
Args
databag(JoinedDataBag) : databag to apply the pathfilter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only the main coreg entries are contained (no data subsidiaries). Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_main_coreg = databag.pre_num_df[databag.pre_num_df.coreg == ''] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_main_coreg)
class NoSegmentInfoJoinedFilter (*args, **kwds)
-
Filters only for the entries without segment information (segments == '')
Expand source code
class NoSegmentInfoJoinedFilter(FilterBase[JoinedDataBag]): """ Filters only for the main coreg entries (coreg == '') """ def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only the main coreg entries are contained (no data subsidiaries). Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_main_coreg = databag.pre_num_df[databag.pre_num_df.segments == ''] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_main_coreg)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
filters the databag so that only the entries without segment information (segments == '') are contained.
Args
databag(JoinedDataBag) : databag to apply the pathfilter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only the main coreg entries are contained (no data subsidiaries). Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_main_coreg = databag.pre_num_df[databag.pre_num_df.segments == ''] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_main_coreg)
class OfficialTagsOnlyJoinedFilter (*args, **kwds)
-
Filters only the official tags. These are the tags that contain an official XBRL version within the version column. "Unofficial" (i.e. company-specific) tags are identified by the version column containing the value of the adsh.
Expand source code
class OfficialTagsOnlyJoinedFilter(FilterBase[JoinedDataBag]): """ Filters only the official tags. These are the tags that contain an official XBRL version within the version column. "inofficial" (resp. company specific) tags are identified with the version column containing the value of the adsh. """ def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that official tags are contained. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_tags = databag.pre_num_df[ ~databag.pre_num_df.version.isin(databag.sub_df.adsh)] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_tags)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
filters the databag so that official tags are contained.
Args
databag(JoinedDataBag) : databag to apply the pathfilter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that official tags are contained. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_tags = databag.pre_num_df[ ~databag.pre_num_df.version.isin(databag.sub_df.adsh)] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_tags)
class ReportPeriodAndPreviousPeriodJoinedFilter (*args, **kwds)
-
Filters the data so that only datapoints are contained whose ddate attribute equals the period date of the report or the period date of the previous (a year ago) report. Therefore, the filter operates on the pre_num_df dataframe.
Expand source code
class ReportPeriodAndPreviousPeriodJoinedFilter(FilterBase[JoinedDataBag]): """ Filters the data so that only datapoints are contained which ddate-attribute equals the period date of the report or the period date of the previous (a year ago) report. Therefore, the pathfilter operates on the num_df dataframe. """ def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ pathfilter the databag so that only datapoints are contained which have a ddate-attribute that equals the period-attribute of the report or the period of the previous (a year ago) report. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ adsh_period_map = \ databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period'] # caculate the dates for the previous year adsh_previous_period_map = {adsh: calculate_previous_period(period) for adsh, period in adsh_period_map.items()} mask = (databag.pre_num_df['adsh'].map(adsh_period_map) == databag.pre_num_df['ddate']) | \ (databag.pre_num_df['adsh'].map(adsh_previous_period_map) == databag.pre_num_df[ 'ddate']) pre_num_filtered_for_ddates = databag.pre_num_df[mask] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_ddates)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
pathfilter the databag so that only datapoints are contained which have a ddate-attribute that equals the period-attribute of the report or the period of the previous (a year ago) report.
Args
databag(JoinedDataBag) : databag to apply the pathfilter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ pathfilter the databag so that only datapoints are contained which have a ddate-attribute that equals the period-attribute of the report or the period of the previous (a year ago) report. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ adsh_period_map = \ databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period'] # caculate the dates for the previous year adsh_previous_period_map = {adsh: calculate_previous_period(period) for adsh, period in adsh_period_map.items()} mask = (databag.pre_num_df['adsh'].map(adsh_period_map) == databag.pre_num_df['ddate']) | \ (databag.pre_num_df['adsh'].map(adsh_previous_period_map) == databag.pre_num_df[ 'ddate']) pre_num_filtered_for_ddates = databag.pre_num_df[mask] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_ddates)
class ReportPeriodJoinedFilter (*args, **kwds)
-
Filters the data so that only datapoints are contained whose ddate attribute equals the period date of the report. Therefore, the filter operates on the pre_num_df dataframe.
Expand source code
class ReportPeriodJoinedFilter(FilterBase[JoinedDataBag]): """ Filters the data so that only datapoints are contained which ddate-attribute equals the period date of the report. Therefore, the pathfilter operates on the num_df dataframe. """ def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ pathfilter the databag so that only datapoints are contained which have a ddate-attribute that equals the period-attribute of the report. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ adsh_period_map = \ databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period'] mask = databag.pre_num_df['adsh'].map(adsh_period_map) == databag.pre_num_df['ddate'] pre_num_filtered_for_ddates = databag.pre_num_df[mask] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_ddates)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
pathfilter the databag so that only datapoints are contained which have a ddate-attribute that equals the period-attribute of the report.
Args
databag(JoinedDataBag) : databag to apply the pathfilter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ pathfilter the databag so that only datapoints are contained which have a ddate-attribute that equals the period-attribute of the report. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ adsh_period_map = \ databag.sub_df[['adsh', 'period']].set_index('adsh').to_dict()['period'] mask = databag.pre_num_df['adsh'].map(adsh_period_map) == databag.pre_num_df['ddate'] pre_num_filtered_for_ddates = databag.pre_num_df[mask] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_ddates)
class StmtJoinedFilter (stmts: List[str])
-
Filters the data by a list of statement types (BS, IS, CF, …). This filter operates on the pre_num_df.
Expand source code
class StmtJoinedFilter(FilterBase[JoinedDataBag]): """ Filters the data by a list of statement type (BS, IS, CF, ...). This pathfilter operates on the pre_df. """ def __init__(self, stmts: List[str]): self.stmts = stmts def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only datapoints of reports defined by the adshs list are contained. Args: databag(JoinedDataBag) : Joineddatabag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_stmts = databag.pre_num_df[databag.pre_num_df.stmt.isin(self.stmts)] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_stmts)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
filters the databag so that only datapoints belonging to one of the statement types in the stmts list are contained.
Args
databag(JoinedDataBag) : Joineddatabag to apply the pathfilter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only datapoints of reports defined by the adshs list are contained. Args: databag(JoinedDataBag) : Joineddatabag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_stmts = databag.pre_num_df[databag.pre_num_df.stmt.isin(self.stmts)] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_stmts)
class TagJoinedFilter (tags: List[str])
-
Filters the data by a list of tags. This filter operates on the pre_num_df.
Expand source code
class TagJoinedFilter(FilterBase[JoinedDataBag]): """ Filters the data by a list of tags. This pathfilter operates on the pre_df and the num_df. """ def __init__(self, tags: List[str]): self.tags = tags def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only datapoints are contained which have a tag-attribute that is in the provided list. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_tags = databag.pre_num_df[databag.pre_num_df.tag.isin(self.tags)] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_tags)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
filters the databag so that only datapoints are contained which have a tag-attribute that is in the provided list.
Args
databag(JoinedDataBag) : databag to apply the pathfilter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ filters the databag so that only datapoints are contained which have a tag-attribute that is in the provided list. Args: databag(JoinedDataBag) : databag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ pre_num_filtered_for_tags = databag.pre_num_df[databag.pre_num_df.tag.isin(self.tags)] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=pre_num_filtered_for_tags)
class USDOnlyJoinedFilter (*args, **kwds)
-
Removes all entries which have a currency in the column uom that is not USD.
Expand source code
class USDOnlyJoinedFilter(FilterBase[JoinedDataBag]): """ Removes all entries which have a currency in the column uom that is not USD. """ def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ Removes all currency entries in the uom colum of the pre_num_df that are not USD. Args: databag(JoinedDataBag) : Joineddatabag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ # currency is always in uppercase, so if it is not all uppercase, it is not a currency mask_has_lower = ~databag.pre_num_df.uom.str.isupper() mask_is_none_currency = databag.pre_num_df.uom.str.len() != 3 mask_usd_only = databag.pre_num_df.uom == "USD" prenum_filtered_for_usd = databag.pre_num_df[ mask_has_lower | mask_is_none_currency | mask_usd_only] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=prenum_filtered_for_usd)
Ancestors
- FilterBase
- typing.Generic
Methods
def filter(self, databag: JoinedDataBag) ‑> JoinedDataBag
-
Removes all currency entries in the uom column of the pre_num_df that are not USD.
Args
databag(JoinedDataBag) : Joineddatabag to apply the pathfilter to
Returns
JoinedDataBag
- the databag with the filtered data
Expand source code
def filter(self, databag: JoinedDataBag) -> JoinedDataBag: """ Removes all currency entries in the uom colum of the pre_num_df that are not USD. Args: databag(JoinedDataBag) : Joineddatabag to apply the pathfilter to Returns: JoinedDataBag: the databag with the filtered data """ # currency is always in uppercase, so if it is not all uppercase, it is not a currency mask_has_lower = ~databag.pre_num_df.uom.str.isupper() mask_is_none_currency = databag.pre_num_df.uom.str.len() != 3 mask_usd_only = databag.pre_num_df.uom == "USD" prenum_filtered_for_usd = databag.pre_num_df[ mask_has_lower | mask_is_none_currency | mask_usd_only] return JoinedDataBag.create(sub_df=databag.sub_df, pre_num_df=prenum_filtered_for_usd)