Module secfsdstools.x_examples.example_collectors

Contains some example code on how to use the different collectors.

Expand source code
"""
Contains some example code on how to use the different collectors.
"""

import pandas as pd

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


def singlereportcollector():
    """
    SingleReportCollector example
    """
    from secfsdstools.e_collector.reportcollecting import \
        SingleReportCollector  # pylint: disable=C0415

    apple_10k_2022_adsh = "0000320193-22-000108"

    collector: SingleReportCollector = SingleReportCollector.get_report_by_adsh(
        adsh=apple_10k_2022_adsh)
    rawdatabag = collector.collect()

    # as expected, there is just one entry in the submission dataframe
    print(rawdatabag.sub_df)
    # just print the size of the pre and num dataframes
    print(rawdatabag.pre_df.shape)
    print(rawdatabag.num_df.shape)


def multireportcollector():
    """
    MultiReportCollector example
    """

    from secfsdstools.e_collector.multireportcollecting import \
        MultiReportCollector  # pylint: disable=C0415

    apple_10k_2022_adsh = "0000320193-22-000108"
    apple_10k_2012_adsh = "0001193125-12-444068"

    # load only the assets tags that are present in the 10-K report of apple in the years
    # 2022 and 2012
    collector: MultiReportCollector = \
        MultiReportCollector.get_reports_by_adshs(adshs=[apple_10k_2022_adsh,
                                                         apple_10k_2012_adsh],
                                                  tag_filter=['Assets'])
    rawdatabag = collector.collect()
    # as expected, there are just two entries in the submission dataframe
    print(rawdatabag.sub_df)
    # .. and the num_df only contains entries for the Assets tag
    print(rawdatabag.num_df)


def zipcollector():
    """
    ZipCollector example
    """

    from secfsdstools.e_collector.zipcollecting import ZipCollector  # pylint: disable=C0415

    # only collect the Balance Sheet of annual reports that
    # were filed during the first quarter in 2022
    collector: ZipCollector = ZipCollector.get_zip_by_name(name="2022q1.zip",
                                                           forms_filter=["10-K"],
                                                           stmt_filter=["BS"])

    rawdatabag = collector.collect()

    # only show the size of the data frame
    # .. over 4000 companies filed a 10 K report in q1 2022
    print(rawdatabag.sub_df.shape)
    print(rawdatabag.pre_df.shape)
    print(rawdatabag.num_df.shape)


def companyreportcollector():
    """
    CompanyReportCollector example
    """

    from secfsdstools.e_collector.companycollecting import \
        CompanyReportCollector  # pylint: disable=C0415

    apple_cik = 320193
    collector = CompanyReportCollector.get_company_collector(ciks=[apple_cik],
                                                             forms_filter=["10-K"])

    rawdatabag = collector.collect()

    # all filed 10-K reports for apple since 2010 are in the databag
    print(rawdatabag.sub_df)

    print(rawdatabag.pre_df.shape)
    print(rawdatabag.num_df.shape)


def run():
    """launch method"""

    singlereportcollector()
    multireportcollector()
    zipcollector()
    companyreportcollector()


if __name__ == '__main__':
    run()

Functions

def companyreportcollector()

CompanyReportCollector example

Expand source code
def companyreportcollector():
    """
    CompanyReportCollector example
    """

    from secfsdstools.e_collector.companycollecting import \
        CompanyReportCollector  # pylint: disable=C0415

    apple_cik = 320193
    collector = CompanyReportCollector.get_company_collector(ciks=[apple_cik],
                                                             forms_filter=["10-K"])

    rawdatabag = collector.collect()

    # all filed 10-K reports for apple since 2010 are in the databag
    print(rawdatabag.sub_df)

    print(rawdatabag.pre_df.shape)
    print(rawdatabag.num_df.shape)
def multireportcollector()

MultiReportCollector example

Expand source code
def multireportcollector():
    """
    MultiReportCollector example
    """

    from secfsdstools.e_collector.multireportcollecting import \
        MultiReportCollector  # pylint: disable=C0415

    apple_10k_2022_adsh = "0000320193-22-000108"
    apple_10k_2012_adsh = "0001193125-12-444068"

    # load only the assets tags that are present in the 10-K report of apple in the years
    # 2022 and 2012
    collector: MultiReportCollector = \
        MultiReportCollector.get_reports_by_adshs(adshs=[apple_10k_2022_adsh,
                                                         apple_10k_2012_adsh],
                                                  tag_filter=['Assets'])
    rawdatabag = collector.collect()
    # as expected, there are just two entries in the submission dataframe
    print(rawdatabag.sub_df)
    # .. and the num_df only contains entries for the Assets tag
    print(rawdatabag.num_df)
def run()

launch method

Expand source code
def run():
    """launch method"""

    singlereportcollector()
    multireportcollector()
    zipcollector()
    companyreportcollector()
def singlereportcollector()

SingleReportCollector example

Expand source code
def singlereportcollector():
    """
    SingleReportCollector example
    """
    from secfsdstools.e_collector.reportcollecting import \
        SingleReportCollector  # pylint: disable=C0415

    apple_10k_2022_adsh = "0000320193-22-000108"

    collector: SingleReportCollector = SingleReportCollector.get_report_by_adsh(
        adsh=apple_10k_2022_adsh)
    rawdatabag = collector.collect()

    # as expected, there is just one entry in the submission dataframe
    print(rawdatabag.sub_df)
    # just print the size of the pre and num dataframes
    print(rawdatabag.pre_df.shape)
    print(rawdatabag.num_df.shape)
def zipcollector()

ZipCollector example

Expand source code
def zipcollector():
    """
    ZipCollector example
    """

    from secfsdstools.e_collector.zipcollecting import ZipCollector  # pylint: disable=C0415

    # only collect the Balance Sheet of annual reports that
    # were filed during the first quarter in 2022
    collector: ZipCollector = ZipCollector.get_zip_by_name(name="2022q1.zip",
                                                           forms_filter=["10-K"],
                                                           stmt_filter=["BS"])

    rawdatabag = collector.collect()

    # only show the size of the data frame
    # .. over 4000 companies filed a 10 K report in q1 2022
    print(rawdatabag.sub_df.shape)
    print(rawdatabag.pre_df.shape)
    print(rawdatabag.num_df.shape)