Module secfsdstools.e_collector.companycollecting
Collects all data by the cik company.
Expand source code
"""
Collects all data by the cik company.
"""
from typing import Optional, List
from secfsdstools.a_config.configmgt import ConfigurationManager
from secfsdstools.a_config.configmodel import Configuration
from secfsdstools.c_index.indexdataaccess import IndexReport, ParquetDBIndexingAccessor
from secfsdstools.e_collector.multireportcollecting import MultiReportCollector
class CompanyReportCollector:
"""
Collects reports for a companies defined by their cik number.
Collects the data from different zip files.
For instance, it is a simple way to read all 10-K reports of serveral companies.
"""
@classmethod
def get_company_collector(
cls, ciks: List[int],
forms_filter: Optional[List[str]] = None,
stmt_filter: Optional[List[str]] = None,
tag_filter: Optional[List[str]] = None,
configuration: Optional[Configuration] = None):
"""
creates a MultiReportCollector instance for the provided ciks and forms (e.g. 10-K..)
If no configuration object is passed,
it reads the configuration from the configuration file.
Args:
ciks (List[int]): a list of central identification keys which is assigned
by the sec to every company
forms_filter (List[str], optional, None):
List of forms that should be read (10-K, 10-Q, ...)
stmt_filter (List[str], optional, None):
List of stmts that should be read (BS, IS, ...)
tag_filter (List[str], optional, None:
List of tags that should be read (Assets, Liabilities, ...)
configuration (Configuration, optional, None): Optional configuration object
Returns:
MultiReportCollector: instance of MultiReportCollector
"""
if configuration is None:
configuration = ConfigurationManager.read_config_file()
dbaccessor = ParquetDBIndexingAccessor(db_dir=configuration.db_dir)
# todo: if daily entries are also in index, it returns mutliple matches!
# probably fix directly in read_index_reports-> pathfilter for two and check source
# prefer to use zip instead of daly?
index_reports: List[IndexReport] = dbaccessor.read_index_reports_for_ciks(ciks,
forms_filter)
return MultiReportCollector.get_reports_by_indexreports(index_reports=index_reports,
stmt_filter=stmt_filter,
tag_filter=tag_filter
)
Classes
class CompanyReportCollector
-
Collects reports for a companies defined by their cik number. Collects the data from different zip files. For instance, it is a simple way to read all 10-K reports of serveral companies.
Expand source code
class CompanyReportCollector: """ Collects reports for a companies defined by their cik number. Collects the data from different zip files. For instance, it is a simple way to read all 10-K reports of serveral companies. """ @classmethod def get_company_collector( cls, ciks: List[int], forms_filter: Optional[List[str]] = None, stmt_filter: Optional[List[str]] = None, tag_filter: Optional[List[str]] = None, configuration: Optional[Configuration] = None): """ creates a MultiReportCollector instance for the provided ciks and forms (e.g. 10-K..) If no configuration object is passed, it reads the configuration from the configuration file. Args: ciks (List[int]): a list of central identification keys which is assigned by the sec to every company forms_filter (List[str], optional, None): List of forms that should be read (10-K, 10-Q, ...) stmt_filter (List[str], optional, None): List of stmts that should be read (BS, IS, ...) tag_filter (List[str], optional, None: List of tags that should be read (Assets, Liabilities, ...) configuration (Configuration, optional, None): Optional configuration object Returns: MultiReportCollector: instance of MultiReportCollector """ if configuration is None: configuration = ConfigurationManager.read_config_file() dbaccessor = ParquetDBIndexingAccessor(db_dir=configuration.db_dir) # todo: if daily entries are also in index, it returns mutliple matches! # probably fix directly in read_index_reports-> pathfilter for two and check source # prefer to use zip instead of daly? index_reports: List[IndexReport] = dbaccessor.read_index_reports_for_ciks(ciks, forms_filter) return MultiReportCollector.get_reports_by_indexreports(index_reports=index_reports, stmt_filter=stmt_filter, tag_filter=tag_filter )
Static methods
def get_company_collector(ciks: List[int], forms_filter: Optional[List[str]] = None, stmt_filter: Optional[List[str]] = None, tag_filter: Optional[List[str]] = None, configuration: Optional[Configuration] = None)
-
creates a MultiReportCollector instance for the provided ciks and forms (e.g. 10-K..) If no configuration object is passed, it reads the configuration from the configuration file.
Args
ciks
:List[int]
- a list of central identification keys which is assigned by the sec to every company
- forms_filter (List[str], optional, None):
- List of forms that should be read (10-K, 10-Q, …)
- stmt_filter (List[str], optional, None):
- List of stmts that should be read (BS, IS, …)
- tag_filter (List[str], optional, None:
- List of tags that should be read (Assets, Liabilities, …)
configuration
:Configuration
, optional, None
- Optional configuration object
Returns
MultiReportCollector
- instance of MultiReportCollector
Expand source code
@classmethod def get_company_collector( cls, ciks: List[int], forms_filter: Optional[List[str]] = None, stmt_filter: Optional[List[str]] = None, tag_filter: Optional[List[str]] = None, configuration: Optional[Configuration] = None): """ creates a MultiReportCollector instance for the provided ciks and forms (e.g. 10-K..) If no configuration object is passed, it reads the configuration from the configuration file. Args: ciks (List[int]): a list of central identification keys which is assigned by the sec to every company forms_filter (List[str], optional, None): List of forms that should be read (10-K, 10-Q, ...) stmt_filter (List[str], optional, None): List of stmts that should be read (BS, IS, ...) tag_filter (List[str], optional, None: List of tags that should be read (Assets, Liabilities, ...) configuration (Configuration, optional, None): Optional configuration object Returns: MultiReportCollector: instance of MultiReportCollector """ if configuration is None: configuration = ConfigurationManager.read_config_file() dbaccessor = ParquetDBIndexingAccessor(db_dir=configuration.db_dir) # todo: if daily entries are also in index, it returns mutliple matches! # probably fix directly in read_index_reports-> pathfilter for two and check source # prefer to use zip instead of daly? index_reports: List[IndexReport] = dbaccessor.read_index_reports_for_ciks(ciks, forms_filter) return MultiReportCollector.get_reports_by_indexreports(index_reports=index_reports, stmt_filter=stmt_filter, tag_filter=tag_filter )