Module secfsdstools.c_index.companyindexreading
Reads company information.
Expand source code
"""
Reads company information.
"""
import os
from typing import Dict, Optional, List
import pandas as pd
from secfsdstools.a_config.configmgt import ConfigurationManager
from secfsdstools.a_config.configmodel import Configuration
from secfsdstools.c_index.indexdataaccess import IndexReport, ParquetDBIndexingAccessor
from secfsdstools.a_utils.constants import SUB_TXT
class CompanyIndexReader:
"""
reads information for a single company
"""
@classmethod
def get_company_index_reader(cls, cik: int, configuration: Optional[Configuration] = None):
"""
creates a company instance for the provided cik. If no configuration object is passed,
it reads the configuration from the config file.
Args:
cik (int): the central identification key which is assigned by the sec for every company
configuration (Configuration, optional, None): Optional configuration object
Returns:
CompanyIndexReader: instance of Company Reader
"""
if configuration is None:
configuration = ConfigurationManager.read_config_file()
dbaccessor = ParquetDBIndexingAccessor(db_dir=configuration.db_dir)
return CompanyIndexReader(cik, dbaccessor=dbaccessor)
def __init__(self, cik: int, dbaccessor: ParquetDBIndexingAccessor):
self.cik = cik
self.dbaccessor = dbaccessor
def get_latest_company_filing(self) -> Dict[str, str]:
"""
returns the latest company information (the content in the sub.txt file)
from the quarter-zip files.
Returns:
Dict[str, str]: dict with the information of the latest
report as present in the sub.txt file.
"""
latest_report = self.dbaccessor.find_latest_company_report(self.cik)
return self._get_latest_company_filing_parquet(latest_report)
def _get_latest_company_filing_parquet(self, latest_report: IndexReport) -> Dict[str, str]:
latest_filing = pd.read_parquet(os.path.join(latest_report.fullPath, f'{SUB_TXT}.parquet'),
filters=[('adsh', '==', latest_report.adsh)])
return latest_filing.iloc[0].to_dict()
def get_all_company_reports(self, forms: Optional[List[str]] = None) -> List[IndexReport]:
"""
gets all reports as IndexReport instances for a company identified by its cik.
if forms is not set, all forms are returned, otherwise forms is a list of the
forms that should be returned
Args:
forms (List[str], optional, None): list of the forms to be returned,
like ['10-Q', '10-K']
Returns:
List[IndexReport]: the list of matching reports as a list of IndexReport instances
"""
return self.dbaccessor.read_index_reports_for_ciks([self.cik], forms)
def get_all_company_reports_df(self, forms: Optional[List[str]] = None) -> pd.DataFrame:
"""
gets all reports as IndexReport instances for a company identified by its cik.
if forms is not set, all forms are returned, otherwise forms is a list of the
forms that should be returned
Args:
forms (List[str], optional, None): list of the forms to be returned,
like ['10-Q', '10-K']
Returns:
pd.DataFrame: the list of matching reports as a panas Dataframe
"""
return self.dbaccessor.read_index_reports_for_ciks_df([self.cik], forms)
Classes
class CompanyIndexReader (cik: int, dbaccessor: ParquetDBIndexingAccessor)
-
reads information for a single company
Expand source code
class CompanyIndexReader: """ reads information for a single company """ @classmethod def get_company_index_reader(cls, cik: int, configuration: Optional[Configuration] = None): """ creates a company instance for the provided cik. If no configuration object is passed, it reads the configuration from the config file. Args: cik (int): the central identification key which is assigned by the sec for every company configuration (Configuration, optional, None): Optional configuration object Returns: CompanyIndexReader: instance of Company Reader """ if configuration is None: configuration = ConfigurationManager.read_config_file() dbaccessor = ParquetDBIndexingAccessor(db_dir=configuration.db_dir) return CompanyIndexReader(cik, dbaccessor=dbaccessor) def __init__(self, cik: int, dbaccessor: ParquetDBIndexingAccessor): self.cik = cik self.dbaccessor = dbaccessor def get_latest_company_filing(self) -> Dict[str, str]: """ returns the latest company information (the content in the sub.txt file) from the quarter-zip files. Returns: Dict[str, str]: dict with the information of the latest report as present in the sub.txt file. """ latest_report = self.dbaccessor.find_latest_company_report(self.cik) return self._get_latest_company_filing_parquet(latest_report) def _get_latest_company_filing_parquet(self, latest_report: IndexReport) -> Dict[str, str]: latest_filing = pd.read_parquet(os.path.join(latest_report.fullPath, f'{SUB_TXT}.parquet'), filters=[('adsh', '==', latest_report.adsh)]) return latest_filing.iloc[0].to_dict() def get_all_company_reports(self, forms: Optional[List[str]] = None) -> List[IndexReport]: """ gets all reports as IndexReport instances for a company identified by its cik. if forms is not set, all forms are returned, otherwise forms is a list of the forms that should be returned Args: forms (List[str], optional, None): list of the forms to be returned, like ['10-Q', '10-K'] Returns: List[IndexReport]: the list of matching reports as a list of IndexReport instances """ return self.dbaccessor.read_index_reports_for_ciks([self.cik], forms) def get_all_company_reports_df(self, forms: Optional[List[str]] = None) -> pd.DataFrame: """ gets all reports as IndexReport instances for a company identified by its cik. if forms is not set, all forms are returned, otherwise forms is a list of the forms that should be returned Args: forms (List[str], optional, None): list of the forms to be returned, like ['10-Q', '10-K'] Returns: pd.DataFrame: the list of matching reports as a panas Dataframe """ return self.dbaccessor.read_index_reports_for_ciks_df([self.cik], forms)
Static methods
def get_company_index_reader(cik: int, configuration: Optional[Configuration] = None)
-
creates a company instance for the provided cik. If no configuration object is passed, it reads the configuration from the config file.
Args
cik
:int
- the central identification key which is assigned by the sec for every company
configuration
:Configuration
, optional, None
- Optional configuration object
Returns
CompanyIndexReader
- instance of Company Reader
Expand source code
@classmethod def get_company_index_reader(cls, cik: int, configuration: Optional[Configuration] = None): """ creates a company instance for the provided cik. If no configuration object is passed, it reads the configuration from the config file. Args: cik (int): the central identification key which is assigned by the sec for every company configuration (Configuration, optional, None): Optional configuration object Returns: CompanyIndexReader: instance of Company Reader """ if configuration is None: configuration = ConfigurationManager.read_config_file() dbaccessor = ParquetDBIndexingAccessor(db_dir=configuration.db_dir) return CompanyIndexReader(cik, dbaccessor=dbaccessor)
Methods
def get_all_company_reports(self, forms: Optional[List[str]] = None) ‑> List[IndexReport]
-
gets all reports as IndexReport instances for a company identified by its cik. if forms is not set, all forms are returned, otherwise forms is a list of the forms that should be returned
Args
forms
:List[str]
, optional, None
- list of the forms to be returned, like ['10-Q', '10-K']
Returns
List[IndexReport]
- the list of matching reports as a list of IndexReport instances
Expand source code
def get_all_company_reports(self, forms: Optional[List[str]] = None) -> List[IndexReport]: """ gets all reports as IndexReport instances for a company identified by its cik. if forms is not set, all forms are returned, otherwise forms is a list of the forms that should be returned Args: forms (List[str], optional, None): list of the forms to be returned, like ['10-Q', '10-K'] Returns: List[IndexReport]: the list of matching reports as a list of IndexReport instances """ return self.dbaccessor.read_index_reports_for_ciks([self.cik], forms)
def get_all_company_reports_df(self, forms: Optional[List[str]] = None) ‑> pandas.core.frame.DataFrame
-
gets all reports as IndexReport instances for a company identified by its cik. if forms is not set, all forms are returned, otherwise forms is a list of the forms that should be returned
Args
forms
:List[str]
, optional, None
- list of the forms to be returned, like ['10-Q', '10-K']
Returns
pd.DataFrame
- the list of matching reports as a panas Dataframe
Expand source code
def get_all_company_reports_df(self, forms: Optional[List[str]] = None) -> pd.DataFrame: """ gets all reports as IndexReport instances for a company identified by its cik. if forms is not set, all forms are returned, otherwise forms is a list of the forms that should be returned Args: forms (List[str], optional, None): list of the forms to be returned, like ['10-Q', '10-K'] Returns: pd.DataFrame: the list of matching reports as a panas Dataframe """ return self.dbaccessor.read_index_reports_for_ciks_df([self.cik], forms)
def get_latest_company_filing(self) ‑> Dict[str, str]
-
returns the latest company information (the content in the sub.txt file) from the quarter-zip files.
Returns
Dict[str, str]
- dict with the information of the latest
report as present in the sub.txt file.
Expand source code
def get_latest_company_filing(self) -> Dict[str, str]: """ returns the latest company information (the content in the sub.txt file) from the quarter-zip files. Returns: Dict[str, str]: dict with the information of the latest report as present in the sub.txt file. """ latest_report = self.dbaccessor.find_latest_company_report(self.cik) return self._get_latest_company_filing_parquet(latest_report)