Module secfsdstools.a_utils.constants

base constant values

Expand source code
"""
base constant values
"""
from typing import Dict

import pyarrow as pa

# File names of the individual text files; they also serve as the keys of
# PA_SCHEMA_MAP.
NUM_TXT = 'num.txt'
PRE_TXT = 'pre.txt'
SUB_TXT = 'sub.txt'
# file name under which the pre_num schema is registered
PRE_NUM_TXT = 'pre_num.txt'

# Column names for the num file, in the order listed here.
NUM_COLS = [
    "adsh",
    "tag",
    "version",
    "coreg",
    "ddate",
    "qtrs",
    "uom",
    "segments",
    "value",
    "footnote",
]

# Column names for the pre file, in the order listed here.
PRE_COLS = [
    "adsh",
    "report",
    "line",
    "stmt",
    "inpth",
    "rfile",
    "tag",
    "version",
    "plabel",
    "negating",
]

# Column names for the sub file; only a subset of the columns that appear in
# the sub.txt schema of PA_SCHEMA_MAP.
SUB_COLS = [
    "adsh",
    "form",
    "period",
    "filed",
    "cik",
]

# Data types per sub column.
# Columns that can be empty (period, sic, fy) are read as float, because NA
# values are not supported for int. filed is read as int.
SUB_DTYPE = {
    "adsh": str,
    "cik": int,
    "name": str,
    # read as float, since it could be empty in quarterly zips
    "sic": float,
    "fye": str,
    "form": str,
    # read as float, since period could contain NAs
    "period": float,
    "filed": int,
    "accepted": str,
    # read as float, since it could be empty in quarterly zips
    "fy": float,
    "fp": str,
    "aciks": str,
}

# Data types per num column; ddate and qtrs are read as int.
NUM_DTYPE = {
    "adsh": str,
    "tag": str,
    "version": str,
    "coreg": str,
    "ddate": int,
    "qtrs": int,
    "uom": str,
    "segments": str,
    # kept as str: daily files can also contain ticker name and
    # stock exchange as strings in this column
    "value": str,
    "footnote": str,
}

# Data types per pre column.
PRE_DTYPE = {
    "adsh": str,
    "report": int,
    # read as float, since it may be nan in some entries
    "line": float,
    "stmt": str,
    "inpth": int,
    "tag": str,
    "version": str,
    "negating": int,
}

# pyarrow schema map: one pyarrow schema per file name.
#
# The pre.txt, num.txt and pre_num.txt schemas all start with the same three
# fields, and the pre_num.txt schema is the num.txt schema followed by the
# pre.txt-specific fields. Those field lists are therefore defined once and
# concatenated when the schemas are built.

# leading fields common to the pre.txt, num.txt and pre_num.txt schemas
_COMMON_FIELDS = [
    ("adsh", pa.string()),
    ("tag", pa.string()),
    ("version", pa.string()),
]

# fields that follow the common fields in the num.txt schema
_NUM_ONLY_FIELDS = [
    ("ddate", pa.int32()),
    ("qtrs", pa.int32()),
    ("uom", pa.string()),
    ("segments", pa.string()),
    ("coreg", pa.string()),
    ("value", pa.float64()),
    ("footnote", pa.string()),
]

# fields that follow the common fields in the pre.txt schema
_PRE_ONLY_FIELDS = [
    ("report", pa.int32()),
    ("line", pa.int32()),
    ("stmt", pa.string()),
    ("inpth", pa.int32()),
    ("rfile", pa.string()),
    ("plabel", pa.string()),
    ("negating", pa.int32()),
]

# all fields of the sub.txt schema
_SUB_FIELDS = [
    ("adsh", pa.string()),
    ("cik", pa.int32()),
    ("name", pa.string()),
    ("sic", pa.float64()),
    ("countryba", pa.string()),
    ("stprba", pa.string()),
    ("cityba", pa.string()),
    ("zipba", pa.string()),
    ("bas1", pa.string()),
    ("bas2", pa.string()),
    ("baph", pa.string()),
    ("countryma", pa.string()),
    ("stprma", pa.string()),
    ("cityma", pa.string()),
    ("zipma", pa.string()),
    ("mas1", pa.string()),
    ("mas2", pa.string()),
    ("countryinc", pa.string()),
    ("stprinc", pa.string()),
    ("ein", pa.float64()),
    ("former", pa.string()),
    ("changed", pa.float64()),
    ("afs", pa.string()),
    ("wksi", pa.int64()),
    ("fye", pa.string()),
    ("form", pa.string()),
    ("period", pa.int32()),
    ("fy", pa.float64()),
    ("fp", pa.string()),
    ("filed", pa.int32()),
    ("accepted", pa.string()),
    ("prevrpt", pa.int64()),
    ("detail", pa.int64()),
    ("instance", pa.string()),
    ("nciks", pa.int64()),
    ("aciks", pa.string()),
]

PA_SCHEMA_MAP: Dict[str, pa.Schema] = {
    SUB_TXT: pa.schema(_SUB_FIELDS),
    PRE_TXT: pa.schema(_COMMON_FIELDS + _PRE_ONLY_FIELDS),
    NUM_TXT: pa.schema(_COMMON_FIELDS + _NUM_ONLY_FIELDS),
    PRE_NUM_TXT: pa.schema(
        _COMMON_FIELDS + _NUM_ONLY_FIELDS + _PRE_ONLY_FIELDS
    ),
}