from __future__ import annotations
from typing import List
import pandas as pd
from finsets.wrds import wrds_apimergent
Retrieve and process data from WRDS Mergent FISD
Since this is a proprietary dataset, in the documentation below, I can not show any of the data that is being retrieved/generated (will show only column names).
PROVIDER = 'Wharton Research Data Services (WRDS)'
URL = 'https://wrds-www.wharton.upenn.edu/pages/get-data/mergent-fixed-income-securities-database-fisd/'
LIBRARY = 'fisd'
TABLE = 'fisd_mergedissue'
ISSUER_TABLE = 'fisd_mergedissuer'
FREQ = 'D'
MIN_YEAR = 1894
MAX_YEAR = None
ENTITY_ID_IN_RAW_DSET = 'complete_cusip'
ENTITY_ID_IN_CLEAN_DSET = 'cusip'
TIME_VAR_IN_RAW_DSET = 'offering_date'
TIME_VAR_IN_CLEAN_DSET = 'offering_date'list_all_vars
list_all_vars ()
Collects names of all available variables from WRDS f{LIBRARY}.{TABLE}.
all_vars = list_all_vars()all_vars.head(50)| name | type | wrds_library | wrds_table | |
|---|---|---|---|---|
| 0 | issue_id | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 1 | issuer_id | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 2 | prospectus_issuer_name | VARCHAR(64) | fisd | fisd_mergedissue |
| 3 | issuer_cusip | VARCHAR(6) | fisd | fisd_mergedissue |
| 4 | issue_cusip | VARCHAR(3) | fisd | fisd_mergedissue |
| 5 | issue_name | VARCHAR(64) | fisd | fisd_mergedissue |
| 6 | maturity | DATE | fisd | fisd_mergedissue |
| 7 | security_level | VARCHAR(4) | fisd | fisd_mergedissue |
| 8 | security_pledge | VARCHAR(4) | fisd | fisd_mergedissue |
| 9 | enhancement | VARCHAR(1) | fisd | fisd_mergedissue |
| 10 | coupon_type | VARCHAR(4) | fisd | fisd_mergedissue |
| 11 | convertible | VARCHAR(1) | fisd | fisd_mergedissue |
| 12 | mtn | VARCHAR(1) | fisd | fisd_mergedissue |
| 13 | asset_backed | VARCHAR(1) | fisd | fisd_mergedissue |
| 14 | yankee | VARCHAR(1) | fisd | fisd_mergedissue |
| 15 | canadian | VARCHAR(1) | fisd | fisd_mergedissue |
| 16 | oid | VARCHAR(1) | fisd | fisd_mergedissue |
| 17 | foreign_currency | VARCHAR(1) | fisd | fisd_mergedissue |
| 18 | slob | VARCHAR(1) | fisd | fisd_mergedissue |
| 19 | issue_offered_global | VARCHAR(1) | fisd | fisd_mergedissue |
| 20 | settlement_type | VARCHAR(4) | fisd | fisd_mergedissue |
| 21 | gross_spread | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 22 | selling_concession | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 23 | reallowance | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 24 | comp_neg_exch_deal | VARCHAR(4) | fisd | fisd_mergedissue |
| 25 | rule_415_reg | VARCHAR(1) | fisd | fisd_mergedissue |
| 26 | sec_reg_type1 | VARCHAR(4) | fisd | fisd_mergedissue |
| 27 | sec_reg_type2 | VARCHAR(4) | fisd | fisd_mergedissue |
| 28 | rule_144a | VARCHAR(1) | fisd | fisd_mergedissue |
| 29 | treasury_spread | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 30 | treasury_maturity | VARCHAR(20) | fisd | fisd_mergedissue |
| 31 | offering_amt | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 32 | offering_date | DATE | fisd | fisd_mergedissue |
| 33 | offering_price | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 34 | offering_yield | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 35 | delivery_date | DATE | fisd | fisd_mergedissue |
| 36 | unit_deal | VARCHAR(1) | fisd | fisd_mergedissue |
| 37 | form_of_own | VARCHAR(4) | fisd | fisd_mergedissue |
| 38 | denomination | VARCHAR(9) | fisd | fisd_mergedissue |
| 39 | principal_amt | DOUBLE_PRECISION | fisd | fisd_mergedissue |
| 40 | covenants | VARCHAR(1) | fisd | fisd_mergedissue |
| 41 | defeased | VARCHAR(1) | fisd | fisd_mergedissue |
| 42 | defeasance_type | VARCHAR(4) | fisd | fisd_mergedissue |
| 43 | defeased_date | DATE | fisd | fisd_mergedissue |
| 44 | defaulted | VARCHAR(1) | fisd | fisd_mergedissue |
| 45 | tender_exch_offer | VARCHAR(1) | fisd | fisd_mergedissue |
| 46 | redeemable | VARCHAR(1) | fisd | fisd_mergedissue |
| 47 | refund_protection | VARCHAR(1) | fisd | fisd_mergedissue |
| 48 | refunding_date | DATE | fisd | fisd_mergedissue |
| 49 | putable | VARCHAR(1) | fisd | fisd_mergedissue |
default_raw_vars
default_raw_vars ()
Defines default variables used in get_raw_data if none are specified.
print(default_raw_vars())['offering_date', 'issue_id', 'issuer_id', 'issuer_cusip', 'issue_cusip', 'complete_cusip', 'isin', 'security_level', 'coupon_type', 'convertible', 'foreign_currency', 'rule_144a', 'redeemable', 'bond_type', 'maturity', 'coupon', 'offering_amt', 'offering_price', 'principal_amt', 'defaulted', 'day_count_basis', 'last_interest_date', 'first_interest_date', 'conv_commod_type', 'cusip_name', 'naics_code', 'sic_code', 'treasury_maturity', 'putable', 'country_domicile', 'private_placement', 'asset_backed', 'interest_frequency', 'dated_date']
parse_varlist
parse_varlist (vars:List[str]=None, required_vars=[])
Validates that vars are available in {LIBRARY}.{TABLE} table and adds a. prefixes to variable names to feed into an SQL query
parse_varlist()'a.security_level,a.private_placement,b.naics_code,a.bond_type,a.putable,a.issuer_cusip,a.defaulted,a.interest_frequency,a.principal_amt,a.issue_cusip,a.rule_144a,a.complete_cusip,a.offering_amt,a.treasury_maturity,a.redeemable,a.dated_date,a.coupon,a.foreign_currency,a.maturity,a.issue_id,a.isin,a.conv_commod_type,a.first_interest_date,b.sic_code,a.offering_price,a.coupon_type,a.offering_date,a.last_interest_date,b.country_domicile,b.cusip_name,a.day_count_basis,a.convertible,a.issuer_id,a.asset_backed'
get_raw_data
get_raw_data (vars:List[str]=None, required_vars:List[str]=['offering_date', 'complete_cusip', 'issuer_id'], nrows:int=None, start_date:str=None, end_date:str=None)
Downloads vars from start_date to end_date from WRDS {LIBRARY}.{TABLE} library
| Type | Default | Details | |
|---|---|---|---|
| vars | List[str] | None | If None, downloads default_raw_vars; use ’*’ to get all available variables |
| required_vars | List[str] | [‘offering_date’, ‘complete_cusip’, ‘issuer_id’] | Will get downloaded, even if not in vars |
| nrows | int | None | Number of rows to download. If None, full dataset will be downloaded |
| start_date | str | None | Start date in MM/DD/YYYY format |
| end_date | str | None | End date in MM/DD/YYYY format |
| Returns | pd.DataFrame |
raw = get_raw_data(start_date='01/01/2022')raw.head(0)| offering_date | complete_cusip | issuer_id | security_level | private_placement | naics_code | bond_type | putable | issuer_cusip | defaulted | ... | first_interest_date | sic_code | offering_price | coupon_type | last_interest_date | country_domicile | cusip_name | day_count_basis | convertible | asset_backed |
|---|
0 rows × 34 columns
process_raw_data
process_raw_data (df:pandas.core.frame.DataFrame=None)
Mainly converts variables to categorical type to save memory.
| Type | Default | Details | |
|---|---|---|---|
| df | pd.DataFrame | None | Must contain permno and datadate columns |
| Returns | pd.DataFrame |
df_clean = process_raw_data(raw)df_clean.head(0)| offering_date | cusip | issuer_id | security_level | private_placement | naics_code | bond_type | putable | issuer_cusip | defaulted | ... | first_interest_date | sic_code | offering_price | coupon_type | last_interest_date | country_domicile | cusip_name | day_count_basis | convertible | asset_backed |
|---|
0 rows × 34 columns