from __future__ import annotations
from typing import List
import pandas as pd
from finsets.wrds import wrds_api
mergent
Retrieve and process data from WRDS Mergent FISD
Since this is a proprietary dataset, in the documentation below, I can not show any of the data that is being retrieved/generated (will show only column names).
= 'Wharton Research Data Services (WRDS)'
PROVIDER = 'https://wrds-www.wharton.upenn.edu/pages/get-data/mergent-fixed-income-securities-database-fisd/'
URL = 'fisd'
LIBRARY = 'fisd_mergedissue'
TABLE = 'fisd_mergedissuer'
ISSUER_TABLE = 'D'
FREQ = 1894
MIN_YEAR = None
MAX_YEAR = 'complete_cusip'
ENTITY_ID_IN_RAW_DSET = 'cusip'
ENTITY_ID_IN_CLEAN_DSET = 'offering_date'
TIME_VAR_IN_RAW_DSET = 'offering_date' TIME_VAR_IN_CLEAN_DSET
list_all_vars
list_all_vars ()
Collects names of all available variables from WRDS f{LIBRARY}.{TABLE}
.
= list_all_vars() all_vars
50) all_vars.head(
name | type | wrds_library | wrds_table | |
---|---|---|---|---|
0 | issue_id | DOUBLE_PRECISION | fisd | fisd_mergedissue |
1 | issuer_id | DOUBLE_PRECISION | fisd | fisd_mergedissue |
2 | prospectus_issuer_name | VARCHAR(64) | fisd | fisd_mergedissue |
3 | issuer_cusip | VARCHAR(6) | fisd | fisd_mergedissue |
4 | issue_cusip | VARCHAR(3) | fisd | fisd_mergedissue |
5 | issue_name | VARCHAR(64) | fisd | fisd_mergedissue |
6 | maturity | DATE | fisd | fisd_mergedissue |
7 | security_level | VARCHAR(4) | fisd | fisd_mergedissue |
8 | security_pledge | VARCHAR(4) | fisd | fisd_mergedissue |
9 | enhancement | VARCHAR(1) | fisd | fisd_mergedissue |
10 | coupon_type | VARCHAR(4) | fisd | fisd_mergedissue |
11 | convertible | VARCHAR(1) | fisd | fisd_mergedissue |
12 | mtn | VARCHAR(1) | fisd | fisd_mergedissue |
13 | asset_backed | VARCHAR(1) | fisd | fisd_mergedissue |
14 | yankee | VARCHAR(1) | fisd | fisd_mergedissue |
15 | canadian | VARCHAR(1) | fisd | fisd_mergedissue |
16 | oid | VARCHAR(1) | fisd | fisd_mergedissue |
17 | foreign_currency | VARCHAR(1) | fisd | fisd_mergedissue |
18 | slob | VARCHAR(1) | fisd | fisd_mergedissue |
19 | issue_offered_global | VARCHAR(1) | fisd | fisd_mergedissue |
20 | settlement_type | VARCHAR(4) | fisd | fisd_mergedissue |
21 | gross_spread | DOUBLE_PRECISION | fisd | fisd_mergedissue |
22 | selling_concession | DOUBLE_PRECISION | fisd | fisd_mergedissue |
23 | reallowance | DOUBLE_PRECISION | fisd | fisd_mergedissue |
24 | comp_neg_exch_deal | VARCHAR(4) | fisd | fisd_mergedissue |
25 | rule_415_reg | VARCHAR(1) | fisd | fisd_mergedissue |
26 | sec_reg_type1 | VARCHAR(4) | fisd | fisd_mergedissue |
27 | sec_reg_type2 | VARCHAR(4) | fisd | fisd_mergedissue |
28 | rule_144a | VARCHAR(1) | fisd | fisd_mergedissue |
29 | treasury_spread | DOUBLE_PRECISION | fisd | fisd_mergedissue |
30 | treasury_maturity | VARCHAR(20) | fisd | fisd_mergedissue |
31 | offering_amt | DOUBLE_PRECISION | fisd | fisd_mergedissue |
32 | offering_date | DATE | fisd | fisd_mergedissue |
33 | offering_price | DOUBLE_PRECISION | fisd | fisd_mergedissue |
34 | offering_yield | DOUBLE_PRECISION | fisd | fisd_mergedissue |
35 | delivery_date | DATE | fisd | fisd_mergedissue |
36 | unit_deal | VARCHAR(1) | fisd | fisd_mergedissue |
37 | form_of_own | VARCHAR(4) | fisd | fisd_mergedissue |
38 | denomination | VARCHAR(9) | fisd | fisd_mergedissue |
39 | principal_amt | DOUBLE_PRECISION | fisd | fisd_mergedissue |
40 | covenants | VARCHAR(1) | fisd | fisd_mergedissue |
41 | defeased | VARCHAR(1) | fisd | fisd_mergedissue |
42 | defeasance_type | VARCHAR(4) | fisd | fisd_mergedissue |
43 | defeased_date | DATE | fisd | fisd_mergedissue |
44 | defaulted | VARCHAR(1) | fisd | fisd_mergedissue |
45 | tender_exch_offer | VARCHAR(1) | fisd | fisd_mergedissue |
46 | redeemable | VARCHAR(1) | fisd | fisd_mergedissue |
47 | refund_protection | VARCHAR(1) | fisd | fisd_mergedissue |
48 | refunding_date | DATE | fisd | fisd_mergedissue |
49 | putable | VARCHAR(1) | fisd | fisd_mergedissue |
default_raw_vars
default_raw_vars ()
Defines default variables used in get_raw_data
if none are specified.
print(default_raw_vars())
['offering_date', 'issue_id', 'issuer_id', 'issuer_cusip', 'issue_cusip', 'complete_cusip', 'isin', 'security_level', 'coupon_type', 'convertible', 'foreign_currency', 'rule_144a', 'redeemable', 'bond_type', 'maturity', 'coupon', 'offering_amt', 'offering_price', 'principal_amt', 'defaulted', 'day_count_basis', 'last_interest_date', 'first_interest_date', 'conv_commod_type', 'cusip_name', 'naics_code', 'sic_code', 'treasury_maturity', 'putable', 'country_domicile', 'private_placement', 'asset_backed', 'interest_frequency', 'dated_date']
parse_varlist
parse_varlist (vars:List[str]=None, required_vars=[])
Validates that vars
are available in {LIBRARY}.{TABLE}
table and adds a. prefixes to variable names to feed into an SQL query
parse_varlist()
'a.security_level,a.private_placement,b.naics_code,a.bond_type,a.putable,a.issuer_cusip,a.defaulted,a.interest_frequency,a.principal_amt,a.issue_cusip,a.rule_144a,a.complete_cusip,a.offering_amt,a.treasury_maturity,a.redeemable,a.dated_date,a.coupon,a.foreign_currency,a.maturity,a.issue_id,a.isin,a.conv_commod_type,a.first_interest_date,b.sic_code,a.offering_price,a.coupon_type,a.offering_date,a.last_interest_date,b.country_domicile,b.cusip_name,a.day_count_basis,a.convertible,a.issuer_id,a.asset_backed'
get_raw_data
get_raw_data (vars:List[str]=None, required_vars:List[str]=['offering_date', 'complete_cusip', 'issuer_id'], nrows:int=None, start_date:str=None, end_date:str=None)
Downloads vars
from start_date
to end_date
from WRDS {LIBRARY}.{TABLE}
library
Type | Default | Details | |
---|---|---|---|
vars | List[str] | None | If None, downloads default_raw_vars ; use ’*’ to get all available variables |
required_vars | List[str] | [‘offering_date’, ‘complete_cusip’, ‘issuer_id’] | Will get downloaded, even if not in vars |
nrows | int | None | Number of rows to download. If None, full dataset will be downloaded |
start_date | str | None | Start date in MM/DD/YYYY format |
end_date | str | None | End date in MM/DD/YYYY format |
Returns | pd.DataFrame |
= get_raw_data(start_date='01/01/2022') raw
0) raw.head(
offering_date | complete_cusip | issuer_id | security_level | private_placement | naics_code | bond_type | putable | issuer_cusip | defaulted | ... | first_interest_date | sic_code | offering_price | coupon_type | last_interest_date | country_domicile | cusip_name | day_count_basis | convertible | asset_backed |
---|
0 rows × 34 columns
process_raw_data
process_raw_data (df:pandas.core.frame.DataFrame=None)
Mainly converts variables to categorical type to save memory.
Type | Default | Details | |
---|---|---|---|
df | pd.DataFrame | None | Must contain permno and datadate columns |
Returns | pd.DataFrame |
= process_raw_data(raw) df_clean
0) df_clean.head(
offering_date | cusip | issuer_id | security_level | private_placement | naics_code | bond_type | putable | issuer_cusip | defaulted | ... | first_interest_date | sic_code | offering_price | coupon_type | last_interest_date | country_domicile | cusip_name | day_count_basis | convertible | asset_backed |
---|
0 rows × 34 columns