from __future__ import annotations
from typing import List
import pandas as pd
import numpy as np
import pandasmore as pdm
from finsets.wrds import wrds_api
bondret
Retrieve and process data from WRDS Bond Returns File
Since this is a proprietary dataset, in the documentation below, I can not show any of the data that is being retrieved/generated (will show only column names).
= 'Wharton Research Data Services (WRDS)'
PROVIDER = 'https://wrds-www.wharton.upenn.edu/pages/get-data/wrds-bond-returns/'
URL = 'wrdsapps'
LIBRARY = 'bondret'
TABLE = 'bondcrsp_link'
LINK_TABLE = 'M'
FREQ = 2002
MIN_YEAR = None
MAX_YEAR = 'permno'
ENTITY_ID_IN_RAW_DSET = 'permno'
ENTITY_ID_IN_CLEAN_DSET = 'date'
TIME_VAR_IN_RAW_DSET = f'{FREQ}date' TIME_VAR_IN_CLEAN_DSET
list_all_vars
list_all_vars ()
Collects names of all available variables from WRDS f{LIBRARY}.{TABLE}
.
= list_all_vars() all_vars
all_vars.head()
name | type | wrds_library | wrds_table | |
---|---|---|---|---|
0 | date | DATE | wrdsapps | bondret |
1 | issue_id | DOUBLE_PRECISION | wrdsapps | bondret |
2 | cusip | VARCHAR(9) | wrdsapps | bondret |
3 | bond_sym_id | VARCHAR(14) | wrdsapps | bondret |
4 | bsym | VARCHAR(12) | wrdsapps | bondret |
parse_varlist
parse_varlist (vars:List[str]=None, required_vars=[])
Validates that vars
are available in {LIBRARY}.{TABLE}
table and adds a. prefixes to variable names to feed into an SQL query
parse_varlist()
'a.date,a.issue_id,a.cusip,a.bond_sym_id,a.bsym,a.isin,a.company_symbol,a.bond_type,a.security_level,a.conv,a.offering_date,a.offering_amt,a.offering_price,a.principal_amt,a.maturity,a.treasury_maturity,a.coupon,a.day_count_basis,a.dated_date,a.first_interest_date,a.last_interest_date,a.ncoups,a.amount_outstanding,a.r_sp,a.r_mr,a.r_fr,a.n_sp,a.n_mr,a.n_fr,a.rating_num,a.rating_cat,a.rating_class,a.t_date,a.t_volume,a.t_dvolume,a.t_spread,a.t_yld_pt,a.yield,a.price_eom,a.price_ldm,a.price_l5m,a.gap,a.coupmonth,a.nextcoup,a.coupamt,a.coupacc,a.multicoups,a.ret_eom,a.ret_ldm,a.ret_l5m,a.tmt,a.remcoups,a.duration,a.defaulted,a.default_date,a.default_type,a.reinstated,a.reinstated_date'
get_raw_data
get_raw_data (vars:List[str]='*', required_vars:List[str]=['cusip', 'date'], nrows:int=None, start_date:str=None, end_date:str=None)
Downloads vars
from start_date
to end_date
from WRDS {LIBRARY}.{TABLE}
library and adds PERMNO and PERMCO
Type | Default | Details | |
---|---|---|---|
vars | List[str] | * | Downloads all available variables by defaul |
required_vars | List[str] | [‘cusip’, ‘date’] | list of variables that will get downloaded, even if not in vars |
nrows | int | None | Number of rows to download. If None, full dataset will be downloaded |
start_date | str | None | Start date in MM/DD/YYYY format |
end_date | str | None | End date in MM/DD/YYYY format |
Returns | pd.DataFrame |
= get_raw_data(nrows=1000) raw
0) raw.head(
permno | permco | date | issue_id | cusip | bond_sym_id | bsym | isin | company_symbol | bond_type | ... | ret_ldm | ret_l5m | tmt | remcoups | duration | defaulted | default_date | default_type | reinstated | reinstated_date |
---|
0 rows × 60 columns
process_raw_data
process_raw_data (df:pandas.core.frame.DataFrame=None, clean_kwargs:dict={})
Applies pandasmore.setup_panel
to df
Type | Default | Details | |
---|---|---|---|
df | pd.DataFrame | None | Must contain permno and datadate columns |
clean_kwargs | dict | {} | Params to pass to pdm.setup_panel other than panel_ids , time_var , and freq |
Returns | pd.DataFrame |
= process_raw_data(raw) df_clean
df_clean.head()
date | dtdate | permco | issue_id | cusip | bond_sym_id | bsym | isin | company_symbol | bond_type | ... | ret_ldm | ret_l5m | tmt | remcoups | duration | defaulted | default_date | default_type | reinstated | reinstated_date | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
permno | Mdate | |||||||||||||||||||||
26403 | 2002-07 | 2002-07-31 | 2002-07-31 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | NaN | 6.641667 | 14.0 | 4.797721 | N | None | None | None | None |
2002-08 | 2002-08-31 | 2002-08-31 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | 0.015265 | 6.555556 | 13.0 | 4.928041 | N | None | None | None | None | |
2002-10 | 2002-10-31 | 2002-10-31 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | NaN | 6.386111 | 13.0 | 4.721617 | N | None | None | None | None | |
2002-11 | 2002-11-30 | 2002-11-30 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | 0.062946 | 6.302778 | 13.0 | 4.714069 | N | None | None | None | None | |
2003-02 | 2003-02-28 | 2003-02-28 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | NaN | 6.052778 | 12.0 | 4.673783 | N | None | None | None | None |
5 rows × 60 columns