from __future__ import annotations
from typing import List
import pandas as pd
import numpy as np
import pandasmore as pdm
from finsets.wrds import wrds_apibondret
Retrieve and process data from WRDS Bond Returns File
Since this is a proprietary dataset, in the documentation below, I can not show any of the data that is being retrieved/generated (will show only column names).
PROVIDER = 'Wharton Research Data Services (WRDS)'
URL = 'https://wrds-www.wharton.upenn.edu/pages/get-data/wrds-bond-returns/'
LIBRARY = 'wrdsapps'
TABLE = 'bondret'
LINK_TABLE = 'bondcrsp_link'
FREQ = 'M'
MIN_YEAR = 2002
MAX_YEAR = None
ENTITY_ID_IN_RAW_DSET = 'permno'
ENTITY_ID_IN_CLEAN_DSET = 'permno'
TIME_VAR_IN_RAW_DSET = 'date'
TIME_VAR_IN_CLEAN_DSET = f'{FREQ}date'list_all_vars
list_all_vars ()
Collects names of all available variables from WRDS f{LIBRARY}.{TABLE}.
all_vars = list_all_vars()all_vars.head()| name | type | wrds_library | wrds_table | |
|---|---|---|---|---|
| 0 | date | DATE | wrdsapps | bondret |
| 1 | issue_id | DOUBLE_PRECISION | wrdsapps | bondret |
| 2 | cusip | VARCHAR(9) | wrdsapps | bondret |
| 3 | bond_sym_id | VARCHAR(14) | wrdsapps | bondret |
| 4 | bsym | VARCHAR(12) | wrdsapps | bondret |
parse_varlist
parse_varlist (vars:List[str]=None, required_vars=[])
Validates that vars are available in {LIBRARY}.{TABLE} table and adds a. prefixes to variable names to feed into an SQL query
parse_varlist()'a.date,a.issue_id,a.cusip,a.bond_sym_id,a.bsym,a.isin,a.company_symbol,a.bond_type,a.security_level,a.conv,a.offering_date,a.offering_amt,a.offering_price,a.principal_amt,a.maturity,a.treasury_maturity,a.coupon,a.day_count_basis,a.dated_date,a.first_interest_date,a.last_interest_date,a.ncoups,a.amount_outstanding,a.r_sp,a.r_mr,a.r_fr,a.n_sp,a.n_mr,a.n_fr,a.rating_num,a.rating_cat,a.rating_class,a.t_date,a.t_volume,a.t_dvolume,a.t_spread,a.t_yld_pt,a.yield,a.price_eom,a.price_ldm,a.price_l5m,a.gap,a.coupmonth,a.nextcoup,a.coupamt,a.coupacc,a.multicoups,a.ret_eom,a.ret_ldm,a.ret_l5m,a.tmt,a.remcoups,a.duration,a.defaulted,a.default_date,a.default_type,a.reinstated,a.reinstated_date'
get_raw_data
get_raw_data (vars:List[str]='*', required_vars:List[str]=['cusip', 'date'], nrows:int=None, start_date:str=None, end_date:str=None)
Downloads vars from start_date to end_date from WRDS {LIBRARY}.{TABLE} library and adds PERMNO and PERMCO
| Type | Default | Details | |
|---|---|---|---|
| vars | List[str] | * | Downloads all available variables by defaul |
| required_vars | List[str] | [‘cusip’, ‘date’] | list of variables that will get downloaded, even if not in vars |
| nrows | int | None | Number of rows to download. If None, full dataset will be downloaded |
| start_date | str | None | Start date in MM/DD/YYYY format |
| end_date | str | None | End date in MM/DD/YYYY format |
| Returns | pd.DataFrame |
raw = get_raw_data(nrows=1000)raw.head(0)| permno | permco | date | issue_id | cusip | bond_sym_id | bsym | isin | company_symbol | bond_type | ... | ret_ldm | ret_l5m | tmt | remcoups | duration | defaulted | default_date | default_type | reinstated | reinstated_date |
|---|
0 rows × 60 columns
process_raw_data
process_raw_data (df:pandas.core.frame.DataFrame=None, clean_kwargs:dict={})
Applies pandasmore.setup_panel to df
| Type | Default | Details | |
|---|---|---|---|
| df | pd.DataFrame | None | Must contain permno and datadate columns |
| clean_kwargs | dict | {} | Params to pass to pdm.setup_panel other than panel_ids, time_var, and freq |
| Returns | pd.DataFrame |
df_clean = process_raw_data(raw)df_clean.head()| date | dtdate | permco | issue_id | cusip | bond_sym_id | bsym | isin | company_symbol | bond_type | ... | ret_ldm | ret_l5m | tmt | remcoups | duration | defaulted | default_date | default_type | reinstated | reinstated_date | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| permno | Mdate | |||||||||||||||||||||
| 26403 | 2002-07 | 2002-07-31 | 2002-07-31 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | NaN | 6.641667 | 14.0 | 4.797721 | N | None | None | None | None |
| 2002-08 | 2002-08-31 | 2002-08-31 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | 0.015265 | 6.555556 | 13.0 | 4.928041 | N | None | None | None | None | |
| 2002-10 | 2002-10-31 | 2002-10-31 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | NaN | 6.386111 | 13.0 | 4.721617 | N | None | None | None | None | |
| 2002-11 | 2002-11-30 | 2002-11-30 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | 0.062946 | 6.302778 | 13.0 | 4.714069 | N | None | None | None | None | |
| 2003-02 | 2003-02-28 | 2003-02-28 | 20587 | 383238.0 | 00072AAA8 | DIS.GA | None | US00072AAA88 | DIS | CDEB | ... | NaN | NaN | 6.052778 | 12.0 | 4.673783 | N | None | None | None | None |
5 rows × 60 columns