mergent

Retrieve and process data from WRDS Mergent FISD

Since this is a proprietary dataset, in the documentation below, I can not show any of the data that is being retrieved/generated (will show only column names).

from __future__ import annotations
from typing import List

import pandas as pd

from finsets.wrds import wrds_api

PROVIDER = 'Wharton Research Data Services (WRDS)'
URL = 'https://wrds-www.wharton.upenn.edu/pages/get-data/mergent-fixed-income-securities-database-fisd/'
LIBRARY = 'fisd'
TABLE = 'fisd_mergedissue'
ISSUER_TABLE = 'fisd_mergedissuer'
FREQ = 'D'
MIN_YEAR = 1894
MAX_YEAR = None
ENTITY_ID_IN_RAW_DSET = 'complete_cusip'
ENTITY_ID_IN_CLEAN_DSET = 'cusip'
TIME_VAR_IN_RAW_DSET = 'offering_date'
TIME_VAR_IN_CLEAN_DSET = 'offering_date'

source

list_all_vars

 list_all_vars ()

Collects names of all available variables from WRDS f{LIBRARY}.{TABLE}.

all_vars = list_all_vars()

all_vars.head(50)

	name	type	wrds_library	wrds_table
0	issue_id	DOUBLE_PRECISION	fisd	fisd_mergedissue
1	issuer_id	DOUBLE_PRECISION	fisd	fisd_mergedissue
2	prospectus_issuer_name	VARCHAR(64)	fisd	fisd_mergedissue
3	issuer_cusip	VARCHAR(6)	fisd	fisd_mergedissue
4	issue_cusip	VARCHAR(3)	fisd	fisd_mergedissue
5	issue_name	VARCHAR(64)	fisd	fisd_mergedissue
6	maturity	DATE	fisd	fisd_mergedissue
7	security_level	VARCHAR(4)	fisd	fisd_mergedissue
8	security_pledge	VARCHAR(4)	fisd	fisd_mergedissue
9	enhancement	VARCHAR(1)	fisd	fisd_mergedissue
10	coupon_type	VARCHAR(4)	fisd	fisd_mergedissue
11	convertible	VARCHAR(1)	fisd	fisd_mergedissue
12	mtn	VARCHAR(1)	fisd	fisd_mergedissue
13	asset_backed	VARCHAR(1)	fisd	fisd_mergedissue
14	yankee	VARCHAR(1)	fisd	fisd_mergedissue
15	canadian	VARCHAR(1)	fisd	fisd_mergedissue
16	oid	VARCHAR(1)	fisd	fisd_mergedissue
17	foreign_currency	VARCHAR(1)	fisd	fisd_mergedissue
18	slob	VARCHAR(1)	fisd	fisd_mergedissue
19	issue_offered_global	VARCHAR(1)	fisd	fisd_mergedissue
20	settlement_type	VARCHAR(4)	fisd	fisd_mergedissue
21	gross_spread	DOUBLE_PRECISION	fisd	fisd_mergedissue
22	selling_concession	DOUBLE_PRECISION	fisd	fisd_mergedissue
23	reallowance	DOUBLE_PRECISION	fisd	fisd_mergedissue
24	comp_neg_exch_deal	VARCHAR(4)	fisd	fisd_mergedissue
25	rule_415_reg	VARCHAR(1)	fisd	fisd_mergedissue
26	sec_reg_type1	VARCHAR(4)	fisd	fisd_mergedissue
27	sec_reg_type2	VARCHAR(4)	fisd	fisd_mergedissue
28	rule_144a	VARCHAR(1)	fisd	fisd_mergedissue
29	treasury_spread	DOUBLE_PRECISION	fisd	fisd_mergedissue
30	treasury_maturity	VARCHAR(20)	fisd	fisd_mergedissue
31	offering_amt	DOUBLE_PRECISION	fisd	fisd_mergedissue
32	offering_date	DATE	fisd	fisd_mergedissue
33	offering_price	DOUBLE_PRECISION	fisd	fisd_mergedissue
34	offering_yield	DOUBLE_PRECISION	fisd	fisd_mergedissue
35	delivery_date	DATE	fisd	fisd_mergedissue
36	unit_deal	VARCHAR(1)	fisd	fisd_mergedissue
37	form_of_own	VARCHAR(4)	fisd	fisd_mergedissue
38	denomination	VARCHAR(9)	fisd	fisd_mergedissue
39	principal_amt	DOUBLE_PRECISION	fisd	fisd_mergedissue
40	covenants	VARCHAR(1)	fisd	fisd_mergedissue
41	defeased	VARCHAR(1)	fisd	fisd_mergedissue
42	defeasance_type	VARCHAR(4)	fisd	fisd_mergedissue
43	defeased_date	DATE	fisd	fisd_mergedissue
44	defaulted	VARCHAR(1)	fisd	fisd_mergedissue
45	tender_exch_offer	VARCHAR(1)	fisd	fisd_mergedissue
46	redeemable	VARCHAR(1)	fisd	fisd_mergedissue
47	refund_protection	VARCHAR(1)	fisd	fisd_mergedissue
48	refunding_date	DATE	fisd	fisd_mergedissue
49	putable	VARCHAR(1)	fisd	fisd_mergedissue

source

default_raw_vars

 default_raw_vars ()

Defines default variables used in get_raw_data if none are specified.

print(default_raw_vars())

['offering_date', 'issue_id', 'issuer_id', 'issuer_cusip', 'issue_cusip', 'complete_cusip', 'isin', 'security_level', 'coupon_type', 'convertible', 'foreign_currency', 'rule_144a', 'redeemable', 'bond_type', 'maturity', 'coupon', 'offering_amt', 'offering_price', 'principal_amt', 'defaulted', 'day_count_basis', 'last_interest_date', 'first_interest_date', 'conv_commod_type', 'cusip_name', 'naics_code', 'sic_code', 'treasury_maturity', 'putable', 'country_domicile', 'private_placement', 'asset_backed', 'interest_frequency', 'dated_date']

source

parse_varlist

 parse_varlist (vars:List[str]=None, required_vars=[])

Validates that vars are available in {LIBRARY}.{TABLE} table and adds a. prefixes to variable names to feed into an SQL query

parse_varlist()

'a.security_level,a.private_placement,b.naics_code,a.bond_type,a.putable,a.issuer_cusip,a.defaulted,a.interest_frequency,a.principal_amt,a.issue_cusip,a.rule_144a,a.complete_cusip,a.offering_amt,a.treasury_maturity,a.redeemable,a.dated_date,a.coupon,a.foreign_currency,a.maturity,a.issue_id,a.isin,a.conv_commod_type,a.first_interest_date,b.sic_code,a.offering_price,a.coupon_type,a.offering_date,a.last_interest_date,b.country_domicile,b.cusip_name,a.day_count_basis,a.convertible,a.issuer_id,a.asset_backed'

source

get_raw_data

 get_raw_data (vars:List[str]=None,
               required_vars:List[str]=['offering_date', 'complete_cusip',
               'issuer_id'], nrows:int=None, start_date:str=None,
               end_date:str=None)

Downloads vars from start_date to end_date from WRDS {LIBRARY}.{TABLE} library

	Type	Default	Details
vars	List[str]	None	If None, downloads `default_raw_vars`; use ’*’ to get all available variables
required_vars	List[str]	[‘offering_date’, ‘complete_cusip’, ‘issuer_id’]	Will get downloaded, even if not in `vars`
nrows	int	None	Number of rows to download. If None, full dataset will be downloaded
start_date	str	None	Start date in MM/DD/YYYY format
end_date	str	None	End date in MM/DD/YYYY format
Returns	pd.DataFrame

raw = get_raw_data(start_date='01/01/2022')

raw.head(0)

	offering_date	complete_cusip	issuer_id	security_level	private_placement	naics_code	bond_type	putable	issuer_cusip	defaulted	...	first_interest_date	sic_code	offering_price	coupon_type	last_interest_date	country_domicile	cusip_name	day_count_basis	convertible	asset_backed

0 rows × 34 columns

source

process_raw_data

 process_raw_data (df:pandas.core.frame.DataFrame=None)

Mainly converts variables to categorical type to save memory.

	Type	Default	Details
df	pd.DataFrame	None	Must contain `permno` and `datadate` columns
Returns	pd.DataFrame

df_clean = process_raw_data(raw)

df_clean.head(0)

	offering_date	cusip	issuer_id	security_level	private_placement	naics_code	bond_type	putable	issuer_cusip	defaulted	...	first_interest_date	sic_code	offering_price	coupon_type	last_interest_date	country_domicile	cusip_name	day_count_basis	convertible	asset_backed

0 rows × 34 columns