from __future__ import annotations
from typing import List
import pandas as pd
import numpy as np
import pandasmore as pdm
from finsets.wrds import wrds_api
Peters and Taylor (2016)
Total Q, and intangible capital measures from Peters and Taylor (2016)
= 'Wharton Research Data Services (WRDS)'
PROVIDER = 'https://wrds-www.wharton.upenn.edu/pages/get-data/peters-and-taylor-total-q/peters-and-taylor-total-q/'
URL = 'totalq'
LIBRARY = 'total_q'
TABLE = 'crsp'
LINK_LIBRARY = 'ccmxpf_lnkhist'
LINK_TABLE = 'A'
FREQ = 1950
MIN_YEAR = None
MAX_YEAR = 'permno'
ENTITY_ID_IN_RAW_DSET = 'permno'
ENTITY_ID_IN_CLEAN_DSET = 'datadate'
TIME_VAR_IN_RAW_DSET = f'{FREQ}date' TIME_VAR_IN_CLEAN_DSET
list_all_vars
list_all_vars ()
Collects names of all available variables from WRDS f{LIBRARY}.{TABLE}
and {LIBRARY}.{COMPANY_TABLE}
.
= list_all_vars() all_vars
all_vars
name | type | wrds_library | wrds_table | |
---|---|---|---|---|
0 | gvkey | VARCHAR(6) | totalq | total_q |
1 | datadate | DATE | totalq | total_q |
2 | fyear | DOUBLE_PRECISION | totalq | total_q |
3 | k_int_know | DOUBLE_PRECISION | totalq | total_q |
4 | k_int_org | DOUBLE_PRECISION | totalq | total_q |
5 | k_int_offbs | DOUBLE_PRECISION | totalq | total_q |
6 | k_int | DOUBLE_PRECISION | totalq | total_q |
7 | q_tot | DOUBLE_PRECISION | totalq | total_q |
get_raw_data
get_raw_data (vars:List[str]='*', required_vars:List[str]=['gvkey', 'datadate'], nrows:int=None, start_date:str=None, end_date:str=None)
Downloads vars
from start_date
to end_date
from WRDS {LIBRARY}.{TABLE}
library and adds PERMNO and PERMCO as in CCM
Type | Default | Details | |
---|---|---|---|
vars | List[str] | * | Default is to get all available variables |
required_vars | List[str] | [‘gvkey’, ‘datadate’] | list of variables that will get downloaded, even if not in vars |
nrows | int | None | Number of rows to download. If None, full dataset will be downloaded |
start_date | str | None | Start date in MM/DD/YYYY format |
end_date | str | None | End date in MM/DD/YYYY format |
Returns | pd.DataFrame |
= get_raw_data(vars='*')
raw raw.head()
permno | permco | liid | linkprim | gvkey | datadate | fyear | k_int_know | k_int_org | k_int_offbs | ... | q_tot | xrd | xsga | cogs | rdip | at | capx | ppegt | ppent | dp | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 25881.0 | 23369.0 | 01 | P | 001000 | 1970-12-31 | 1970.0 | 0.0 | 5.328974 | 5.328974 | ... | 0.817259 | NaN | 9.420 | 30.529 | NaN | 33.450 | 2.767 | 14.517 | 8.876 | 1.352 |
1 | 25881.0 | 23369.0 | 01 | P | 001000 | 1971-12-31 | 1971.0 | 0.0 | 7.607690 | 7.607690 | ... | 0.508109 | NaN | 10.548 | 33.973 | NaN | 29.330 | 1.771 | 13.269 | 7.639 | 1.399 |
2 | 25881.0 | 23369.0 | 01 | P | 001000 | 1971-12-31 | 1971.0 | 0.0 | 7.607690 | 7.607690 | ... | 0.508109 | NaN | NaN | 23.380 | NaN | NaN | NaN | NaN | NaN | 0.868 |
3 | 25881.0 | 23369.0 | 01 | P | 001000 | 1972-12-31 | 1972.0 | 0.0 | 8.550650 | 8.550650 | ... | 0.516247 | NaN | 7.551 | 22.702 | NaN | 19.907 | 1.254 | 11.709 | 7.013 | 1.200 |
4 | 25881.0 | 23369.0 | 01 | P | 001000 | 1972-12-31 | 1972.0 | 0.0 | 8.550650 | 8.550650 | ... | 0.516247 | NaN | NaN | 21.444 | NaN | NaN | NaN | NaN | NaN | 0.933 |
5 rows × 21 columns
process_raw_data
process_raw_data (df:pandas.core.frame.DataFrame=None, clean_kwargs:dict={})
Applies pandasmore.setup_panel
to df
Type | Default | Details | |
---|---|---|---|
df | pd.DataFrame | None | Must contain permno and datadate columns |
clean_kwargs | dict | {} | Params to pass to pdm.setup_panel other than panel_ids , time_var , and freq |
Returns | pd.DataFrame |
= process_raw_data(raw) df_clean
features
features (df:pandas.core.frame.DataFrame=None)
= features(df_clean) ftrs
'i2k_tot','i2k_phy','i2k_int']]).describe() pdm.wins(ftrs[[
i2k_tot | i2k_phy | i2k_int | |
---|---|---|---|
count | 175623.000000 | 175623.000000 | 203412.000000 |
mean | 0.245097 | 0.119157 | 0.122255 |
std | 0.225062 | 0.164408 | 0.124075 |
min | 0.000340 | 0.000000 | 0.000000 |
25% | 0.115130 | 0.028412 | 0.025475 |
50% | 0.191825 | 0.068188 | 0.100432 |
75% | 0.294431 | 0.139660 | 0.172285 |
max | 1.453925 | 1.066099 | 0.686001 |