core

Main functions of estout package

Short descriptions for main functions (see below for more details):

collect_stats: extracts a given set of attributes from a results object generated by a statistics package (e.g. statsmodels or linearmodels)
to_df: takes a list of collect_stats outputs and merges them as separate columns in a pandas DataFrame
to_tex: takes one or more DataFrames and creates the tex code for a table in which each DataFrame is a separate panel
to_pdf: takes one or more tex tables (either as strings or as paths to tex files) and merges them into a single pdf document

Exported source

from __future__ import annotations
from pathlib import Path 
from typing import List, Dict, Literal, Union, Callable
import importlib

import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels import PanelOLS

from estout import utils

Set up an example dataset and run a few regressions to showcase the functions in this module.

np.random.seed(123)
df = pd.DataFrame(np.random.rand(9,3), 
                  columns=['y','x','z'],
                  index = pd.MultiIndex.from_product([[1,2,3],[1,2,3]], names=['firmid','time'])
                  ).assign(cons = 1)
sm1 = sm.OLS(df['y'], df[['cons','x']]).fit()
sm2 = sm.OLS(df['y'], df[['cons','x','z']]).fit().get_robustcov_results(cov_type='HAC', maxlags=2)
lmres = PanelOLS(df['y'],  df[['cons','x','z']], entity_effects=True
                 ).fit(cov_type='clustered', cluster_entity=True)

source

collect_stats

 collect_stats (res, get_default_stats=True, add_stats:dict=None,
                add_literals:dict=None)

Collects stats from the `res` object. Stats in `add_stats` can override the default stats.

	Type	Default	Details
res			Results object to extract stats from
get_default_stats	bool	True	If True, returns all stats implemented by the `f'{package}_results'` module
add_stats	dict	None	Keys are stats to extract in addition to the default ones; values are attributes of `res` or callables
add_literals	dict	None	Additional info to be added to output dict; values must be scalars
Returns	dict

stats1 = collect_stats(sm1)
stats2 = collect_stats(sm2, add_literals={'Cov Type': 'Newey West'})
stats3 = collect_stats(lmres, add_stats={'r2b': 'rsquared_between', 'FE':'included_effects'})
stats4 = collect_stats(sm2, add_stats={'Max Eigen': lambda x: x.eigenvals.max()})

stats1

{'package': 'statsmodels',
 'ynames': ['y'],
 'xnames': ['cons', 'x'],
 'params': cons    0.507852
 x       0.345003
 dtype: float64,
 'tstats': cons    3.905440
 x       1.292246
 dtype: float64,
 'pvalues': cons    0.005858
 x       0.237293
 dtype: float64,
 'covmat':           cons         x
 cons  0.016910 -0.030531
 x    -0.030531  0.071278,
 'se': cons    0.130037
 x       0.266979
 dtype: float64,
 'nobs': 9,
 'r2': np.float64(0.19260886185799475)}

assert stats2['Cov Type'] == 'Newey West'

stats3['r2b']

0.7954933715233719

stats3['FE']

['Entity']

source

to_df

 to_df (res_list:List[dict], which_xvars:list=None,
        stats_body:list=['params', 'tstats'], stats_bottom:list=['r2',
        'nobs'], labels:dict=None, add_formats:dict=None)

Combines results from multiple collect_stats() outputs into a single pd.DataFrame

	Type	Default	Details
res_list	List		List of outputs from `collect_stats()`
which_xvars	list	None	If None, report all xvars
stats_body	list	['params', 'tstats']	Each element of `res_list` needs to have these stats as keys; values must be pd.Series
stats_bottom	list	['r2', 'nobs']	Each element of `res_list` needs to have these stats as keys; values must be scalars
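labels	dict	None	Keys are row names to relabel (e.g. `'r2'`); values are the labels displayed instead (e.g. `'$R^2$'`)
add_formats	dict	None	Keys are stat names (e.g. `'r2'`); values are format strings applied to that stat (e.g. `'{:.2f}'`)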
Returns	DataFrame

d = to_df(res_list=[stats1, stats2, stats1, stats3], 
          which_xvars=['cons','x','z'], 
          add_formats={'r2':'{:.2f}'},
          labels={'nobs':'Observations', 'r2':'$R^2$'})
d

		0	1	2	3
cons	params	0.51***	0.70***	0.51***	0.73***
	tstats	(3.91)	(21.48)	(3.91)	(167.36)
x	params	0.35	0.57**	0.35	0.64*
	tstats	(1.29)	(2.85)	(1.29)	(2.26)
z	params		-0.64**		-0.77**
	tstats		(-3.55)		(-2.91)
$R^2$		0.19	0.49	0.19	0.35
Observations		9	9	9	9

Note how we used a LaTeX-friendly label for the $R^2$ row, so we don't have to re-label it before we send the table to LaTeX.

source

to_tex

 to_tex (dfs:Union[pandas.core.frame.DataFrame,List[pandas.core.frame.Data
         Frame]], outfile:pathlib.Path|str=None, title:str='Table title',
         notes:str='Table description', notes_on_top:bool=True,
         label:str='',
         table_type:Literal['table','sidewaystable']='table',
         font_size:str='\\footnotesize', addtocounter:int=0,
         panel_title:List[str]=None, palign:Literal['l','r','c']='l',
         col_groups:List[dict]=None,
         col_names:List[Union[list,bool]]=True,
         hlines:List[List[int]]=None, tabular_env:str='tabular*')

Create tex code to generate table from one or more dataframes

	Type	Default	Details
dfs	Union		DataFrame(s) to be converted to tex table; if multiple, they will be panels in a larger table
outfile	pathlib.Path | str	None	Where to save resulting tex output
title	str	Table title	Table title
notes	str	Table description	Some call this the table caption
notes_on_top	bool	True	Set to False if you want table description (caption) to be at the bottom
label	str		Table label (for referencing within the LaTeX document)
table_type	Literal	table
font_size	str	\footnotesize	Gets applied to the table contents as well as its caption
addtocounter	int	0	Set to -1 for tables that are just a continuation of a table on a new page
panel_title	List	None	One element in the list for each dataframe in `dfs`
palign	Literal	l	Alignment of panel title
col_groups	List	None	One dict per DataFrame in `dfs`; keys are group names, values are lists of consecutive indices of the columns in that group
col_names	List	True	If False, none; if True, use df column names; if list, gives custom column names
hlines	List	None	One list per DataFrame in `dfs`, giving the row indices under which to place an hline
tabular_env	str	tabular*	LaTeX tabular environment
Returns	str

tbl = to_tex([d,d], panel_title=['Panel A: Some title', 'Panel B: Some title'], 
               col_groups=[{'Group1':[1,2]}]*2,
               col_names=[['Model 1', 'Model 2', 'Model 3', 'Model 4']]*2,
               hlines=[[0,1,4,12], [1,4,12]] )

source

to_pdf

 to_pdf (outfile:str, table_tex_code:Union[str,pathlib.Path,List[Union[str
         ,pathlib.Path]]]=None,
         article_spec='\\documentclass[11pt]{article}',
         captionsetup='format=plain, labelsep=newline, labelfont = bf,
         justification=centering', make_pdf:bool=True,
         open_pdf:bool=False)

Creates a PDF with one or more tables given their tex code (from \begin{table} to \end{table})

	Type	Default	Details
outfile	str		Path to .tex file where combined tables are saved (must contain .tex extension)
table_tex_code	Union	None	String(s) or Paths to files containing table tex code (e.g. like outputs of to_tex())
article_spec	str	\documentclass[11pt]{article}
captionsetup	str	format=plain, labelsep=newline, labelfont = bf, justification=centering
make_pdf	bool	True
open_pdf	bool	False

For the test below to work, you need to have TeX Live installed (and you need to change the path below to a valid path on your system).

to_pdf('../_outputs/paper.tex', table_tex_code=[tbl, tbl])

PDF creation successful!