def download(sql_string: str=None,
=None # Params cited in the `sql_string`
params: Sequence-> pd.DataFrame:
) """Downloads data from WRDS using the given PostgreSQL `sql_string`"""
try:
= Connection()
db = db.raw_sql(sql=sql_string, params=params)
df except Exception as err:
raise err
finally:
db.close()
return df
wrds_api
wrds
package: https://github.com/wharton/wrds
This is a fork of version 3.1.6 of the wrds
package. The fork allows users to store their WRDS password as environment variable WRDS_PASS
(e.g. in GitHub Actions secrets). This facilitates testing in the CI pipeline without having to create a pgpass
file on a remote server. For local use, the preferred method is to use a pgpass
file, not an environment variable with the WRDS password.
When establishing a connection with WRDS database, Connection
will first look for the WRDS_PASS
environment variable. If it can’t find it, it then looks for a pgpass
file. If it can’t find that either, it prompts the user for a password.
Also added a download
function which is a wrapper around the Connection().raw_sql
method that manages the connection with WRDS for you (using a context manager).
Connection
Connection (autoconnect=True, verbose=False, **kwargs)
Set up the connection to the WRDS database. By default, also establish the connection to the database.
Optionally, the user may specify connection parameters: wrds_hostname: WRDS database hostname wrds_port: database connection port number wrds_dbname: WRDS database name wrds_username: WRDS username autoconnect: If false will not immediately establish the connection
The constructor will use the .pgpass file if it exists and may make use of PostgreSQL environment variables such as PGHOST, PGUSER, etc., if cooresponding parameters are not set. If not, it will ask the user for a username and password. It will also direct the user to information on setting up .pgpass.
Additionally, creating the instance will load a list of schemas the user has permission to access.
:return: None
Usage:: >>> db = wrds.Connection() Loading library list… Done
::: {.callout-caution} The context manager does not seem to close connections properly. To be safe, don’t use it as below:
with Connection(wrds_username = wrds_username) as db:
= db.raw_sql(sql=sql_string, params=params) df
download
download (sql_string:str=None, params:Sequence=None)
Downloads data from WRDS using the given PostgreSQL sql_string
Type | Default | Details | |
---|---|---|---|
sql_string | str | None | |
params | typing.Sequence | None | Params cited in the sql_string |
Returns | DataFrame |
This function will prompt the user for their WRDS password, unless a WRDS_PASS
environment variable is set up (not recommended unless it’s in an encrypted location like a GitHub Secret) or unless a pgpass
file is set up (recommended).
"SELECT * from ff.factors_monthly") download(
date | mktrf | smb | hml | rf | year | month | umd | dateff | |
---|---|---|---|---|---|---|---|---|---|
0 | 1926-07-01 | 0.0296 | -0.0256 | -0.0243 | 0.0022 | 1926.0 | 7.0 | NaN | 1926-07-31 |
1 | 1926-08-01 | 0.0264 | -0.0117 | 0.0382 | 0.0025 | 1926.0 | 8.0 | NaN | 1926-08-31 |
2 | 1926-09-01 | 0.0036 | -0.0140 | 0.0013 | 0.0023 | 1926.0 | 9.0 | NaN | 1926-09-30 |
3 | 1926-10-01 | -0.0324 | -0.0009 | 0.0070 | 0.0032 | 1926.0 | 10.0 | NaN | 1926-10-30 |
4 | 1926-11-01 | 0.0253 | -0.0010 | -0.0051 | 0.0031 | 1926.0 | 11.0 | NaN | 1926-11-30 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1162 | 2023-05-01 | 0.0035 | 0.0161 | -0.0772 | 0.0036 | 2023.0 | 5.0 | -0.0063 | 2023-05-31 |
1163 | 2023-06-01 | 0.0646 | 0.0154 | -0.0026 | 0.0040 | 2023.0 | 6.0 | -0.0237 | 2023-06-30 |
1164 | 2023-07-01 | 0.0321 | 0.0208 | 0.0411 | 0.0045 | 2023.0 | 7.0 | -0.0398 | 2023-07-31 |
1165 | 2023-08-01 | -0.0239 | -0.0316 | -0.0106 | 0.0045 | 2023.0 | 8.0 | 0.0377 | 2023-08-31 |
1166 | 2023-09-01 | -0.0524 | -0.0251 | 0.0152 | 0.0043 | 2023.0 | 9.0 | 0.0024 | 2023-09-29 |
1167 rows × 9 columns
Examples of useful features of the Connection
class
#db = Connection(wrds_username=os.getenv('WRDS_USERNAME'))
= Connection() db
5] db.list_libraries()[:
['aha_sample', 'audit', 'audit_acct_os', 'audit_audit_comp', 'audit_common']
='crsp')[:5] db.list_tables(library
['acti', 'asia', 'asib', 'asic', 'asio']
='comp',table='funda').iloc[:5] db.describe_table(library
name | nullable | type | comment | |
---|---|---|---|---|
0 | gvkey | True | VARCHAR(6) | None |
1 | datadate | True | DATE | None |
2 | fyear | True | DOUBLE_PRECISION | None |
3 | indfmt | True | VARCHAR(12) | None |
4 | consol | True | VARCHAR(2) | None |
='ff',
db.get_table(library='factors_monthly',
table=['date','mktrf','smb','hml','rf'],
columns=5) obs
date | mktrf | smb | hml | rf | |
---|---|---|---|---|---|
0 | 1926-07-01 | 0.029600 | -0.025600 | -0.024300 | 0.00220 |
1 | 1926-08-01 | 0.026400 | -0.011700 | 0.038200 | 0.00250 |
2 | 1926-09-01 | 0.003600 | -0.014000 | 0.001300 | 0.00230 |
3 | 1926-10-01 | -0.032400 | -0.000900 | 0.007000 | 0.00320 |
4 | 1926-11-01 | 0.025300 | -0.001000 | -0.005100 | 0.00310 |
db.close()
validate_dates
validate_dates (date_strings:List[str])