###### tags: `python`
# Use publicly available data
## Use data from a GitHub repository
### Import multiple CSV files from a GitHub repository using a range of dates
```python!=
'''
Purpose:
(1) Import CSV files from a repository https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports where CSV files are uploaded daily from 01-22-2020 and consistently named as mm-dd-yyyy.csv
(2) Stack the selected columns of every available file into one DataFrame, df_base
Note: column names changed from 03-22-2020 and onward; files before that date are renamed to the new column names
How to run this file in ipython?
os.chdir(folder_path_script)
%run ./import-daily-files_GitHub-COVID-19-csse-covid-19-daily-reports-CSV.py
'''
import io
import requests
import shutil
import datetime
import os
import pandas as pd

folder_path_script = 'D:/googleDrive/python/scripts/'
url_daily_reports = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'

# Column names changed from columns_to_read_1 to columns_to_read_2 starting with 03-22-2020.csv
columns_to_read_1 = ['Province/State', 'Country/Region', 'Last Update', 'Confirmed', 'Deaths', 'Recovered']
columns_to_read_2 = ['Province_State', 'Country_Region', 'Last_Update', 'Confirmed', 'Deaths', 'Recovered']
# Map old names to new names explicitly instead of relying on the column order in the file
old_to_new_column_names = dict(zip(columns_to_read_1, columns_to_read_2))

# Create date ranges similar to the CSV files: 366 consecutive days starting at 01-22-2020
dates = [datetime.date(2020, 1, 22) + datetime.timedelta(dval) for dval in range(0, 366)]
date_column_names_changed = datetime.date(2020, 3, 22)

# Collect one DataFrame per successfully downloaded file and concatenate once after the loop.
# Calling pd.concat inside the loop re-copies all previously loaded rows on every iteration.
frames = []

# Loop thru each CSV file URL
for dateval in dates:
    filepath = url_daily_reports + dateval.strftime('%m-%d-%Y') + '.csv'
    # A timeout keeps one stalled request from hanging the whole run
    r = requests.get(filepath, timeout=30)
    # Dates without a published file do not return 200 and are skipped
    if r.status_code != 200:
        continue
    # Parse the bytes that were already downloaded rather than letting pandas fetch the URL a second time
    csv_bytes = io.BytesIO(r.content)
    if dateval < date_column_names_changed:
        df_temp = pd.read_csv(csv_bytes, usecols=columns_to_read_1)
        df_temp = df_temp.rename(columns=old_to_new_column_names)
    else:
        df_temp = pd.read_csv(csv_bytes, usecols=columns_to_read_2)
    frames.append(df_temp)

if frames:
    # Newest file first, which is the row order the original per-iteration concat([df_temp, df_base]) produced
    df_base = pd.concat(frames[::-1], ignore_index=True)[columns_to_read_2]
else:
    # Nothing could be downloaded: keep an empty frame with the expected columns
    df_base = pd.DataFrame(columns=columns_to_read_2)
```
---
### Download CSV files (not working, downloaded CSV files are empty)
```python!
'''
Purpose:
(1) Download CSV files from a website https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports.
CSV files are uploaded daily and consistently named as mm-dd-yyyy.csv
Each file is saved under folder_path_download with its original name.
How to run this file in ipython?
%run D:/googleDrive/python/scripts/download-daily-files_GitHub-COVID-19-csse-covid-19-daily-reports-CSV.py
'''
# --------------------
import requests
import shutil
import datetime
import os

folder_path_script = 'D:/googleDrive/python/scripts/'
folder_path_download = 'D:/download/GitHub/CSSEGISandData/COVID-19/csse-covid-19-daily-reports-CSV/'
url_daily_reports = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'

# os.makedirs('dir-to-create',exist_ok=True) enables the mkdir -p functionality [mkdir -p functionality in Python [duplicate]](https://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python)
os.makedirs(folder_path_download, exist_ok=True)
# -----------------------------------------------------------------------------------
# 366 consecutive days starting at 01-22-2020, one per expected CSV file
dates = [datetime.datetime(2020, 1, 22) + datetime.timedelta(dval) for dval in range(0, 366)]
# -----------------------------------------------------------------------------------
# Download daily added CSV files from https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports
for dateval in dates:
    filename = dateval.strftime('%m-%d-%Y') + '.csv'
    # stream=True is required for r.raw to still hold the body. Without it requests
    # reads the whole response up front, r.raw is already exhausted, and copying
    # from it writes an empty file.
    with requests.get(url_daily_reports + filename, stream=True, timeout=30) as r:
        # Dates without a published file do not return 200 and are skipped
        if r.status_code != 200:
            continue
        # Let urllib3 undo any gzip/deflate transfer encoding while the raw stream is read
        r.raw.decode_content = True
        # Write to an explicit path in the download folder. The earlier version opened a
        # relative path and only changed directory afterwards, so the first file ended up
        # in whatever the working directory happened to be.
        with open(os.path.join(folder_path_download, filename), 'wb') as f:
            # shutil.copyfileobj(fsrc, fdst[, length])
            ## fsrc: A file-like object representing the source file to be copied
            ## fdst: A file-like object representing the destination file, where fsrc will be copied.
            shutil.copyfileobj(r.raw, f)
# ---------------------------------------------------------------------------------
```
---
### Import a CSV file into a DataFrame (working)
```python!
import pandas as pd

# Raw-content URL of one daily report file (the first one, 01-22-2020)
url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv'
)
# pandas accepts a URL here and downloads the file itself
df = pd.read_csv(filepath_or_buffer=url)
```
---