## 修改時間,INSERT到SQL
```python
import psycopg2
import pandas as pd
conn = psycopg2.connect(database="mydb", user="test01",
password="testpw", host="localhost",
port="5432")
print(conn.closed)
## 允許python執行postgresql語法
with conn.cursor() as cur:
## 觀察查詢筆數
print(cur.rowcount)
## SQL語法
sql = "select * from public.salary"
## 執行sql語法
cur.execute(sql)
## 取得欄位名稱
name = [desc[0] for desc in cur.description]
## fetchall是將所有收尋的資料都寫入 rows 中
# rows = pd.DataFrame(cur.fetchall())
rows = pd.DataFrame(cur.fetchall(),columns=name)
## 取出一筆資料
# rows = pd.DataFrame(cur.fetchone())
## 自行設定要幾筆資料
# rows = pd.DataFrame(cur.fetchmany(10))
## 觀察查詢筆數
print(cur.rowcount)
print(rows.shape)
rows.head()
```
Pandas 主要是讀取、轉換和處理資料,使用表格的方式呈現給使用者觀看。Series與DataFrame是主要的資料結構,分成一維與二維資料。
```python
dfs = pd.read_html('https://www.ubus.com.tw/Booking/FareInquiry')
dfs[0]
```
```python
rows.tail(3)
```
```python
rows.index
```
```python
rows.columns
```
```python
rows.to_numpy()
```
```python
rows.to_numpy()[0]
```
```python
rows.describe()
```
```python
rows["salary_1"].describe()
rows.salary_1.describe()
```
```python
rows.describe(include='all')
```
## 因為裡面只有check_n在設計時,是屬於數值資料,其他都是類別資料或者時間資料
```python
rows.info()
```
```python
pd.to_numeric(rows.salary_1)
```
```python
rows.salary_1
```
```python
rows1 = rows
rows1["salary_1_num"] = pd.to_numeric(rows.salary_1)
rows1.head()
```
```python
rows1.info()
```
```python
rows.describe()
```
```python
## 轉換時間資料
rows.clock_in[0]
```
```python
d = pd.Timestamp(rows.clock_in[0], unit='s')
d
```
```python
print(d.time())
```
## 功課,修改格式
pd.Timestamp(rows.clock_in[10], unit='s')
rows.clock_in[10]
```python
rows.clock_in[10]
```
```python
## 字串切割
rows.clock_in.str.split(r"\:")
```
```python
## 列出一行,切割訊號
r = [rows.clock_in.str.split(r"\:")][0][10]
print(r)
for n in range(3):
if len(r[n]) <2:
r[n] = str(0)+r[n]
print(r)
':'.join(r)
```
```python
pd.Timestamp(':'.join(r))
```
```python
rows1.iloc[0]
```
```python
## 計算最大值
rows1.columns
```
```python
#5之後,9以前,不包含9
rows1.columns[5:9]
```
```python
## 取出欄位資料
rows1[rows1.columns[5:9]]
```
```python
## 刪除資料 欄位
rows1[rows1.columns[5:9]].drop("salary_1",axis=1)
```
```python
## 刪除資料 列位
rows1[rows1.columns[5:9]].drop(0,axis=0)
```
```python
## axis : 0 is by row, 1 by is column
rows1[rows1.columns[5:9]].max(axis=1)
```
```python
rows1.head()
```
```python
rows1.to_csv("new.csv", header=False,index = False)
```
```python
## 存入資料庫
import psycopg2
conn = psycopg2.connect(database="mydb", user="test01",
password="testpw", host="localhost",
port="5432")
print(conn.closed)
## 允許python執行postgresql語法
with conn.cursor() as cur:
cur.execute("DROP TABLE IF EXISTS public.new_salary")
cur.execute("CREATE TABLE public.new_salary( \
check_n SERIAL PRIMARY KEY, \
id SERIAL, \
date DATE NOT NULL, \
clock_in VARCHAR (10) NOT NULL, \
clock_out VARCHAR (10) NOT NULL, \
salary_1 VARCHAR (11) NOT NULL, \
salary_2 VARCHAR (11) NOT NULL, \
salary_3 VARCHAR (11) NOT NULL, \
salary_4 VARCHAR (11) NOT NULL, \
salary_1_num BIGINT NOT NULL)")
f = open("./new.csv", 'r')
cur.copy_from(f, "new_salary", columns=rows1.columns, sep=",")
print("copy end")
conn.commit()
print("end")
```
```python
import psycopg2
conn = psycopg2.connect(database="mydb", user="test01",
password="testpw", host="localhost",
port="5432")
## 允許python執行postgresql語法
with conn.cursor() as cur:
## 觀察查詢筆數
print(cur.rowcount)
## SQL語法
sql = "select * from public.new_salary"
## 執行sql語法
cur.execute(sql)
## 取得欄位名稱
name = [desc[0] for desc in cur.description]
## fetchall是將所有收尋的資料都寫入 rows 中
# rows = pd.DataFrame(cur.fetchall())
rows2 = pd.DataFrame(cur.fetchall(),columns=name)
## 取出一筆資料
# rows = pd.DataFrame(cur.fetchone())
## 自行設定要幾筆資料
# rows = pd.DataFrame(cur.fetchmany(10))
## 觀察查詢筆數
print(cur.rowcount)
print(rows2.shape)
rows2.head()
```
# 功課
## 一、輸出完整時間格式,移除不尋常的時間,取代表內格式
```python
import psycopg2
import pandas as pd
conn = psycopg2.connect(database="mydb", user="test01",
password="testpw", host="localhost",
port="5432")
print(conn.closed)
## 允許python執行postgresql語法
with conn.cursor() as cur:
## 觀察查詢筆數
print(cur.rowcount)
## SQL語法
sql = "select * from public.salary"
## 執行sql語法
cur.execute(sql)
## 取得欄位名稱
name = [desc[0] for desc in cur.description]
## fetchall是將所有收尋的資料都寫入 rows 中
# rows = pd.DataFrame(cur.fetchall())
rows = pd.DataFrame(cur.fetchall(),columns=name)
## 取出一筆資料
# rows = pd.DataFrame(cur.fetchone())
## 自行設定要幾筆資料
# rows = pd.DataFrame(cur.fetchmany(10))
## 觀察查詢筆數
print(cur.rowcount)
print(rows.shape)
rows.head()
```
```python
## 時間需要xx:xx:xx
## 不是24進位或者60進位
i = []
for n in [rows.clock_in.str.split(r"\:")][0]:
limit = True
print(n)
## n要是字串
for m in range(3):
if len(n[m])<2:
n[m] = str(0)+n[m]
if int(n[m]) == 60:
limit = False
# x = "00:00:00"
if limit == True:
':'.join(n)
i.append(pd.Timestamp(':'.join(n)).time())
else:
i.append(pd.Timestamp("00:00:00").time())
i
```
```python
o = []
for n in [rows.clock_out.str.split(r"\:")][0]:
limit = True
print(n)
for m in range(3):
if len(n[m])<2:
n[m] = str(0)+n[m]
if int(n[m]) == 60 or int(n[0]) == 24:
limit = False
# x = "00:00:00"
if limit == True:
':'.join(n)
o.append(pd.Timestamp(':'.join(n)).time())
else:
o.append(pd.Timestamp("00:00:00").time())
```
```python
rows2 = []
rows2 = rows
rows2.clock_in = i
rows2.clock_out = o
rows2.head()
```
## 二、將薪水1~4改成數值資料,並且計算出最大值,寫到新的欄位
```python
rows2["salary"] = pd.to_numeric(rows2[rows2.columns[5:9]].T.max()).astype(int)
rows2 = rows2.drop(rows2.columns[5:9],axis=1)
rows2.head()
```
```python
rows2.info()
```
## 三、以上完成,將新表格存入psql資料庫
### 表格內容:check_n, id, date, clock_in, clock_out, maxsalary
#### 這部分沒有完成下禮拜作業會無法完成
```python
rows2.to_csv("new.csv", header=False,index = False)
## 存入資料庫
import psycopg2
conn = psycopg2.connect(database="mydb", user="test01",
password="testpw", host="localhost",
port="5432")
print(conn.closed)
## 允許python執行postgresql語法
with conn.cursor() as cur:
cur.execute("DROP TABLE IF EXISTS public.salary_max")
cur.execute("CREATE TABLE public.salary_max( \
check_n SERIAL PRIMARY KEY, \
id SERIAL, \
date DATE NOT NULL, \
clock_in time NOT NULL, \
clock_out time NOT NULL, \
salary BIGINT NOT NULL)")
f = open("./new.csv", 'r')
cur.copy_from(f, "salary_max", columns=rows2.columns, sep=",")
cur.execute("GRANT ALL ON TABLE new_salary to test01")
print("copy end")
conn.commit()
print("end")
```
```python
print(conn.closed)
## 允許python執行postgresql語法
with conn.cursor() as cur:
## 觀察查詢筆數
print(cur.rowcount)
## SQL語法
sql = "select * from public.salary_max"
## 執行sql語法
cur.execute(sql)
## 取得欄位名稱
name = [desc[0] for desc in cur.description]
## fetchall是將所有收尋的資料都寫入 rows 中
# rows = pd.DataFrame(cur.fetchall())
rows2 = pd.DataFrame(cur.fetchall(),columns=name)
## 取出一筆資料
# rows = pd.DataFrame(cur.fetchone())
## 自行設定要幾筆資料
# rows = pd.DataFrame(cur.fetchmany(10))
## 觀察查詢筆數
print(cur.rowcount)
print(rows2.shape)
rows2.head()
```
```python
```