python
import pandas as pd
說明:
引入pandas模組,本日課程所有範例都需要引入
# A Series built from a plain list gets the default 0..n-1 integer labels.
s = pd.Series(data=[13, 30, 77, 6, 111, 10])
print(s)

# A Series with custom labels: `no` supplies one label per entry of `names`.
names = ['Aaron', 'Amber', 'Andy', 'Apple']
no = [32, 12, 42, 'A']
s = pd.Series(data=names, index=no)
print(s)
print(s[12])  # look up the entry labelled 12 -> Amber
print(s[[32, 42]])  # a list of labels selects several entries at once
print(s.index)  # every label in the Series
print(s.values)  # every value in the Series
scores = [23, 45, 67, 34, 98]

# Plain-Python approach: a list comprehension adds 10 to every score.
newScore = [score + 10 for score in scores]
print(newScore)

# pandas approach: arithmetic on a Series is applied to every element.
s = pd.Series(data=scores)
print(s + 10)
print((s - 1) * 2)
# Column-oriented table: each key is a column name, each list is that column.
data = {
    '學號': [1, 2, 3, 4, 5],
    '姓名': ['Aaron', 'Amber', 'Abner', 'Apple', 'Astrid'],
    '國文': [100, 90, 80, 70, 60],
    '英文': [90, 80, 70, 60, 50],
    '數學': [100, 95, 85, 75, 65],
}

# Without an explicit index the rows get the default integer labels.
df = pd.DataFrame(data=data)
print(df)

# Same data again, this time with custom row labels.
index = ['0101', '0102', '0103', '0104', '0105']
df = pd.DataFrame(data=data, index=index)
print(df)
print(df.T)  # transposed view: rows and columns swapped
# Read the CSV file into a DataFrame.
df1 = pd.read_csv('Fstdata.csv', encoding='utf-8')
print(df1)

# Round-trip through Excel. to_excel() writes the row index by default, so the
# frame read back normally carries one extra column holding the old index.
df1.to_excel('Fstdata.xlsx')
df2 = pd.read_excel('Fstdata.xlsx')
print(df2)

# Visit the rows one at a time; the loop body has to be indented under `for`.
for index, row in df2.iterrows():
    print(index, row)

print(df2.head(1))  # first row
print(df2.tail(1))  # last row
df2.info()  # per-column summary
print(len(df2))  # number of rows
print(df2.shape)  # (rows, columns)
# A Series has a shape as well; `s` must already exist from the Series
# examples earlier in the file for this line to run.
print(s.shape)
data = {
    '學號': [1, 2, 3, 4, 5],
    '姓名': ['Aaron', 'Amber', 'Abner', 'Apple', 'Astrid'],
    '國文': [100, 90, 80, 70, 60],
    '英文': [90, 80, 70, 60, 50],
    '數學': [100, 95, 85, 75, 65],
}
index = list('ABCDE')
df = pd.DataFrame(data=data, index=index)
print(df)
print('-----------------------------')
# One column, two spellings: bracket access and attribute access.
print(df['姓名'].head(2))
print(df.姓名.head(2))
print('-----------------------------')
# Several columns at once: pass a list of column names (first two rows shown).
print(df[['姓名', '學號']].head(2))
print('-----------------------------')
# Row slices: by position (end excluded) and by label (end included).
print(df[1:2])
print(df['B':'C'])
# .loc selects by label: rows first, then columns.
print(df.loc['A'])
print(df.loc[:, '姓名'])
print(df.loc['B':'D', ['學號', '姓名']])
print('-----------------------------')
print(df.loc['E', '數學'])  # a single cell
print('-----------------------------')
print(df.loc['D':'E', ['國文', '英文']])
另有 iloc 可以用整數位置(第幾列、第幾欄,從 0 開始)來定位,請參考講義
data = {
    '學號': [1, 2, 3, 4, 5],
    '姓名': ['Aaron', 'Amber', 'Abner', 'Apple', 'Astrid'],
    '國文': [100, 90, 80, 70, 60],
    '英文': [90, 80, 70, 60, 50],
    '數學': [100, 95, 85, 75, 65],
}
index = list('ABCDE')
df = pd.DataFrame(data=data, index=index)
print(df)

# Boolean mask: keep the rows whose 國文 score is at least 80.
print(df[df['國文'] >= 80])

# Combine several masks with `&`: all three subjects must be at least 60.
print(
    df[
        (df['國文'] >= 60)
        & (df['數學'] >= 60)
        & (df['英文'] >= 60)
    ]
)
取得人生藥局的快篩剩餘量(與第一週的範例比較)
# Load the rapid-test inventory CSV and print its column summary.
df1 = pd.read_csv('Fstdata.csv', encoding='utf-8')
df1.info()
df2 = df1[ df1.醫事機構名稱.str.contains('人生') ] # keep rows whose 醫事機構名稱 contains '人生'; filtering yields a new DataFrame
print(df2.iloc[:, [1, 7]]) # show only the columns at positions 1 and 7
# NOTE(review): set_index() is called here without inplace=True and its return
# value is not assigned, so this line leaves df1 unchanged — confirm intent.
df1.set_index('快篩試劑截至目前結餘存貨數量')
df1.sort_values(['快篩試劑截至目前結餘存貨數量'], ascending=False, inplace=True)
df_top_10 = df1.head(10) # top 10 pharmacies with the most rapid tests remaining
print(df_top_10.iloc[:, [1, 7]])
# Same steps again, this time moving the column into the index in place.
# NOTE(review): once the column becomes the index it is no longer counted among
# the columns, so positions shift — verify [1, 7] still picks the intended ones.
df1.set_index('快篩試劑截至目前結餘存貨數量', inplace=True)
df1.sort_values(['快篩試劑截至目前結餘存貨數量'], ascending=False, inplace=True)
df_top_10 = df1.head(10) # top 10 pharmacies with the most rapid tests remaining
print(df_top_10.iloc[:, [1, 7]])
data = {
    '日期': [
        '2021-01-01', '2021-01-02', '2021-01-01', '2021-01-03',
        '2021-01-01', '2021-01-02', '2021-01-03',
    ],
    '交通': [100, 50, 60, 899, 32, 97, 21],
    '旅遊': [102, 30, 160, 89, 321, 197, 221],
    '其他': ['cc', 'ee', '160', '89', '321', '197', '221'],
}
df = pd.DataFrame(data=data)

# Total the two numeric columns per date; the text column '其他' is not selected.
df_sum = df.groupby(by='日期')[['交通', '旅遊']].sum()
print(df_sum)
def to_japan(val):
    """Return ``val`` divided by three with the fractional part dropped.

    ``int()`` truncates toward zero, so a negative quotient is rounded up
    rather than down.
    """
    return int(val / 3)
# Run to_japan on every value of the '旅遊' column.
s_result = df['旅遊'].apply(to_japan)
print(s_result)

# Store the converted values back into the original DataFrame.
df['旅遊'] = s_result.tolist()
print(df)
data = {
    '學號': [1, 2, 3, 4, 5],
    '姓名': ['Aaron', 'Amber', 'Abner', 'Apple', 'Astrid'],
    '國文': [100, 90, 80, 70, 60],
    '英文': [90, 80, 70, 60, 50],
    '數學': [100, 95, 85, 75, 65],
}
df = pd.DataFrame(data=data)
print(df)

# Remove the rows labelled 1 and 4.
df = df.drop(index=[1, 4])
print(df)

# Remove the '國文' column.
df = df.drop(columns=['國文'])
print(df)

# Overwrite a single cell by position: first remaining row, second column.
df.iloc[0, 1] = 'Aaron Ho'
print(df)

# Add a new column; one value for each of the three remaining rows.
df['哈囉'] = [1, 2, 3]
print(df)
or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up