# pandas data analysis
## Series
```python=
import pandas # Usually, we will set pandas as pd
# Build a one-dimensional column (Series) and print its summary statistics.
column = pandas.Series([12, 32, 43, 12])
print(column)
largest = column.max()
smallest = column.min()
middle = column.median()
average = column.mean()
print("print Max", largest)
print("print min", smallest)
print("print media", middle)
print("print arerage", average)
# Double every element of the column.
column = column.mul(2)
print(column)
# Rebind the name to a boolean mask: True where the doubled value equals 24.
column = column.eq(24)
print(column)
```
## Dataframe
```python=
# Build a two-dimensional table (DataFrame) from a dict of columns.
# The row labels are passed as a list: a set literal has no defined order, so
# with a set the labels could be attached to the rows in any order and
# .loc["c"] would not reliably be the third row.
Dim2Data = pandas.DataFrame(
    {
        "name": ["jacky", "mandy", "meowhecker"],
        # NOTE: the last value is larger than a 64-bit integer can hold.
        "salary": [999999999, 12, 9999999999999999999999999999],
        "age": [20, 40, 23],
    },
    index=["a", "b", "c"],  # row labels, in row order
)
print(Dim2Data)
print("=======================================")
# Basic information
print(Dim2Data.iloc[2])  # iloc = integer location: select a row by position
print("data_type"+"(rows,columns)", Dim2Data.shape)
print("data index", Dim2Data.index)
print("======================================")
print(Dim2Data.loc["c"], sep="\n")  # loc: select a row by its index label
```
```python=
# Compute the average of the "salary" column.
columnSalary = Dim2Data.loc[:, "salary"]
# Manual alternative, kept for reference only (never executed):
#   total = columnSalary[0]+columnSalary[1]
#   print(total)
averageSalary = columnSalary.mean()  # mean() returns the arithmetic average
print(averageSalary)
```
## Create new columns
```python=
import pandas as pd

# Rebuild the table with integer row labels 0..2, then append two columns.
# The module is imported under the alias `pd`, so every call goes through
# `pd`; referring to `pandas` here would fail when this cell runs on its own.
Dim2Data = pd.DataFrame(
    {
        "name": ["jacky", "mandy", "meowhecker"],
        "salary": [9921319, 23213421, 99999],
        "age": [20, 40, 23],
    },
    index=range(3),  # row labels 0, 1, 2
)
# Explicit form: a Series is matched to the rows by index label.
Dim2Data["rank"] = pd.Series([3, 2, 1])
# Shortcut form: a plain list is assigned to the rows by position.
Dim2Data["test"] = [1, 2, 3]
print(Dim2Data)
```
# Filter data
```python=
import pandas
# Numeric filtering: build a boolean mask, then keep the elements where the
# mask is True.
testColumn = pandas.Series([54, 23, 90])
condition = testColumn.gt(30)  # element-wise "greater than 30"
print("filterCondition")
print(condition)
print("=================================================")
filterTestColumn = testColumn.loc[condition]
print(filterTestColumn)
print("=================================================")
# String filtering: keep only the entries whose text contains a keyword.
labels = ["智晟王者", "家偉盜賊", "冠霖盜賊"]
testColumn2 = pandas.Series(labels)
print(testColumn2)
print("===================================================")
print("display what you want to show")
# The mask could also be written by hand as a list of booleans.
stringCondition = testColumn2.str.contains("王者")
testColumn2 = testColumn2.loc[stringCondition]
print(testColumn2)
print("==================================================")
# DataFrame filtering: select rows with a boolean mask built from one column.
studentColumns = {
    "name": ["智晟", "家偉", "冠霖", "佑豪"],
    "score": [100, 80, 75, 34],
}
studentTable = pandas.DataFrame(studentColumns)
print(studentTable)
print("=================================================")
print("pass > 60")
# Mask is True for every student whose score is at least 60.
condition = studentTable["score"].ge(60)
print(studentTable.loc[condition])
print("==================================================")
print("取得智晟的成績")
# Mask is True only for the row whose name matches exactly.
condition = studentTable["name"].eq("智晟")
print(studentTable.loc[condition])
```
# Analyze Data
Resource file: from a video by the YouTuber 澎澎
```python=
# Analyze data
import pandas
# Read the CSV file and load it into a DataFrame.
dataTable = pandas.read_csv("googleplaystore.csv")
print(dataTable)
print("==================================================")
print("trying to get information")
tableShape = dataTable.shape  # (rows, columns)
print("table 大概的形狀", tableShape)
tableColumns = dataTable.columns
print("tuble columns", tableColumns)
print("==================================================")
print("Showing data that is we want to know.")
# Pull out the "Rating" column once and reuse it for every statistic.
rating = dataTable["Rating"]
print(rating)
ratingMean = rating.mean()
print("Average of Rating", ratingMean)
ratingMedian = rating.median()
print("MedianNumber of Rating", ratingMedian)
# Average of the 100 largest ratings.
top100Rating = rating.nlargest(100)
print("前一百名 rating 平均", top100Rating.mean())
print("===================================================")
print("找出奇怪的數值")
# Show the rows whose rating is above 5.
condictionFindOver5 = rating.gt(5)
print(dataTable.loc[condictionFindOver5])
print("===================================================")
print("Exclude the odd data")
# Keep only rows whose rating is at most 5; a missing rating (if any)
# compares False and is therefore dropped as well.
conditionExcludeOver5 = rating.le(5)
exData = dataTable.loc[conditionExcludeOver5]
print(exData)
# Average of the 100 largest ratings among the remaining rows.
top100Cleaned = exData["Rating"].nlargest(100)
print("前一百名 rating 平均", top100Cleaned.mean())
print("====================================================")
print("to Analye Install")
print(dataTable.columns)
print(dataTable["Installs"])
print("=======================================================")
print("filter the odd data ")
# The raw "Installs" values are text that contains "+" and "," characters.
# regex=True is required: on pandas >= 2.0 the default is a literal match, so
# the character class "[+,]" would otherwise strip nothing.
# astype(str) lets this cell be rerun after the column is already numeric.
installsText = dataTable["Installs"].astype(str).str.replace("[+,]", "", regex=True)
# errors="coerce" turns any leftover non-numeric text into NaN instead of
# raising. NOTE(review): the original author noted that row 10472 holds the
# text "Free" -- confirm against the CSV.
dataTable["Installs"] = pandas.to_numeric(installsText, errors="coerce")
print("Average of install is ", dataTable["Installs"].mean())
over100000Condition = dataTable["Installs"] > 100000
print("Over 100000 installed ", dataTable[over100000Condition].shape)
print("========================================================")
print("Using keywords search the APP")
keywords = input("Enter the keywords")
# case=False  -> case-insensitive match.
# regex=False -> the typed text is matched literally, so input such as "C++"
#                or "(" cannot raise a regular-expression error.
# na=False    -> rows with a missing app name count as "no match", keeping
#                the mask purely boolean so it can be used for row selection.
conditionKeyword = dataTable["App"].str.contains(
    keywords, case=False, regex=False, na=False
)
print("The result of a searching is:", dataTable[conditionKeyword])
```