---
title: Python
tags: note
---

[TOC]
# terminology
* pyautogui
* reCAPTCHA
# subprocess
```python
import subprocess
out_bytes = subprocess.check_output(['netstat','-a'])
subprocess.check_output('echo Hello World', shell=True)
subprocess.call('echo Hello World', shell=True)
```
# logging
message level:
DEBUG -> INFO -> WARNING -> ERROR -> CRITICAL
Default: 只有 WARNING 以上會被打在 Console 上
* 只輸出在 Console 上
```python
import logging
logging.warning('Watch out!') # will print a message to the console
logging.info('I told you so') # will not print anything
```
* 同時輸出在 Console 和 logfile 上
```python
import logging

# Configure the root logger so that every record from DEBUG upward is handled.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# One formatter shared by both handlers: timestamp followed by the message.
formatter = logging.Formatter('%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

# Handler 1: write to the console.
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

# Handler 2: write the same records to a log file.
fh = logging.FileHandler("test.log")
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)

logger.addHandler(ch)
logger.addHandler(fh)

if __name__ == "__main__":
    # Each call goes to both handlers because they are attached to `logger`.
    logger.debug('debug')
    logger.info('info')
    logger.warning('warning')
    logger.error('error')
    logger.critical('critical')
```
# module 相關
* \_\_init\_\_.py
> 在 import module 時會執行的程式
```python
└── mypackage
├── subpackage_1
│ ├── test11.py
│ └── test12.py
├── subpackage_2
│ ├── test21.py
│ └── test22.py
└── subpackage_3
├── test31.py
└── test32.py
```
```python=
# example of __init__.py
import mypackage
from mypackage.subpackage_1 import test11
from mypackage.subpackage_1 import test12
from mypackage.subpackage_2 import test21
from mypackage.subpackage_2 import test22
from mypackage.subpackage_3 import test31
from mypackage.subpackage_3 import test32
```
* \_\_init\_\_.py 實現 `from mypackage import *`
```python=
# 修改 __all__
__all__ = ['subpackage_1', 'subpackage_2']
```
# Type
* bin
```python
bin(123) # '0b1111011'
```
# math
* dist
```python
math.dist([3], [4])
```
* inf
```python
math.inf
```
# networkx
[:link:][networkx-type]
[networkx-type]: https://hackmd.io/H1ss3XhzRBOPteU3WbgFjQ
# function
* args
```python
def test_var_args(f_arg, *args):
print("first normal arg:", f_arg)
for arg in argv:
print("another arg through *args:", arg)
test_var_args('yasoob', 'python', 'eggs', 'test')
```
* kwargs
```python
def myFun(arg1, **kwargs):
    """Print every keyword argument as ``key == value``.

    arg1     -- a required positional argument (not used by the body).
    **kwargs -- arbitrary keyword arguments, collected into the dict ``kwargs``.
    """
    for key, value in kwargs.items():
        print("%s == %s" % (key, value))


myFun("Hi", first='Geeks', mid='for', last='Geeks')
# Output (keyword arguments keep their call order on Python 3.7+):
# first == Geeks
# mid == for
# last == Geeks
```
```python
def dog_bark():
print("Bark !!!")
```
* name of function
```python
dog_bark.__name__
```
* function 也可以做傳遞
```python
def x():
print(20)
y = x
y()
```
* decorator
> 在執行前先做事?
```python
import functools


def print_func_name(func):
    """Decorator: announce the wrapped function's name before every call.

    The wrapper forwards all positional and keyword arguments to ``func``
    and returns whatever ``func`` returns.
    """
    @functools.wraps(func)  # keep func.__name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        print("Now use function '{}'".format(func.__name__))
        return func(*args, **kwargs)
    return wrapper


@print_func_name
def dog_bark():
    print("Bark !!!")
```
# whos
> read all variable
# opencc
> 繁體轉簡體
```python=
from opencc import OpenCC
cc = OpenCC('t2s')
text = '投票當天需攜帶投票通知單、國民身分證及印章,若沒有收到投票通知書,可以向戶籍所在地鄰長查詢投票所,印章則是可以用簽名代替,至於身分證則是一定要攜帶。'
print(cc.convert(text))
```
# jieba
```python=
import jieba

documents = ['我来自北京清华大学', '我喜欢写程式', '每天发技术文章']
for sentence in documents:
    # cut_all=False selects accurate mode; HMM=True lets the HMM model
    # recognise words missing from the dictionary (both are the defaults).
    seg_list = jieba.cut(sentence, cut_all=False, HMM=True)
    # jieba.cut returns a generator of words; join them for display.
    print('/'.join(seg_list))
```
# copy
* deepcopy
```python
import copy
a = [1, [2,3]]
a_deepcopy = copy.deepcopy(a)
```
# sys
* sys.path
> import path
# None
* 查詢變數是否為None
```python
variable is None
```
# Selenium
* 載入瀏覽器
```python
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
options.add_argument("--disable-notifications")
chrome = webdriver.Chrome('./chromedriver', chrome_options=options)
browser = webdriver.Firefox(options = opts, executable_path="./geckodriver")
# chromedriver 要自己下載歐
```
* 用瀏覽器進入網頁
```python
URL = "http://server-a1.ddns.net:5153/"
chrome.get(URL)
```
* 選取網頁元素
```python
chrome.find_element_by_id("seed") # by id
# by xpath: pass the XPath as a string (e.g. the "full XPath" copied from the browser dev tools)
chrome.find_element_by_xpath("/html/body/div[1]/input") # by xpath
chrome.find_element_by_id("seed").text # get text
```
* send keys
```python
seed = chrome.find_element_by_id("seed")
seed.send_keys(Keys.CONTROL, 'a')
seed.send_keys(Keys.BACKSPACE)
seed.send_keys(Keys.LEFT)
seed.click() #點觸
```
* 其他
```python
chrome.refresh() # reload the current page
chrome.back() # go back to the previous page
chrome.close() # close the current browser window
```
# Exception handling
* assert
```python
assert os.path.exists("./ckiptagger_data"), "ckiptagger_data 不在同層目錄"
```
* try except
```python
a = 22
b = 33
try:
    if a < b:
        # `n` is never defined, so this line raises NameError on purpose.
        print(n)
except NameError:
    # Catch only the expected exception type; a bare `except:` would also
    # swallow KeyboardInterrupt and real bugs.
    print("except")
```
# list
* specific 2d (取出 2d list 的子矩陣)
```python
def get_matrix(A, r_s, r_e, c_s, c_e):
    """Return a sub-matrix of the nested list ``A``.

    Rows are kept when their index ``i`` satisfies ``r_s <= i < r_e``;
    from each kept row the slice ``[c_s:c_e]`` is taken.
    A new list is returned; ``A`` itself is not modified.
    """
    return [
        row[c_s:c_e]
        for row_idx, row in enumerate(A)
        if r_s <= row_idx < r_e
    ]
```
* count
```python
.count()
```
* sort
```python
.sort()
```
* 添加元素 append
```python
list_ = [] ## 空列表
list_.append('Google') ## 使用 append() 添加元素
list_.append('Runoob')
```
* 添加元素 extend
```python
bad_1 = ['Bad', 'Smooth Criminal','Speed Demon']
bad_2 = ['Man in the Mirror', 'Dirty Diana']
bad_1.extend(bad_2)
print(bad_1)
```
* 將list的element 從 str 轉到 float
```python
list(map(float, mylist))
```
* nested list to 1d list
```python
from itertools import chain
buf = list(chain.from_iterable(buf))
```
* split string to list
```python
s = "abcabcbb"
s.split("a")
# ['', 'bc', 'bcbb']
```
* pop element
```python
list1 = ['Google', 'Runoob', 'Taobao']
list_pop=list1.pop(1)
```
* 取前三項並命名
```python
a, b, c = data[:3]
```
* Create empty list
```python
lst = [None] * 10
```
# defaultdict
```python=
from collections import defaultdict


def zero():
    """Default factory: the value a missing key starts with."""
    return 0


# Missing keys are created on first access by calling zero().
# (defaultdict(int) is an equivalent shortcut, since int() also returns 0.)
counter_dict = defaultdict(zero)
a_list = ['a', 'b', 'x', 'a', 'a', 'b', 'z']
for element in a_list:
    counter_dict[element] += 1
print(counter_dict)
```
# IPython
* 消除輸出
```python
from IPython.display import clear_output
clear_output()
```
# Crawler
[:link:][Crawer-type]
[Crawer-type]: https://hackmd.io/bBRnnn1cQlCtW4y1Do0_2A?both
# GloVe
```python
import numpy as np

# Map each word to its embedding vector.
embeddings_dict = {}
# Explicit encoding so the result does not depend on the platform default.
with open("glove.6B.50d.txt", 'r', encoding="utf-8") as f:
    for line in f:
        # Each line is split on whitespace: first token is the word,
        # the remaining tokens are parsed as float32 components.
        values = line.split()
        word = values[0]
        vector = np.asarray(values[1:], "float32")
        embeddings_dict[word] = vector
```
# warnings
* no warning
```python
import warnings
warnings.filterwarnings('ignore')
```
# ord()
> ASCII 對應的數值
```python
ord('a') # 97
chr(97) # a
```
# dictionary operation
* create dictionary
```python
phone_dict = {"Kim":"123", "Tom":"345"}
# from list
dishes = ["pizza", "sauerkraut", "paella", "Hamburger"]
countries = ["Italy", "Germany", "Spain", "USA"]
country_specialities_dict = dict(zip(countries, dishes))
```
* check if a key exists
```python
'Tom' in phone_dict
```
* Add / Update / Remove Key‐Value pairs
```python
data = {} # start with an empty dictionary
data['Joe'] = 181
data['Alice'] = 159
data['Sue'] = 165
print(data) # {'Joe': 181, 'Alice': 159, 'Sue': 165} (insertion order, Python 3.7+)
del data['Joe'] # remove a key-value pair by del statement
data['Alice'] = 163 # assigning to an existing key updates its value
```
* access item
```python
phone_dict = {"Kim": "123", "Tom": "345"}
phone_dict.get('Sue') # key is missing -> returns None instead of raising KeyError
phone_dict.get('Kim') # '123'
```
* get all key/ get all value
```python
list(data.keys())
list(data.values())
```
* update
```python
f1 = {'apples': 1, 'oranges': 3, 'pears': 2}
f2 = {'pears': 4, 'grapes': 5}
f1.update(f2)
```
* change key name
```python
a_dict = {"a": 1, "B": 2, "C": 3}
new_key = "A"
old_key = "a"
a_dict[new_key] = a_dict.pop(old_key)
```
# set operation
* s.issubset(t)
* set difference
```python
set(A)-set(B)
```
* intersection
```python
set(a) & set(b)
```
* union
```python
set(a) | set(b)
```
# string operation
* Formatting string
```python
# % [flag][width][.precision]typecode
# [flag]:  + : always show the sign   - : left-justify   0 : pad with zeros
x = 1234
# The left-justify flag must be the ASCII hyphen-minus "-".
myformat = "integers: %d | %-6d | %06d"
print(myformat % (x, x, x)) # integers: 1234 | 1234   | 001234
```
* duplication
```python
start = "Na" * 4
```
* replace
```python
# Named `text` so the builtin `str` type is not shadowed.
text = "this is string example....wow!!! this is really string"
# replace() substitutes every occurrence, including the "is" inside "this".
print(text.replace("is", "was"))
```
* list to string
```python
"".join(["a", "b", "c"])
```
* string to list
```python
list('abc')
```
* contain
```python
"data" in "asdfasdfdata"
```
* 刪除後面空白
```python
" xyz ".rstrip()
```
* 刪除前面空白
```python
str2 = " Runoob "
str2.lstrip() # 'Runoob ' -- only leading whitespace is removed (strip() removes both ends)
```
* split
```python=
s = "1 2 3456"
s.split(" ")
```
* endswith
```python=
s = "1 2 3456"
s.endswith("456")
```
# numpy
[:link:][numpy-type]
[numpy-type]: https://hackmd.io/4_ij3QgtTci1vXN9plcnRw
# input type
[:link:][input-type]
[input-type]: https://docs.python.org/3/library/typing.html
# argparse
```python
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--num", default = 50, type=int)
parser.add_argument("--str", default = "yes", type=str)
# other parameter: required=True
args = parser.parse_args()
args = parser.parse_args(args=[]) # for Jupyter Notebook
print("--num: %d " % args.num)
print("--str: %s " % args.str)
# 可以用這招 一勞永逸
if "__file__" in dir():
args = parser.parse_args()
else:
args = parser.parse_args([])
```


# NLTK
[:link:][nltk-Sync]
[nltk-Sync]: https://hackmd.io/LltMMZl9RlGCeWAm8JMzLw
# package
## upgrade module
```
!pip install --upgrade matplotlib
```
# txt I/O
* write
```python
# "w" truncates an existing file (overwrite); the with-block closes the
# file even if write() raises.
with open('test.txt', "w") as f:
    f.write("add one line\n")
```

* read txt file to list
```python
f = open(file_path, 'r+')
# f = open(file_path, 'r+', encoding="utf-8")
buf = f.read().splitlines()
f.close()
```
# pytorch
[:link:][pytorch-Sync]
[pytorch-Sync]: https://hackmd.io/j5tOstOkRny46AFIYUNfjg
# tqdm
* for .py
```python
from tqdm import tqdm
```
* for notebook
```python
from tqdm.notebook import tqdm
```
# plot
[:link:][plot-Sync]
[plot-Sync]: https://hackmd.io/6LdE25KjSO-8qhpAIOQwKg
# namedtuple
```python
from collections import namedtuple
# define the namedtuple class with three named fields
Identity = namedtuple('Identity', ['first_name','last_name','birthday'])
# create an instance
identity = Identity('Sam','Lee','4/2')
# access fields by name
identity.birthday # '4/2'
identity.first_name # 'Sam'
# _replace returns a new tuple with the given field changed, so rebind the name
identity = identity._replace(birthday='4/3')
```
# multiprocessing
```python
import multiprocessing as mp
import time


def job(x, delay=5):
    """Simulate a slow task: print x, sleep ``delay`` seconds, return x squared."""
    print(x)
    time.sleep(delay)
    return x * x


def multicore():
    """Run ``job`` over range(100) on a process pool and print the results."""
    # Create the pool inside the function (reached only via the main guard)
    # so importing this module never starts worker processes; the with-block
    # shuts the pool down once map() has returned.
    with mp.Pool() as pool:
        res = pool.map(job, range(100))
    print(res)


if __name__ == '__main__':
    multicore()
```
# colab
* upload file
```python
from google.colab import files
uploaded = files.upload()
```
# dictionary
## get item by key
```python
dict[keyname]
```
## create dictionary
```python
char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz']
num_dic = {n: i for i, n in enumerate(char_arr)}
```
```python
keys = ['a', 'b', 'c']
values = [1, 2, 3]
dictionary = dict(zip(keys, values))
```
# gym
```python
import gym
from IPython import display
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
env = gym.make('CartPole-v0')
env.reset()
img = plt.imshow(env.render(mode='rgb_array')) # only call this once
for _ in range(100):
img.set_data(env.render(mode='rgb_array')) # just update the data
display.display(plt.gcf())
display.clear_output(wait=True)
action = env.action_space.sample()
env.step(action)
```
# time
```python
import time

tStart = time.time() # start timing
# --- simulated workload to be measured ---
time.sleep(2)
print("abc")
for x in range(1000):
    x += 1
    print(x)
# --- end of simulated workload ---
tEnd = time.time() # stop timing
# report the result
print("It cost %f sec" % (tEnd - tStart)) # %f rounds to six decimal places
```
## datetime
```python
import datetime
print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
```
# itertools
```python
import itertools
# Each call returns a lazy iterator; wrap it in list(...) to see the tuples.
itertools.product('ABCD', repeat=2) # permutations with repetition (Cartesian product with itself)
itertools.product([1,2,3],[3,4]) # Cartesian product of two iterables
itertools.permutations('ABCD', 2) # permutations without repetition
itertools.combinations('ABCD', 2) # combinations without repetition
itertools.combinations_with_replacement('ABCD', 2) # combinations with repetition
```
## count
```python
from itertools import count

# count() yields 0, 1, 2, ... without end, so the loop needs an explicit break.
for t in count():
    print(t)
    if t >= 15:
        break
```
# for loop
* enumerate
```python
lines = ["first line", "second line"]  # example input -- replace with your own list
for num, line in enumerate(lines):
    # Both fields are numbered: str.format() cannot mix an explicit index
    # ({0}) with an automatic one ({}).
    print("{0:03d}: {1}".format(num, line))
```
* range
```python
for i in range(15):
```
* dictionary
```python
for key, value in a_dict.items():
```
# class
* 基礎用法
```python
class test:
    """Demonstrates the most common special ("dunder") methods."""

    # "Private" class attribute: the double underscore triggers name
    # mangling, so outside the class it is reachable only as _test__alive.
    __alive = True

    def __init__(self):
        # constructor: runs when an instance is created
        pass

    def __del__(self):
        # finalizer: runs when the instance is about to be destroyed
        pass

    def __len__(self):
        # value returned by len(instance)
        return 5

    def __call__(self):
        # lets the instance be called like a function: instance()
        return "you call a function"

    def __repr__(self):
        return "5"  # the return type must be str


test2 = test()
len(test2) # 5
```
* Inheritance
```python
class Transportation:
    """Base class."""

    def drive(self):
        """Base implementation of drive()."""
        print("Base class drive method is called.")


class Car(Transportation):
    """Subclass that extends drive() instead of replacing it."""

    def drive(self):
        # Run the parent implementation first, then the subclass-specific part.
        super().drive()
        print("Sub class drive method is called.")
```
* with
使用方式
```python
class a:
    """Minimal context manager that prints when each hook runs."""

    def __init__(self):
        print("exec __init__")

    def __enter__(self):
        # Nothing is returned here, so the name bound by `as` is None.
        print("exec __enter__")

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Called when the with-block is left; the three arguments describe
        # the exception raised inside the block (all None if there was none).
        print("exec __exit__")

    def __del__(self):
        print("exec __del__")


with a() as s:
    pass
```
執行結果
```python
# exec __init__
# exec __enter__
# exec __exit__
# exec __del__
```
* @staticmethod
> 希望某個 member independent of instance,不帶instance為參數,就會宣告該 member 為 static ,使 members 間的關係更加乾淨俐落。
```python
class Shiba:
    def __init__(self, height, weight):
        self.height = height
        self.weight = weight

    @staticmethod
    def pee(length):
        """Print "pee" followed by ``length`` dots; uses no instance state."""
        print("pee" + "." * length)


# A static method can be called on the class itself, without an instance;
# its own parameters must still be supplied.
Shiba.pee(3)
```
* @property
```
class Bank_acount:
    @property
    def password(self):
        """Read-only attribute: accessed as ``obj.password``, without parentheses."""
        return '密碼:123'


# Read-only: no setter is defined, so assigning to andy.password raises AttributeError.
andy = Bank_acount()
print(andy.password)
```
* check variable
```python
classname.__dict__
```
* pickle **save class**
```python
import pickle


class Company(object):
    def __init__(self, name, value):
        self.name = name
        self.value = value


# save: pickle files are binary, hence mode 'wb'
with open('company_data.pkl', 'wb') as out_file:
    company1 = Company('banana', 40)
    pickle.dump(company1, out_file, pickle.HIGHEST_PROTOCOL)

# read: the Company class must be importable when loading.
# NOTE: only unpickle files you trust -- loading a pickle can run arbitrary code.
with open('company_data.pkl', 'rb') as in_file:
    company1 = pickle.load(in_file)
```
# call variable by string
```python
globals()[string]
```
# change type of list
```python
list(map(int, list_))
[ list(map(int, i)) for i in after_encode] # 2d list
```
# collections
[:link:][collection]
[collection]: https://docs.python.org/3/library/collections.html
* Counter
```python
from collections import Counter
c = Counter('abcasd')
c.update('red')
c.most_common()
len(c)
```
* deque
```python
from collections import deque
d = deque('ghi') # deque(['g', 'h', 'i'])
d.append('j') # deque(['g', 'h', 'i', 'j'])
d.appendleft('f') # deque(['f', 'g', 'h', 'i', 'j'])
```
* OrderedDict
```python=
from collections import OrderedDict
d1 = OrderedDict()
```
# .py 的開頭
```python
if __name__ == '__main__':
```
# 偵測string 的語言
```python
from textblob import TextBlob
b = TextBlob("bonjour")
b.detect_language()
```
# sklearn
[:link:][sklearn]
[sklearn]: https://hackmd.io/nesOt10mR120KCECtwSk8A
# random
* random shuffle
```python
import random
number_list = [7, 14, 21, 28, 35, 42, 49, 56, 63, 70]
random.shuffle(number_list)
```
* random choices with weights
```python
import random
list_ = [20, 30, 40, 50 ,60]
weights = [0.1, 0.1, 0.1, 0.2, 0.5]
sampling = random.choices(list_, weights = weights)
```
* random choices (draw k items, with replacement)
```python
import random
# Named list_ so the builtin `list` is not shadowed.
list_ = [20, 30, 40, 50, 60, 70, 80]
# k=4 draws four items; choices() samples with replacement, so repeats are possible.
sampling = random.choices(list_, k=4)
```
* generate random integer
```python
random.randint(0, 100)
random.randint(0, 1)
```
* random choice can replace
```python
np.random.choice(5, 3, replace=True)
```
# json
## save
* 若要儲存numpy 且裡面有int 要先轉換成一般的int
```python
# write file
import json
with open('savename.txt', 'w') as outfile:
json.dump(savedata, outfile)
```
* 存成中文 要加上utf-8
```python 存成中文
with open(i + ".txt",'w', encoding='utf8') as outfile:
json.dump(savedata, outfile, ensure_ascii=False)
```
* 如果讀進來是 str 則可用以下方式轉成dict
```python
data2 = json.loads(data)
```
## read
```python
import json

# read a file that holds a single JSON document
with open('dictonary.json') as json_file:
    data = json.load(json_file)

# method 2: a file with one JSON document per line;
# the with-block closes the file once every line has been parsed.
with open('train_gold.json', 'r') as json_file:
    records = [json.loads(line) for line in json_file]
```
* read function
```python
# read file
def read_data(str_):
path = '%s/%s' % (args.d, str_)
with open(path) as json_file:
buf = json.load(json_file)
if( args.t):
buf = buf[:20]
globals()[str_[:-5]] = buf
print("assign %s" % str_[:-5])
```
# os
```python
import os
os.path.exists(filepath) # check whether the file or directory exists
path = os.getcwd() # get the current working directory
os.chdir(path) # change the working directory
os.listdir(path) # list every entry in the folder
os.mkdir(path) # make dir
os.rename(a, b) # rename
os.path.abspath(os.path.dirname(__file__)) + "/swear_words_chinese.json" # safest way to build a path relative to this script
os.system(command) # run a shell command
# Rmk: to go up one directory level, use os.chdir("..")
# Rmk: a relative path can be written with "./"
```
# re
> Regular expression
[Online regular expression](https://regex101.com/)
```python
import re
filename = "a_b_c_d.json"  # example input -- replace with your own file name
# Raw string, with the dot escaped so it matches a literal "." only.
matchObj = re.match(r"(.*)_(.*)_(.*)_(.*)\.json", filename)
```
```python
import re

str_ = "hello(˙ω˙)world"  # example input -- replace with your own string
# Raw string so that \( and \) reach the regex engine unchanged.
matchObj = re.match(r"(.*)(\(˙.*\))(.*)", str_)
# re.match returns None when the pattern does not match.
if matchObj is not None:
    # Drop the parenthesised part (group 2) and keep the text around it.
    str_ = matchObj.group(1) + matchObj.group(3)
```
# select list element by bool index
```python
from itertools import compress
list_a = [1, 2, 4, 6]
fil = [True, False, True, False]
list(compress(list_a, fil))
```
# draw correlation map
資料格式:(dataframe)
| A | B | C |
| -------- | -------- | -------- |
| | | |
| | | |
| | | |
| | | |
```python
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
data = {'A': [45,37,42,35,39],
'B': [38,31,26,28,33],
'C': [10,15,17,21,12]
}
df = pd.DataFrame(data,columns=['A','B','C'])
corrMatrix = df.corr()
sns.heatmap(corrMatrix, annot=True)
# plt.savefig('corrMatrix.png') 輸出 #須放在 plt.show 之前
# plt.figure(figsize=(20,20)) 調整大小
plt.show()
```
# select list element by index
```python
test_list = [9, 4, 5, 8, 10, 14]
index_list = [1, 3, 4]
list(map(test_list.__getitem__, index_list)) # [4, 8, 10]
```
# pandas
* read excel
```python
xls = pd.ExcelFile("華語八千詞表20200917.xlsx")
xls.sheet_names
pd.read_excel(xls, '準備級一級')
```
* read csv
```python
pd.read_csv(path)
```
* save
```python
df.to_csv('result.csv', index=False)
df.to_csv(save_path, index=False, encoding="utf_8_sig") # sasve chinese file
```
* make data frame
```python
dict_ = {"標題": [1, 2, 3, 4, 5],
"直覺分數": None,
"誇張表現": None,
"情緒性字眼": None,
"刻意隱藏資訊": None,
"不正式用詞": None,
"不必要的資訊": None,
"附註": None
}
df = pd.DataFrame.from_dict(dict_)
```
* get element
```python
df.iloc[0] # get first row
df[df.columns[0]] # get first column
```
* groupby
```python
sectors = fortune.groupby("Sector")
sectors.get_group("Energy")
```
* round
```python
df.round(1)
```
* read .json file
```python
data = pd.read_json('https://bit.ly/108-nlp-train', lines = True)
```
* column switch
```python
df.reindex(columns=["idx", 'categories', 'reply', 'text'])
```
* sort by one column
```python
data.sort_values(by=['Body ID'])
```
# Sorting list based on values from another list
```python
Y = [0, 1, 1, 0, 1, 2, 2, 0, 1]
X = ["a", "b", "c", "d", "e", "f", "g", "h", "i"]
# Pair every element of X with its companion value in Y, then sort X in
# place by that value; elements with equal values keep their original order.
rank_of = dict(zip(X, Y))
X.sort(key=rank_of.get)
```