Python - HackMD

--- title: Python tags: note --- ![](https://i.imgur.com/PDPgPS9.png) [TOC] # terminology * pyautogui * reCAPTCHA # subprocess ```python import subprocess out_bytes = subprocess.check_output(['netstat','-a']) subprocess.check_output('echo Hello World', shell=True) subprocess.call('echo Hello World', shell=True) ``` # logging message level: DEBUG -> INFO -> WARNING -> ERROR -> CRITICAL Default: 只有 WARNING 以上會被打在 Console 上 * 只輸出在 Console 上 ``` import logging logging.warning('Watch out!') # will print a message to the console logging.info('I told you so') # will not print anything ``` * 同時輸出在 Console 和 logfile 上 ```python import logging logger = logging.getLogger() logger.setLevel(logging.DEBUG) formatter = logging.Formatter('%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') ch = logging.StreamHandler() ch.setLevel(logging.DEBUG) ch.setFormatter(formatter) fh = logging.FileHandler("test.log") fh.setLevel(logging.DEBUG) fh.setFormatter(formatter) logger.addHandler(ch) logger.addHandler(fh) if __name__ == "__main__": logging.debug('debug') logging.info('info') logging.warning('warning') logging.error('error') logging.critical('critical') ``` # module 相關 * \_\_init\_\_.py > 在 import module 時會執行的程式 ```python └── mypackage ├── subpackage_1 │ ├── test11.py │ └── test12.py ├── subpackage_2 │ ├── test21.py │ └── test22.py └── subpackage_3 ├── test31.py └── test32.py ``` ```python= # example of __init__.py import mypackage from mypackage.subpackage_1 import test11 from mypackage.subpackage_1 import test12 from mypackage.subpackage_2 import test21 from mypackage.subpackage_2 import test22 from mypackage.subpackage_3 import test31 from mypackage.subpackage_3 import test32 ``` * \_\_init\_\_.py 實現 `from mypackage import *` ```python= # 修改 __all__ __all__ = ['subpackage_1', 'subpackage_2'] ``` # Type * bin ```python bin(123) # '0b1111011' ``` # math * dist ```python math.dist([3], [4]) ``` * inf ```python math.inf ``` # networkx [:link:][networkx-type] [networkx-type]: 
https://hackmd.io/H1ss3XhzRBOPteU3WbgFjQ # function * args ```python def test_var_args(f_arg, *args): print("first normal arg:", f_arg) for arg in args: print("another arg through *args:", arg) test_var_args('yasoob', 'python', 'eggs', 'test') ``` * kwargs ```python def myFun(arg1, **kwargs): for key, value in kwargs.items(): print ("%s == %s" %(key, value)) myFun("Hi", first ='Geeks', mid ='for', last='Geeks') # last == Geeks # mid == for # first == Geeks ``` ```python def dog_bark(): print("Bark !!!") ``` * name of function ```python dog_bark.__name__ ``` * function 也可以做傳遞 ```python def x(): print(20) y = x y() ``` * decorator > 在執行前先做事? ```python def print_func_name(func): def warp_1(): print("Now use function '{}'".format(func.__name__)) func() return warp_1 @print_func_name def dog_bark(): print("Bark !!!") ``` # whos > read all variable # opencc > 繁體轉簡體 ```python= from opencc import OpenCC cc = OpenCC('t2s') text = '投票當天需攜帶投票通知單、國民身分證及印章，若沒有收到投票通知書，可以向戶籍所在地鄰長查詢投票所，印章則是可以用簽名代替，至於身分證則是一定要攜帶。' print(cc.convert(text)) ``` # jieba ```python= import jieba documents = ['我来自北京清华大学', '我喜欢写程式', '每天发技术文章'] jieba.cut(text, cut_all=False, HMM=True) for sentence in documents: seg_list = jieba.cut(sentence) print('/'.join(seg_list)) ``` # copy * deepcopy ```python import copy a = [1, [2,3]] a_deepcopy = copy.deepcopy(a) ``` # sys * sys.path > import path # None * 查詢變數是否為None ```python variable is None ``` # Selenium * 載入瀏覽器 ```python from selenium import webdriver from selenium.webdriver.chrome.options import Options options = Options() options.add_argument("--disable-notifications") chrome = webdriver.Chrome('./chromedriver', chrome_options=options) browser = webdriver.Firefox(options = opts, executable_path="./geckodriver") # chromedriver 要自己下載歐 ``` * 用瀏覽器進入網頁 ```python URL = "http://server-a1.ddns.net:5153/" chrome.get(URL) ``` * 選取網頁元素 ```python chrome.find_element_by_id("seed") # by id chrome.find_element_by_xpath(full XPath) # by xpath 
chrome.find_element_by_id("seed").text # get text ``` * send keys ```python seed = chrome.find_element_by_id("seed") seed.send_keys(Keys.CONTROL, 'a') seed.send_keys(Keys.BACKSPACE) seed.send_keys(Keys.LEFT) seed.click() #點觸 ``` * 其他 ```python chrome.refresh() # 重新整理 chrome.back() # 回到上一頁 chrome.close() # 關閉瀏覽器 ``` # Exception handling * assert ```python assert os.path.exists("./ckiptagger_data"), "ckiptagger_data 不在同層目錄" ``` * try except ```python a = 22 b = 33 try: if a < b: print(n) except: print("except") ``` # list * specific 2d ```python def get_matrix(A, r_s, r_e, c_s, c_e): buf = [ v_i for i, v_i in enumerate(A) if r_s <= i < r_e] buf = [v_i[c_s:c_e] for i, v_i in enumerate(buf)] return(buf) ``` * count ```python .count() ``` * sort ```python .sort() ``` * 添加元素 append ```python list_ = [] ## 空列表 list_.append('Google') ## 使用 append() 添加元素 list_.append('Runoob') ``` * 添加元素 extend ```python bad_1 = ['Bad', 'Smooth Criminal','Speed Demon'] bad_2 = ['Man in the Mirror', 'Dirty Diana'] bad_1.extend(bad_2) print(bad_1) ``` * 將list的element 從 str 轉到 float ```python list(map(float, mylist)) ``` * nested list to 1d list ```python from itertools import chain buf = list(chain.from_iterable(buf)) ``` * split string to list ```python s = "abcabcbb" s.split("a") # ['', 'bc', 'bcbb'] ``` * pop element ```python list1 = ['Google', 'Runoob', 'Taobao'] list_pop=list1.pop(1) ``` * 取前三項並命名 ```python a, b, c = data[:3] ``` * Create empty list ```python lst = [None] * 10 ``` # defaultdict ```python= from collections import defaultdict def zero(): return 0 counter_dict = defaultdict(zero) # default值以一個zero()方法產生 a_list = ['a','b','x','a','a','b','z'] for element in a_list: counter_dict[element] += 1 print(counter_dict) ``` # IPython * 消除輸出 ```python from IPython.display import clear_output clear_output() ``` # Crawler [:link:][Crawer-type] [Crawer-type]: https://hackmd.io/bBRnnn1cQlCtW4y1Do0_2A?both # GloVe ```python embeddings_dict = {} with open("glove.6B.50d.txt", 'r') as f: for 
line in f: values = line.split() word = values[0] vector = np.asarray(values[1:], "float32") embeddings_dict[word] = vector ``` # warnings * no warning ```python import warnings warnings.filterwarnings('ignore') ``` # ord() > ASCII 對應的數值 ```python ord('a') # 97 chr(97) # a ``` # dictionary operation * create dictionary ```python phone_dict = {"Kim":"123", "Tom":"345"} # from list dishes = ["pizza", "sauerkraut", "paella", "Hamburger"] countries = ["Italy", "Germany", "Spain", "USA"] country_specialities_dict = dict(zip(countries, dishes)) ``` * check if a key exists ```python 'Tom' in phone_dict ``` * Add / Update / Remove Key-Value pairs ```python data = {} # start with an empty dictionary data['Joe'] = 181 data['Alice'] = 159 data['Sue'] = 165 print(data) # {'Sue': 165, 'Joe': 181, 'Alice': 159} del data['Joe'] # remove a key-value pair by del statement data['Alice'] = 163 ``` * access item ```python phone_dict = {"Kim":"123", "Tom":"345"} phone_dict.get('Sue') # return None phone_dict.get('Kim') ``` * get all key/ get all value ```python list(data.keys()) list(data.values()) ``` * update ```python f1 = {'apples': 1, 'oranges': 3, 'pears': 2} f2 = {'pears': 4, 'grapes': 5} f1.update(f2) ``` * change key name ```python a_dict = {"a": 1, "B": 2, "C": 3} new_key = "A" old_key = "a" a_dict[new_key] = a_dict.pop(old_key) ``` # set operation * s.issubset(t) * set difference ```python set(A)-set(B) ``` * intersection ```python set(a) & set(b) ``` * union ```python set(a) | set(b) ``` # string operation * Formatting string ```python # % [flag][width][.][precision]typecode # [flag]: +:顯示(+/-) -:靠左 0: 留0 x = 1234 myformat = "integers: %d | %-6d | %06d" print(myformat % (x, x, x)) ``` * duplication ```python start = "Na" * 4 ``` * replace ```python str = "this is string example....wow!!! 
this is really string"; print str.replace("is", "was") ``` * list to string ```python "".join(["a", "b", "c"]) ``` * string to list ```python list('abc') ``` * contain ```python "data" in "asdfasdfdata" ``` * 刪除後面空白 ```python " xyz ".rstrip() ``` * 刪除前後空白 ```python str2 = " Runoob " str2.strip() ``` * split ```python= s = "1 2 3456" s.split(" ") ``` * endswith ```python= s = "1 2 3456" s.endswith("456") ``` # numpy [:link:][numpy-type] [numpy-type]: https://hackmd.io/4_ij3QgtTci1vXN9plcnRw # input type [:link:][input-type] [input-type]: https://docs.python.org/3/library/typing.html # argparse ```python import argparse parser = argparse.ArgumentParser() parser.add_argument("--num", default = 50, type=int) parser.add_argument("--str", default = "yes", type=str) # other parameter: required=True args = parser.parse_args() args = parser.parse_args(args=[]) # for Jupyter Notebook print("--num: %d " % args.num) print("--str: %s " % args.str) # 可以用這招一勞永逸 if "__file__" in dir(): args = parser.parse_args() else: args = parser.parse_args([]) ``` ![](https://i.imgur.com/hNuHu1b.png) ![](https://i.imgur.com/D37LyEa.png) # NLTK [:link:][nltk-Sync] [nltk-Sync]: https://hackmd.io/LltMMZl9RlGCeWAm8JMzLw # package ## upgrade module ``` !pip install --upgrade matplotlib ``` # txt I/O * write ```python f = open('test.txt', "w") # overwrite f.write("add one line\n") f.close() ``` ![](https://i.imgur.com/hRvPyW7.png) * read txt file to list ```python f = open(file_path, 'r+') # f = open(file_path, 'r+', encoding="utf-8") buf = f.read().splitlines() f.close() ``` # pytorch [:link:][pytorch-Sync] [pytorch-Sync]: https://hackmd.io/j5tOstOkRny46AFIYUNfjg # tqdm * for .py ```python from tqdm import tqdm ``` * for notebook ```python from tqdm.notebook import tqdm ``` # plot [:link:][plot-Sync] [plot-Sync]: https://hackmd.io/6LdE25KjSO-8qhpAIOQwKg # namedtuple ```python from collections import namedtuple # define class Identity = namedtuple('Identity', ['first_name','last_name','birthday']) # 
create class identity = Identity('Sam','Lee','4/2') # get element identity.birthday # '4/2' identity.first_name # 'Sam' # replace identity = identity._replace(birthday='4/3') ``` # multiprocessing ```python import multiprocessing as mp import time def job(x): print(x) time.sleep(5) return x*x pool = mp.Pool() # res = pool.map(job, range(100)) def multicore(): pool = mp.Pool() res = pool.map(job, range(100)) print(res) if __name__ == '__main__': multicore() ``` # colab * upload file ```python from google.colab import files uploaded = files.upload() ``` # dictionary ## get item by key ```python dict[keyname] ``` ## create dictionary ```python char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz'] num_dic = {n: i for i, n in enumerate(char_arr)} ``` ```python keys = ['a', 'b', 'c'] values = [1, 2, 3] dictionary = dict(zip(keys, values)) ``` # gym ```python import gym from IPython import display import matplotlib import matplotlib.pyplot as plt %matplotlib inline env = gym.make('CartPole-v0') env.reset() img = plt.imshow(env.render(mode='rgb_array')) # only call this once for _ in range(100): img.set_data(env.render(mode='rgb_array')) # just update the data display.display(plt.gcf()) display.clear_output(wait=True) action = env.action_space.sample() env.step(action) ``` # time ```python import time tStart = time.time()#計時開始 #模擬要測量的function time.sleep(2) print "abc" for x in range(1000): x += 1 print x #end of 模擬要測量的function tEnd = time.time()#計時結束 #列印結果 print "It cost %f sec" % (tEnd - tStart)#會自動做進位 ``` ## datetime ```python import datetime print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')) ``` # itertools ```python import itertools product('ABCD', repeat=2) # 重複排列 product([1,2,3],[3,4]) permutations('ABCD', 2) # 不重複排列 combinations('ABCD', 2) # 不重複組合 combinations_with_replacement('ABCD', 2) # 可重複組合 ``` ## count ```python for t in count(): print(t) if(t >= 15): break ``` # for loop * enumerate ```python for num, line in enumerate(lines): print("{:03d}: 
{}".format(num, line)) ``` * range ```python for i in range(15): ``` * dictionary ```python for key, value in a_dict.items(): ``` # class * 基礎用法 ```python class test: def __init__(self): # class 的 Constructor def __del__(self): # class 的 Destructor def __len__(self): return(5) def __call__(self): return("you call a function") def __repr__(self): return("5") # return type 需要為 string # private member __alive = True test2 = test() len(test2) ``` * Inheritance ```python class Transportation: # 駕駛方法 def drive(self): print("Base class drive method is called.") # 汽車子類別 class Car(Transportation): # 駕駛方法 def drive(self): super().drive() print("Sub class drive method is called.") ``` * with 使用方式 ```python class a: def __init__(self): print("exec __init__") def __enter__(self): print("exec __enter__") def __exit__(self ,type, value, traceback): print("exec __exit__") def __del__(self): print("exec __del__") with a() as s: # pass ``` 執行結果 ```python # exec __init__ # exec __enter__ # exec __exit__ # exec __del__ ``` * @staticmethod > 希望某個 member independent of instance，不帶instance為參數，就會宣告該 member 為 static ，使 members 間的關係更加乾淨俐落。 ```python class Shiba: def __init__(self, height, weight): self.height = height self.weight = weight @staticmethod def pee(length): print("pee" + "." 
* length) # 可以直接做 Shiba.pee() ``` * @property ``` class Bank_acount: @property def password(self): return '密碼:123' # 只能讀 andy = Bank_acount() print(andy.password) ``` * check variable ```python classname.__dict__ ``` * pickle **save class** ```python import pickle class Company(object): def __init__(self, name, value): self.name = name self.value = value # save with open('company_data.pkl', 'wb') as output: company1 = Company('banana', 40) pickle.dump(company1, output, pickle.HIGHEST_PROTOCOL) # read with open('company_data.pkl', 'rb') as input: company1 = pickle.load(input) ``` # call variable by string ```python globals()[string] ``` # change type of list ```python list(map(int, list_)) [ list(map(int, i)) for i in after_encode] # 2d list ``` # collections [:link:][collection] [collection]: https://docs.python.org/3/library/collections.html * Counter ```python from collections import Counter c = Counter('abcasd') c.update('red') c.most_common() len(c) ``` * deque ```python from collections import deque d = deque('ghi') # deque(['g', 'h', 'i']) d.append('j') # deque(['g', 'h', 'i', 'j']) d.appendleft('f') # deque(['f', 'g', 'h', 'i', 'j']) ``` * OrderedDict ```python= from collections import OrderedDict d1 = OrderedDict() ``` # .py 的開頭 ```python if __name__ == '__main__': ``` # 偵測string 的語言 ```python from textblob import TextBlob b = TextBlob("bonjour") b.detect_language() ``` # sklearn [:link:][sklearn] [sklearn]: https://hackmd.io/nesOt10mR120KCECtwSk8A # random * random shuffle ```python import random number_list = [7, 14, 21, 28, 35, 42, 49, 56, 63, 70] random.shuffle(number_list) ``` * random choices with weight ```python import random list_ = [20, 30, 40, 50 ,60] weights = [0.1, 0.1, 0.1, 0.2, 0.5] sampling = random.choices(list_, weights = weights) ``` * random choices ```python import random list = [20, 30, 40, 50 ,60, 70, 80] sampling = random.choices(list, k=4) ``` * generate random integer ```python random.randint(0, 100) random.randint(0, 1) ``` * 
random choice with replacement ```python np.random.choice(5, 3, replace=True) ``` # json ## save * 若要儲存numpy 且裡面有int 要先轉換成一般的int ```python # write file import json with open('savename.txt', 'w') as outfile: json.dump(savedata, outfile) ``` * 存成中文要加上utf-8 ```python # 存成中文 with open(i + ".txt",'w', encoding='utf8') as outfile: json.dump(savedata, outfile, ensure_ascii=False) ``` * 如果讀進來是 str 則可用以下方式轉成dict ```python data2 = json.loads(data) ``` ## read ```python # read file with open('dictonary.json') as json_file: data = json.load(json_file) # method 2 [json.loads(line) for line in open('train_gold.json', 'r')] ``` * read function ```python # read file def read_data(str_): path = '%s/%s' % (args.d, str_) with open(path) as json_file: buf = json.load(json_file) if( args.t): buf = buf[:20] globals()[str_[:-5]] = buf print("assign %s" % str_[:-5]) ``` # os ```python import os os.path.exists(filepath) # 檢查檔案目錄是否存在 path = os.getcwd() #取得目前路徑 os.chdir(path) #改變路徑 os.listdir(path) # 列出folder的全部item os.mkdir(path) # make dir os.rename(a, b) # rename os.path.abspath(os.path.dirname(__file__)) + "/swear_words_chinese.json" # 最保險的相對路徑 os.system(command) # 執行指令 # Rmk: 若想回到上一層路徑可用 os.chdir("..") # Rmk: 相對路徑可用 "./" 表示 ``` # re > Regular expression [Online regular expression](https://regex101.com/) ```python import re matchObj = re.match("(.*)_(.*)_(.*)_(.*).json", str) ``` ```python matchObj = re.match("(.*)(\(˙.*\))(.*)", str_) if( type(matchObj) == re.Match ): str_ = matchObj.group(1) + matchObj.group(3) ``` # select list element by bool index ```python from itertools import compress list_a = [1, 2, 4, 6] fil = [True, False, True, False] list(compress(list_a, fil)) ``` # draw correlation map 資料格式:(dataframe) | A | B | C | | -------- | -------- | -------- | | | | | | | | | | | | | | | | | ```python import pandas as pd import seaborn as sns import matplotlib.pyplot as plt data = {'A': [45,37,42,35,39], 'B': [38,31,26,28,33], 'C': [10,15,17,21,12] } df = 
pd.DataFrame(data,columns=['A','B','C']) corrMatrix = df.corr() sns.heatmap(corrMatrix, annot=True) # plt.savefig('corrMatrix.png') 輸出 #須放在 plt.show 之前 # plt.figure(figsize=(20,20)) 調整大小 plt.show() ``` # select list element by index ```python test_list = [9, 4, 5, 8, 10, 14] index_list = [1, 3, 4] list(map(test_list.__getitem__, index_list)) # [4, 8, 10] ``` # pandas * read excel ```python xls = pd.ExcelFile("華語八千詞表20200917.xlsx") xls.sheet_names pd.read_excel(xls, '準備級一級') ``` * read csv ```python pd.read_csv(path) ``` * save ```python df.to_csv('result.csv', index=False) df.to_csv(save_path, index=False, encoding="utf_8_sig") # save Chinese file ``` * make data frame ```python dict_ = {"標題": [1, 2, 3, 4, 5], "直覺分數": None, "誇張表現": None, "情緒性字眼": None, "刻意隱藏資訊": None, "不正式用詞": None, "不必要的資訊": None, "附註": None } df = pd.DataFrame.from_dict(dict_) ``` * get element ```python df.iloc[0] # get first row df[df.columns[0]] # get first column ``` * groupby ```python sectors = fortune.groupby("Sector") sectors.get_group("Energy") ``` * round ```python df.round(1) ``` * read .json file ```python data = pd.read_json('https://bit.ly/108-nlp-train', lines = True) ``` * column switch ```python df.reindex(columns=["idx", 'categories', 'reply', 'text']) ``` * sort by one column ```python data.sort_values(by=['Body ID']) ``` # Sorting list based on values from another list ```python Y = [ 0, 1, 1, 0, 1, 2, 2, 0, 1] X = ["a", "b", "c", "d", "e", "f", "g", "h", "i"] X.sort(key=dict(zip(X, Y)).get) ```