SC_0813 - HackMD

# SC_0813 ## `demo1_env_check_warm_up.py` ```python import os, sys import numpy import pandas import matplotlib # ctrl+shift+F10 print(os.getcwd()) print(sys.executable) print(numpy.__version__) print(pandas.__version__) print(matplotlib.__version__) # ctrl+alt+L # alt+Enter demoObj = {'course': "Data Processing", 'duration': 28, 'instructor': "MarkHo"} ``` ## `demo2_arithmatic.py` ```python import numpy as np print("hello" + ",world") l1 = [1, 2, 3] l2 = [4, 5, 6] print(l1 + l2) a1 = np.array(l1) a2 = np.array(l2) print(a1 + a2, a1 - a2, a1 * a2, a1 / a2) print(a2 / a1, a2 // a1, a2 % a1) print(a1 ** a2, a2 ** a1) ``` ## `demo3_complex.py` ```python c1 = 2 + 3j c2 = 5 c3 = 4j c4 = -5 print(type(c1), type(c2), type(c3)) print(c1.conjugate()) print(c1*5, c1*5j) print(abs(c1), abs(c2), abs(c3), abs(c4), (2**2+3**2)**0.5) print(c1.real, c1.imag) ``` ## `demo4_fraction.py` ```Python from fractions import Fraction print(Fraction(250, 72)) print(Fraction(5, 2) + Fraction(19, 6) + Fraction(27, 3)) frac1 = Fraction(250, 70) print(frac1.denominator, frac1.numerator) print(f"除數是{frac1.denominator}, 被除數是{frac1.numerator}") ``` ## `demo5_decimal.py` ```python from decimal import Decimal as Dec print(Dec(2.968)) print(Dec('2.968')) print(Dec(0.001)*Dec(2968)-Dec(2.968)) print(Dec('0.001')*Dec(2968)-Dec('2.968')) ``` ## `demo6_object_equality.py` ```python class Person: def __init__(self, age): self.age = age age = 38 print(f"age id = {hex(id(age))}") age = 39 print(f"age id = {hex(id(age))}") v1 = 39 print(f"v1 id={hex(id(v1))}") print(v1 == age, v1 is age) person1 = Person(38) print(f"person1 id={hex(id(person1))}") person2 = person1 print(f"person2 id={hex(id(person2))}") person3 = Person(38) print(f"person3 id={hex(id(person3))}") person1.age = 39 person3.age = 39 print(f"person1 id={hex(id(person1))}") print(f"person2 id={hex(id(person2))}") print(f"person3 id={hex(id(person3))}") ``` ## `demo7_equal_identical.py` ```python l1 = ['apple', 'banana'] l2 = l1 l3 = ['apple', 
'banana'] print(l1 is l2, l1 == l2) print(l1 is l3, l1 == l3) ``` ## `demo8_fp1.py` ```python from pprint import pprint courses = [{'name': 'poop', 'field': 'python', 'attendee': 10, 'remote': False}, {'name': 'bdpy', 'field': 'python', 'attendee': 15, 'remote': True}, {'name': 'andbiz3', 'field': 'android', 'attendee': 5, 'remote': False}] pprint(courses) courses = [{'name': 'poop', 'field': 'python', 'attendee': 10, 'remote': False}, {'name': 'bdpy', 'field': 'python', 'attendee': 15, 'remote': True}, {'name': 'andbiz3', 'field': 'android', 'attendeen': 5, 'remote': False}] for course in courses: print(course['attendee']) ``` ## `demo9_fp2.py` ```python import collections course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) print(poop) print(poop.name, poop.field, poop.attendee) poop.name='aiocv' ``` ## `demo10_fp3.py` ```python import collections from pprint import pprint course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) courses = [poop, bdpy, andbiz] pprint(courses) del courses[0] pprint(courses) ``` ## `demo11_fp4.py` ```python= import collections from pprint import pprint course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, 
bdpy, andbiz, pykt, aiocv) # using filter result1 = filter(lambda x: x.remote is True, courses) print(type(result1)) pprint([x for x in result1]) result2 = filter(lambda x: x.attendee >= 10, courses) print(next(result2)) print(next(result2)) result3 = filter(lambda x: x.remote is True, courses) result4 = filter(lambda x: x.attendee > 10, result3) pprint([x for x in result4]) ``` ## `demo12_fp5.py` ```Python import collections from pprint import pprint course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) filter1 = lambda x: x.remote is True result1 = tuple(filter(filter1, courses)) print(type(result1)) pprint(result1) pprint(tuple(filter(filter1, courses))) ``` ## `demo13_fp6.py` ```Python import collections from pprint import pprint course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) available1 = lambda c: c.attendee >= 8 def available(course): return course.attendee >= 8 def isRemote(course): return course.remote is True validRemoteCourses = tuple(filter(isRemote, filter(available, courses))) pprint(validRemoteCourses) ``` ## `demo14_fp7.py` ```python 
import collections from pprint import pprint course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) pprint([c for c in courses if c.field == 'python']) pprint(tuple([c for c in courses if c.field == 'python'])) pprint(tuple(c for c in courses if c.field == 'python')) ``` ## `demo15_fp8.py` ```python import collections from pprint import pprint course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) name_and_field = map(lambda x: {'name': x.name, 'field': x.field}, courses) pprint(tuple(name_and_field)) ``` ## demo16_fp9.py ```python= import collections from pprint import pprint course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, 
bdpy, andbiz, pykt, aiocv) pprint([{'name': c.name, 'income': c.attendee * 8000} for c in courses]) pprint(tuple({'name': c.name, 'income': c.attendee * 8000} for c in courses)) ``` ## demo17_fp10.py ```python= import collections from pprint import pprint from functools import reduce course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) income_tuple = tuple({'name': c.name, 'income': c.attendee * 8000} for c in courses) total_income = reduce(lambda acc, val: acc+val['income'], income_tuple, 0) pprint(total_income) pprint(sum(x['income'] for x in income_tuple)) ``` ## demo18_fp11.py ```python= import collections from pprint import pprint from functools import reduce course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) def reducer(acc, val): acc[val.field].append(val.name) return acc courses_by_category = reduce(reducer, courses, {'python': [], 'android': []}) pprint(courses_by_category) ``` ## demo19_fp12 ```python import collections from functools import reduce from pprint import pprint course = collections.namedtuple('course', ['name', 'field', 
'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) cplus = course(name='cplus', field='c++', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv,cplus) def reducer(acc, val): acc.setdefault(val.field, []) acc[val.field].append(val.name) return acc courses_by_category = reduce(reducer, courses, {}) pprint(courses_by_category) ``` ## demo20_fp13 ```python= import collections from functools import reduce from pprint import pprint from collections import defaultdict course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) def reducer(acc, val): acc[val.field].append(val.name) return acc courses_by_category = reduce(reducer, courses, defaultdict(list)) pprint(courses_by_category) pprint(dict(courses_by_category)) ``` ## demo21_fp14 ```python= import collections from pprint import pprint import itertools course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = 
course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) sorted_courses = sorted(courses, key=lambda x: x.field) pprint(sorted_courses) courses_by_remote = {c[0]: list(c[1]) for c in itertools.groupby(sorted_courses, lambda x: x.field)} pprint(courses_by_remote) courses_by_remote2 = {c[0]: list([c.name for c in c[1]]) for c in itertools.groupby(sorted_courses, lambda x: x.field)} pprint(courses_by_remote2) courses_by_remote3 = [(c[0], list(c.name for c in c[1])) for c in itertools.groupby(sorted_courses, lambda x: x.field)] pprint(courses_by_remote3) ``` ## demo22_fp15 ```python= from functools import reduce print({'a': 0, 'b': 1, 'a': 0 + 1, 'b': 1 + 2}) import collections from pprint import pprint course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) reducer = lambda acc, val: {**acc, **{val.field: acc[val.field] + [val.name]}} courses_by_category = reduce(reducer, courses, {'python': [], 'android': []}) pprint(courses_by_category) ``` ## demo23_fp16.py ```python= import collections from pprint import pprint import time course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', 
attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) def transform(x): print(f"process record:{x.name}") time.sleep(3) result = {'name': x.name, 'revenue': x.attendee * 5000} print(f"done process record:{x.name}") return result start = time.time() result = tuple(map(transform, courses)) end = time.time() print(f"total time:{end-start:.2f} seconds") pprint(result) ``` ## demo24_fp17 ```python= import collections from pprint import pprint import time import multiprocessing course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) import os poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) def transform(x): print(f"process:{os.getpid()} record:{x.name}") time.sleep(3) result = {'name': x.name, 'revenue': x.attendee * 5000} print(f"done process:{os.getpid()} record:{x.name}") return result if __name__ == '__main__': start = time.time() pool = multiprocessing.Pool(processes=4, maxtasksperchild=1) print(f"now we run with process:{pool._processes}") result = pool.map(transform, courses) end = time.time() print(f"total time:{end-start:.2f} seconds") pprint(result) ``` ## demo25_fp18 ```python= import collections from pprint import pprint import time import concurrent.futures course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) import os poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', 
attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) def transform(x): print(f"process:{os.getpid()} record:{x.name}") time.sleep(3) result = {'name': x.name, 'revenue': x.attendee * 5000} print(f"done process:{os.getpid()} record:{x.name}") return result if __name__ == '__main__': start = time.time() with concurrent.futures.ProcessPoolExecutor() as executor: result = list(executor.map(transform, courses)) end = time.time() print(f"total time:{end-start:.2f} seconds") pprint(result) ``` # demo25_fp18(change to thread) ```python= import collections from pprint import pprint import time import concurrent.futures course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote']) print(type(course)) print(course) import os poop = course(name='poop', field='python', attendee=10, remote=False) bdpy = course(name='bdpy', field='python', attendee=15, remote=True) pykt = course(name='python', field='python', attendee=9, remote=False) andbiz = course(name='andbiz', field='android', attendee=18, remote=False) aiocv = course(name='aiocv', field='python', attendee=10, remote=True) courses = (poop, bdpy, andbiz, pykt, aiocv) def transform(x): print(f"process:{os.getpid()} record:{x.name}") time.sleep(3) result = {'name': x.name, 'revenue': x.attendee * 5000} print(f"done process:{os.getpid()} record:{x.name}") return result if __name__ == '__main__': start = time.time() with concurrent.futures.ThreadPoolExecutor() as executor: #with concurrent.futures.ProcessPoolExecutor() as executor: result = list(executor.map(transform, courses)) end = time.time() print(f"total time:{end-start:.2f} seconds") pprint(result) ``` ## demo26_library ```python= def foo(a, b): return "[demo26][foo]result=" + str(a + b) def bar(a, b): return "[demo26][bar]result=" + str(a * b) print("inside demo26, foo(1,2)=", foo(1, 2)) print("inside 
demo26, bar(3,4)=", bar(3, 4)) ``` ## demo27_use_library ```python= import demo26_library print(demo26_library.foo(1, 2)) print(demo26_library.bar(3, 4)) import demo26_library as library print(library.foo(5, 6)) print(library.bar(7, 8)) from demo26_library import foo, bar print(foo(9, 10)) print(bar(11, 12)) from demo26_library import foo as f1, bar as f2 print(f1(13, 14)) print(f2(15, 16)) ``` ## demo28_variable_function_call.py ```python= def sample_call(fix1, fix2, *args): print(fix1) print(fix2) print(list(args)) sample_call("hello", "world") sample_call("hihi", "welcome", "ABC123") sample_call("multiple", "input", 5, 300, None, 5 + 4j) l1 = ['Sunday', 'Monday', 'Tuesday'] sample_call("pass a", "list", l1) sample_call("pass a", "list", *l1) ``` ## demo29_key_value_function_call.py ```python= def sample_key_value_arguments(fix1, fix2, **kwargs): print(f"fix1={fix1}, fix2={fix2}") for key, value in kwargs.items(): print(f"parameter name={key}, value={value}") sample_key_value_arguments(500, "hi", name="mark", location="Taipei") sample_key_value_arguments("hello", 'world') sample_key_value_arguments("hihi", "welcome", course="BDPY", duration=35, period=['Sun', 'Mon', 'Tue']) course = {'name': "poop", "level": "entry", "duration": 35} sample_key_value_arguments("unpack"," a dict", **course) ``` ## demo30_fix_argument1.py ```python= def lab30(fix1, fix2, fix3): print("---") print(f"arg1={fix1}, arg2={fix2}, arg3={fix3}") print("---") args = ("two", 3.0) lab30(1, *args) lab30(*args, 5) args2 = ['two', 3.0] lab30('OK', *args2) args3 = {'Hi', "OK", 'Welcome'} lab30(*args3) args4 = {'course':"PYKT",'level':'advanced',"duration":35} lab30(*args4) ``` ## demo31_fix_argument2 ```python= def dumpEmployee(id, name, department): print(f"id={id}") print(f"name={name}") print(f"department={department}") dumpEmployee(5, "mark", "R&D") dumpEmployee(id=5, name="Mark Ho", department="Research&Development") dumpEmployee(5, name="Mark Lin", department="FAE") emp1 = {'id': 1, 'name': 
"Kelvin", 'department': 'Sales'} dumpEmployee(**emp1) emp2 = (2, "James", "legal") dumpEmployee(*emp2) ``` ## demo32_str_repr.py ```python= from datetime import datetime now = datetime.now() print("repr", repr(now)) print("str", str(now)) print(now) print('inside list:', [now]) print('inside tuple', (now,)) print('not inside tuple', (now)) print('inside dict',{'k1':now}) print('inside dict, but keep str',{'k1':str(now)}) ``` ## demo32_system_command.py ```python= import os import sys print(os.getcwd()) print(sys.executable) print(sys.argv) PATH1 = "logs" os.mkdir(PATH1) os.chdir(PATH1) print(os.getcwd()) os.chdir("..") os.rmdir(PATH1) ``` ## demo33_math_random.py ```python= import math import random print(math.pi, math.log10(10), math.log10(5)) print(math.sqrt(5)) for x in range(50): print(random.randint(10, 20)) stores = ['7-11', 'fami', 'Ok', "Hi-Life"] for x in range(20): print(random.choice(stores)) cards = ['A', 'K', 'Q', 'J', 10] for x in range(10): random.shuffle(cards) print(cards) ``` ## demo34_itertools.py ```python= import itertools r1 = itertools.chain('abc', "123", 'PQRST') t1 = tuple(r1) print(t1) print(t1) TEAMS = ['A', 'B', 'C', 'D', 'E'] r2 = itertools.permutations(TEAMS, 2) t2 = tuple(r2) print(len(t2), t2) r3 = itertools.combinations(TEAMS, 3) t3 = tuple(r3) print(len(t3), t3) ``` ## `demo35_shutil.py` ```python= import shutil import os ORIGINAL = "data1" DUPLICATE = "data2" shutil.copytree(ORIGINAL, DUPLICATE) os.chdir(DUPLICATE) print("now inside:", os.getcwd()) os.chdir("..") shutil.rmtree(DUPLICATE) ``` ## `demo36_json.py` ```python= import json v1 = ["Sunday", "monday", 100, 3.14, None, '中文', {"name": "Mark"}] print(type(v1)) r1 = json.dumps(v1) print(type(r1)) print(r1) v2 = {"course": "BDPY", "instructor": "Mark", "period": ['Thursday', 'Friday']} r2 = json.dumps(v2) print(type(r2)) print(r2) ``` ## `demo37_bugzilla.py` * [https://bugzilla.mozilla.org/rest/bug/35](https://bugzilla.mozilla.org/rest/bug/35) * [json 
viewer](https://chrome.google.com/webstore/detail/json-viewer/gbmdgpbipfallnflgajpaliibnhdgobh?utm_source=chrome-ntp-icon) ```python= import requests proxies = { "http": "10.239.9.190:443", "https": "10.239.9.190:443", } URL = 'https://bugzilla.mozilla.org/rest/bug/35' #response = requests.get(URL, proxies=proxies, verify=False) response = requests.get(URL, proxies=None, verify=False) print(response.status_code) print(type(response.json())) result = response.json() for k,v in result.items(): print(f"key={k}, value={v}") bugs = result["bugs"] firstBug = bugs[0] firstBugCreator = firstBug["creator"] print(firstBugCreator) ``` ## demo38_ucom_http.py ```python= import requests from bs4 import BeautifulSoup proxies = { "http": "10.239.9.190:443", "https": "10.239.9.190:443", } URL = 'https://www.uuu.com.tw/' # r = requests.get(URL, proxies=proxies, verify=False) r = requests.get(URL, proxies=None, verify=False) soup = BeautifulSoup(r.content, "html.parser") print(type(soup)) print(soup.title.name) print(soup.title.string) hot_courses = soup.find('div', {'id': 'course_list'}) print(type(hot_courses)) print(hot_courses) items = hot_courses.find_all('a') for item in items: print(item) ``` ## demo39_urllib_get_image * [cwb image](https://www.cwb.gov.tw/Data/satellite/LCC_IR1_CR_2750/LCC_IR1_CR_2750-2020-08-14-10-30.jpg) ```python= from urllib.request import urlopen from urllib import request from PIL import Image URL = 'https://www.cwb.gov.tw/Data/satellite/LCC_IR1_CR_2750/LCC_IR1_CR_2750-2020-08-14-10-30.jpg' # for SC proxy settings # proxy = request.ProxyHandler({'https': '10.239.9.190:443'}) # opener = request.build_opener(proxy) # request.install_opener(opener) # end of SC proxy setting fileToSave = urlopen(URL) # manual make a directory images image = Image.open(fileToSave) image.save('images/demo39.jpg') # get half size halfSize = (image.size[0] // 2, image.size[1] // 2) halfImage = image.resize(halfSize, Image.LANCZOS) halfImage.save('images/demo39_small.jpg') rot1 
= halfImage.transpose(Image.ROTATE_90) rot1.save('images/r90.jpg') rot2 = halfImage.transpose(Image.ROTATE_180) rot2.save('images/r180.jpg') rot3 = halfImage.transpose(Image.ROTATE_270) rot3.save('images/r270.jpg') rot4 = halfImage.rotate(60) rot4.save('images/r60.jpg') ``` ## README.md ```markdown # Python and Data Wrangling 4 days ## `requests` under proxy usage ```python proxies = { "http": "10.239.9.190:443", "https": "10.239.9.190:443", } URL = 'https://www.uuu.com.tw/' # r = requests.get(URL, proxies=proxies, verify=False) r = requests.get(URL, proxies=None, verify=False) ``` ## `urlopen`under proxy usage ```python # for SC proxy settings # proxy = request.ProxyHandler({'https': '10.239.9.190:443'}) # opener = request.build_opener(proxy) # request.install_opener(opener) # end of SC proxy setting ``` ``` ``` ## demo40_file_io ```txt (Python_Introduction) Python（英國發音：/ˈpaɪθən/ 美國發音：/ˈpaɪθɑːn/）是一種廣泛使用的直譯式、進階程式、通用型程式語言，由吉多·范羅蘇姆創造，第一版釋出於1991年。Python是ABC語言的後繼者，也可以視之為一種使用傳統中綴表達式的LISP方言[4]。Python的設計哲學強調代碼的可讀性和簡潔的語法（尤其是使用空格縮排劃分代碼塊，而非使用大括號或者關鍵詞）。相比於C++或Java，Python讓開發者能夠用更少的代碼表達想法。不管是小型還是大型程式，該語言都試圖讓程式的結構清晰明瞭。 Python（パイソン）インタープリタ型の高水準汎用プログラミング言語である。グイド・ヴァン・ロッサムにより創り出され、1991年に最初にリリースされたPythonの設計哲学は、その顕著なホワイトスペースの使用によってコードの可読性が重視されている。その言語構成とオブジェクト指向のアプローチは、プログラマが小規模なプロジェクトから大規模なプロジェクトまで、明確で論理的なコードを書くのを支援することを目的としている。 Python is an interpreted, high-level, general-purpose programming language. Created by Guido van Rossum and first released in 1991, Python's design philosophy emphasizes code readability with its notable use of significant whitespace. 
Its language constructs and object-oriented approach aim to help programmers write clear, logical code for small and large-scale projects.[28] ``` ```python # mkdir data # make a file Python_Introduction.txt file1 = open('data/Python_Introduction.txt', encoding='utf8') readme_txt = file1.read() print(type(readme_txt)) print(readme_txt) file1.close() with open('data/Python_Introduction.txt', encoding='utf8') as file2: readme_txt2 = file2.read() print(type(readme_txt2)) print(readme_txt2) file3 = open('data/clone1','w', encoding='utf8') file3.write(readme_txt) file3.close() with open('data/clone2','w', encoding='utf-8') as file4: file4.write(readme_txt2) ``` ## demo41_csv_read.py ```python import csv sampleFile = open('data/demo41.csv', encoding='utf8') sampleReader = csv.reader(sampleFile) sampleData = list(sampleReader) sampleFile.close() print(type(sampleData)) print(sampleData) ``` ## demo42_tk1.py ```python import tkinter from tkinter import font import tkinter.font counter = 10 counter2 = [10] def callback1(): global counter # counter = 0 # print('button clicked') label1.config(text=f'button clicked {counter} times') counter += 1 def callback2(): label2.config(text=f"button2 clicked {counter2[0]} times") counter2[0] += 1 def callback3(): label1.config(text=f'button3 clicked {counter3.get()} times') counter3.set(counter3.get() + 1) top = tkinter.Tk() counter3 = tkinter.IntVar() counter3.set(10) # query available font for f in font.families(): print(f) myFont1 = font.Font(family="Tahoma", size=24) label1 = tkinter.Label(top, text='Hello Tk@Python', font=myFont1, fg='#C0FFEE', bg='#000000', padx=20, pady=10) label2 = tkinter.Label(top, text="mark@patristar.com", font=myFont1, fg='#000000', bg='#FFC0EE', padx=10, pady=20) button1 = tkinter.Button(top, text='click me1', font=myFont1, fg='#C0FFEE', bg='#440000', command=callback1) button2 = tkinter.Button(top, text='click me2', font=myFont1, fg='#FFEEC0', bg='#004400', command=callback2) button3 = tkinter.Button(top, 
text='clicked me3', font=myFont1, fg='#EEFFC0', bg='#000044', command=callback3) label2.pack() label1.pack() button1.pack() button2.pack() button3.pack() top.mainloop() ``` ## demo43_tk2.py ```python import tkinter from tkinter import font import tkinter.font def function1(ev): label1.config(text=f'left single clicked:{ev}', bg='#880000') def function2(ev): label1.config(text=f'right double clicked{ev}', bg='#008800') def function3(ev): label1.config(text=f"middle click drag:{ev}", bg='#000088') top = tkinter.Tk() myFont1 = font.Font(family="Tahoma", size=48) label1 = tkinter.Label(top, text="status", font=myFont1, fg='#C0FFEE', bg='#000000', padx=30, pady=20) button1 = tkinter.Button(top, text='click', font=myFont1, fg='#000000', bg='#FFC0EE', padx=20, pady=30) label1.pack() button1.pack() button1.bind('<Button-1>', function1) button1.bind('<Double-3>', function2) button1.bind('<B2-Motion>', function3) top.mainloop() ``` ## demo44_tk3.py ```python import tkinter from tkinter import font import tkinter.font def func1(ev): message1.config(text=f'move to [{ev.x},{ev.y}]') top = tkinter.Tk() myFont1 = font.Font(family="Tahoma", size=48) message1 = tkinter.Message(top, text="detect mose move", font=myFont1) label1 = tkinter.Label(top, text="detect area", bg='#C0FFEE', font=myFont1, padx=30, pady=30) message1.pack() label1.pack() label1.bind('<Motion>', func1) top.mainloop() ``` ## demo45_tk4.py ```python import tkinter from tkinter import font import tkinter.font def func1(): label1.config(text="you will buy pixel5") def func2(): label1.config(text="you will buy iphon12") def func3(): if var1.get() == 1: label1.config(text="you will buy pixel5") elif var1.get() == 2: label1.config(text="you will buy iphon12") top = tkinter.Tk() myFont1 = font.Font(family="Tahoma", size=48) var1 = tkinter.IntVar() var1.set(2) label1 = tkinter.Label(top, font=myFont1, text="you will choose from below:") button1 = tkinter.Radiobutton(top, font=myFont1, text='Google', value=1, 
variable=var1, command=func3) button2 = tkinter.Radiobutton(top, font=myFont1, text='Apple', value=2, variable=var1, command=func3) label1.pack() button1.pack() button2.pack() top.mainloop() ``` ## demo46_tk5.py ```python import tkinter from tkinter import font import tkinter.font def func(scale): label1.config(text=formattedString % int(scale)) top = tkinter.Tk() formattedString = 'value=%d' value = tkinter.IntVar() value.set(0) myFont1 = font.Font(family="Tahoma", size=36) label1 = tkinter.Label(top, text=formattedString % value.get(), font = myFont1) scale = tkinter.Scale(top, label='Scale', font=myFont1, orient='h',from_=0, to=100, showvalue=True, variable=value, command=func) label1.pack() scale.pack() top.minsize(300,300) top.maxsize(300,300) top.mainloop() ``` ## demo47_tk6 ```python import tkinter from tkinter import font import tkinter.font def display(ev): label1.config(text=entry1.get()) print(ev) top = tkinter.Tk() myFont1 = font.Font(family="Tahoma", size=36) label1 = tkinter.Label(top, text="Input Text", font=myFont1) entry1 = tkinter.Entry(top, font=myFont1) button1 = tkinter.Button(top, text='submit', font=myFont1) label1.pack() entry1.pack() button1.pack() entry1.bind('<Return>', display) button1.bind('<Button-1>', display) top.mainloop() ``` ## demo48 ```python import graphviz as gv print(gv.__version__) g1 = gv.Digraph(format='svg') g1.node('A') g1.node('B') g1.edge('A', 'A') g1.edge('A', 'A') g1.edge('A', 'A') g1.edge('A', 'B') g1.edge('B', 'B') g1.edge('B', 'B') g1.render('graph/demo48') ``` ## demo49_gv2 * download this, and unzip * [https://www2.graphviz.org/Packages/stable/windows/10/msbuild/Release/Win32/graphviz-2.38-win32.zip](https://www2.graphviz.org/Packages/stable/windows/10/msbuild/Release/Win32/graphviz-2.38-win32.zip) ```python import functools import graphviz as gv from itertools import combinations graph = functools.partial(gv.Graph, format='svg') digraph = functools.partial(gv.Digraph, format='svg') g3 = graph() g4 = digraph() # 
'A','Apple','Google' def add_nodes(graph, nodes): for n in nodes: if isinstance(n, tuple): graph.node(n[0], **n[1]) else: graph.node(n) return graph def add_edges(graph, edges): for e in edges: if isinstance(e[0], tuple): graph.edge(*e[0], **e[1]) else: graph.edge(*e) return graph teams = ['apple', 'google', 'facebook'] races = tuple(combinations(teams, 2)) g3 = add_edges(add_nodes(g3, teams), races) print(g3.source) g3.render('images/demo50_g3') nodes = [('A', {'label': 'Apple'}), ('B', {'label': 'Google'}), ('C', {'label': '臉書'}), ('D', {})] edges = [(('A', 'B'), {'label': 'phone maker compete'}), (('A', 'C'), {'label': 'game platform'}), (('B', 'C'), {'label': 'AI develop'}), (('B', 'D'), {})] g4 = add_nodes(g4, nodes) g4 = add_edges(g4, edges) g4.render('graph/demo50_g4') styles = { 'graph': { 'label': 'GAFA', 'fontsize': '36', 'fontcolor': '#C0FFEE', 'bgcolor': '#99CCFF', 'rankdir': 'BT', 'fillcolor': '#FFC0EE' } } def apply_styles(graph, styles): graph.graph_attr.update(('graph' in styles and styles['graph']) or {}) return graph g4 = apply_styles(g4, styles) g4.render('graph/demo50_g4_complete') ``` ## demo51_and_or_explain.py ```python x1 = True x2 = False x3 = False print(x1 and x2, x1 and x1, x2 and x2) x2s = [3.14, 500, 'hello world', 3 + 4j, None, True, False] for x2 in x2s: print(x1 and x2) print("@@@@@@@@@") for x2 in x2s: print(x3 or x2) ``` ## demo52_class_object_creation ```python # ctrl+H class MyClass(object): pass inst1 = MyClass() print(f"MyClass type:{type(MyClass)}, generate instance type:{type(inst1)}") print(f"inst1 class = {inst1.__class__}, class name={inst1.__class__.__name__}") print(f"inst1 bases = {inst1.__class__.__bases__}") print(f"inst1 class equal to type? 
{type(inst1)==MyClass}") class MySubClass(MyClass): pass inst2 = MySubClass() print(f"my subclass bases = {inst2.__class__.__bases__}") ``` ## demo53_object_class_property ```python class Car: vendor = "lexus" valid = True c1 = Car() c2 = Car() print(c1.vendor, c2.vendor, Car.vendor) print(c1.valid, c2.valid, Car.valid) Car.valid = False print(c1.valid, c2.valid, Car.valid) c1.valid = True print(c1.valid, c2.valid, Car.valid) c1.capacity = 7 c2.color='RED' print(c1.capacity, c1.valid, c1.vendor) print(c2.color, c2.vendor, c2.valid) Car.max = 10000 print(c1.max, c2.max, Car.max) ``` ## demo54_class_instance_property ```python class Team: name = 'Normal Team' pass team1 = Team() print(team1.name) team1.name = 'Big Data Team' team2 = Team() print(team1.name, team2.name, Team.name, sep='||') del team1.name print(team1.name, team2.name, Team.name, sep='||') ``` ## demo55_whole_class ```python class Team: member = 7 # class variable def working_hour(self): # method return self.day def all_working_hour(self): # method self.day = 7 # field return self.day * self.member @classmethod def get_member(cls): return cls.member @staticmethod def calculate(x, y): return x ** y + 200 print(Team.calculate(3, 5)) print(Team.get_member()) t1 = Team() print(t1.all_working_hour()) print(t1.working_hour()) print(Team.get_member(), t1.get_member()) ``` ## demo56_field_practice.py ```python class Rectangle: def __init__(self, width, height): self.width = width self.height = height def calculate(self): return self.width * self.height r1 = Rectangle(3, 5) r2 = Rectangle(7, 9) print(r1.calculate(), r2.calculate()) ``` ## demo56_inherit ```python class Emp: gradeLevel = 6 def startWork(self): pass def endWork(self): pass pass class PM(Emp): def startWork(self): print('pm with grade=',self.gradeLevel,"start to work") pass class RD(Emp): pass pm1 = PM() pm1.startWork() rd1 = RD() rd1.startWork() print(Emp.gradeLevel, RD.gradeLevel, PM.gradeLevel) RD.gradeLevel = 7 print(Emp.gradeLevel, 
RD.gradeLevel, PM.gradeLevel) Emp.gradeLevel = 8 print(Emp.gradeLevel, RD.gradeLevel, PM.gradeLevel) del RD.gradeLevel print(Emp.gradeLevel, RD.gradeLevel, PM.gradeLevel) ``` ## demo57_instance ``` sum = 0 l1 = [10, "300", 35, None, 24, "---", 29, "Hello", 31, 5] for l in l1: if isinstance(l, int): sum += l print(f'sum={sum}') ``` ## demo58_series1 ```python import pandas as pd import numpy as np l1 = [3, 1, 4, 5, 9, -2, 8] a1 = np.array(l1) pd1 = pd.Series(l1) pd2 = pd.Series(a1) print(type(l1), type(a1), type(pd1), type(pd2)) print(pd1 == pd2) # check pd1 attributes print(type(pd1.values), pd1.values) print(type(pd1.index), pd1.index) pd3 = pd.Series([4, 7, -5, 3], index=['Nangang', 'Taipei', 'Banqiao', 'Taoyuan']) print(pd3) print(pd3.values) print(type(pd3.index), pd3.index) # access print('access 1 by 1==>', type(pd1[0]), pd1[0], pd1[2]) print('access multiple==>', type(pd1[[0, 2]]), pd1[[0, 2]],sep='\n') print(pd3['Taipei']) print(pd3[['Nangang','Banqiao']]) ``` ## demo59_series2 ```Python import pandas as pd d1 = {'poop': 35, 'bdpy': 35, 'andbiz2': 28, 'testit': 14} s1 = pd.Series(d1) print(d1) print(s1) print('c#' in d1, 'C#' in s1) print('poop' in d1, 'poop' in s1) l1 = ['andbiz2', 'testit', 'poop', 'bdpy'] s2 = pd.Series(d1, index=l1) print(s2) # add index more than dictionary l2 = ['arduino', 'andbiz2', 'testit', 'andbiz3', 'poop', 'bdpy', 'pykt'] s3 = pd.Series(d1, index=l2) print(s3) print('method1 use pd\n', pd.isna(s3)) print('method2 use pd\n', pd.isnull(s3)) print('method3 use series instance\n', s3.isna()) print('method4 use series instance\n', s3.isnull()) d2 = {'poop': "Mark", 'bdpy': None, 'andbiz2': None, 'testit': 'Frank'} s4 = pd.Series(d2, index=l2) print(s4) print(s4.isna()) print(s4.isnull()) ``` ## demo60_series3 ``` import pandas as pd s1 = pd.Series([1000, 800, 500, 300], index=['nangang', 'taipei', 'banqiao', 'taoyuan']) s2 = pd.Series([500, 300, 400], index=['hsinchu', 'taichung', 'tainan']) s3 = pd.Series([900, 1200, 1500, 1300], 
index=['nangang', 'taipei', 'banqiao', 'taoyuan']) s4 = pd.Series([1500, 350, 480], index=['hsinchu', 'taichung', 'tainan']) print(s1) print(s2) print(s1 + s2) print(s1 + s3) print(s2 + s4) # append l1 = [1, 2, 3] l1.append(4) print(l1) s5 = s1.append(s2) s5.index.name = 'station' s5.name = 'sold(k)' print(s1) print(s3) print(s5) # change index name (but keep values unchanged)!! s5.index = ['taipei', 'taichung', 'tainan', 'banqiao', 'nangang', 'hsinchu', 'taoyuan'] print(s5) # use index to generate a new Series with index order s6 = pd.Series(s1.append(s2), index=['taipei', 'taichung', 'tainan', 'banqiao', 'nangang', 'hsinchu', 'taoyuan']) print(s6) ``` ## demo61_series4 ```python import pandas as pd dict1 = {'course': ['poop', 'bdpy', 'pytk', 'aiocv'], 'year': [2018, 2017, 2019, 2020], 'slide': [200, 250, 230, 300]} dataFrame1 = pd.DataFrame(dict1) print(type(dict1), type(dataFrame1)) print(dataFrame1.head(n=2)) dataFrame2 = pd.DataFrame(dict1, columns=['course', 'slide', 'year', 'instructor']) print(dataFrame2) dataFrame3 = pd.DataFrame(dict1, index=['c1', 'c2', 'c3', 'c4'], columns=['course', 'slide', 'year', 'instructor']) print(dataFrame3) print(dataFrame3.columns) print(dataFrame3.index) # slice data (choose columns) print(type(dataFrame3), type(dataFrame3['course']), type(dataFrame3[['course', 'slide']])) # slice data (choose rows) print(type(dataFrame3.loc['c1']), dataFrame3.loc['c1'], sep='\n') print(type(dataFrame3.loc[['c1', 'c3']]), dataFrame3.loc[['c1', 'c3']], sep='\n') dataFrame3['year'] = 2020 print(dataFrame3) dataFrame3['year'] = [2020, 2020, 2020, 2021] print(dataFrame3) ``` ## demo62_series5 ```python import pandas as pd d1 = {'course': ['poop', 'bdpy', 'pykt', 'aiocv'], 'year': [2018, 2017, 2019, 2020], 'slide': [200, 250, 230, 300]} pd1 = pd.DataFrame(d1) print(pd1) print("----") s1 = pd.Series(['Taipei', 'Hsinchu', 'Taichung', 'Kaohsiung'], index=[0, 1, 2, 3]) pd1['location'] = s1 s2 = pd.Series(['remote', 'local'], index=[0, 3]) 
pd1['method'] = s2 pd1['heavy'] = pd1['slide'] >= 250 print(pd1) del pd1['slide'] print(pd1) ``` ## demo63_pandas1 ```python import pandas as pd dict1 = {'poop': {2019: 5, 2020: 8}, 'bdpy': {2018: 5, 2019: 7, 2020: 10}} df1 = pd.DataFrame(dict1) print(df1) df2 = df1.T print(df2) df3 = pd.DataFrame(dict1, index=[2018, 2019, 2020, 2021]) print(df3) print(type(df3.values)) print(df3.values) dict2 = {'poop': {2019: 5, 2020: 8}, 'bdpy': {2018: 5, 2019: 7, 2020: 'not yet'}} df4 = pd.DataFrame(dict2, index=[2018, 2019, 2020, 2021]) print(df4) print(type(df4.values), type(df4.values[0, 0]), type(df4.values[2, 1])) print(df4.values[0, 0], df4.values[2, 1]) print(df4.values) df4.iloc[0, 0] = -1 df4.iloc[2, 1] = 50 print(df4) ``` ## demo64_index1 ```python import pandas as pd import numpy as np object1 = pd.Series(range(5), index=['p', 'q', 'r', 's', 't']) print(object1) index1 = pd.Index(['C', 'D', 'E', 'F', 'G', 'H'], dtype='object') print(type(index1), index1) print(index1[:3], index1[4:]) i1 = pd.Index(np.arange(3)) i2 = list(np.arange(3)) print(i1, i2) data1 = ['Nangang', 'Taipei', 'Banqiao'] s1 = pd.Series(data1, index=i1) print(s1) s2 = pd.Series(data1, index=i2) print(s2) print(2 in i1, 2 in i2) index3 = pd.Index(['Taipei', 'Taipei', 'Taipei']) print(index3) s3 = pd.Series(data1, index=index3) print(s3) print("result=\n", s3['Taipei']) ``` ## demo65_index2 ```python import pandas as pd import numpy as np s1 = pd.Series([20, 15, 18, 37, 25], index=['mar', 'jan', 'feb', 'may', 'apr']) s2 = s1.reindex(['jan', 'feb', 'mar', 'apr', 'may', 'jun']) print(s2) s3 = pd.Series(["L", "M", "S"], index=[0, 5, 10]) print(s3) s4 = s3.reindex(range(15), method='ffill') print(s4) s5 = pd.DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['Kotlin', 'Swift', 'C++', 'Java']) print(s5) s6 = s5.reindex(columns=['objC', 'Kotlin', 'Swift', 'Java', 'C++']) print(s6) ``` ## demo66_drop ```python import pandas as pd dict1 = {'station': ['Nangang', 'Taipei', 'Banqiao', 'Taoyuan', 
'Hsinchu'], 'order': [1, 2, 3, 4, 5], 'backOrder': [5, 4, 3, 2, 1]} df1 = pd.DataFrame(dict1) print(df1) print(df1.drop(2)) print(df1.drop([1, 3])) print(df1.drop('order', axis=1)) print(df1.drop(['order', 'backOrder'], axis=1)) print(df1.drop(['order', 'backOrder'], axis='columns')) df1.drop([1, 4], inplace=True) print(df1) ``` ## demo67_functional ```python import pandas as pd import numpy as np df1 = pd.DataFrame(np.random.randn(6, 7), index=list(range(0, 12, 2)), columns=list(range(0, 7, 1))) print(df1) func1 = lambda x: x.max() - x.min() result1 = df1.apply(func1) print(type(result1), result1) result2 = df1.apply(func1, axis=1) print(type(result2), result2) result3 = df1.apply(func1, axis='columns') print(type(result3), result3) func2 = lambda x: pd.Series([x.min(), x.max()], index=['min', 'max']) result4 = df1.apply(func2) print(type(result4), result4, sep='\n') result5 = df1.apply(func2, axis=1) print(type(result5), result5, sep="\n") print(df1) df1.sort_index() print(df1) print("after sort") df1.index = df1.iloc[:, 1] print(df1.sort_index()) df1.columns = df1.iloc[2, :] print("sort by axis=1") df2 = df1.sort_index(axis=1) print(df2) ``` ## demo68_function2 ```python import pandas as pd import numpy as np df3 = pd.DataFrame(np.random.randn(6, 7), index=list(range(0, 12, 2)), columns=list(range(0, 7, 1))) print(df3.sort_values(by=3)) s1 = pd.Series([4, -15, 7, 7, 2, 2, 0, 0, 4]) print(s1.rank()) print(s1.rank(method='first')) # same value, use order to assign rank print(s1.rank(method='min')) print(s1.rank(method='max')) ``` ## demo69_function3 ```python import pandas as pd import numpy as np df1 = pd.DataFrame(np.random.randn(6, 7), index=list(range(0, 12, 2)), columns=list(range(0, 7, 1))) print(df1) print(df1.sum()) print(df1.sum(axis='columns')) df1.iloc[2, 3] = np.NaN print(df1.sum()) print(df1.sum(skipna=False)) print(df1.sum(axis='columns',skipna=False)) print(df1.idxmax()) print(df1.idxmin()) print(df1.idxmax(axis='columns')) 
print(df1.idxmin(axis='columns')) print(df1.cumsum()) print(df1.cummax()) print(df1.describe()) ``` ## demo70_functional4 ```python import pandas as pd s1 = pd.Series([1, 1, 2, 3, 5, 7, 9]) print(s1.pct_change()) df1 = pd.DataFrame({'JP': [100, 200, 300, 400, 500], 'KR': [250, 200, 150, 100, 50]}) print(df1) print(df1['JP'].corr(df1['KR'])) print(df1['JP'].cov(df1['KR'])) print(df1.corr()) s2 = pd.Series(['S', 'S', 'S', 'M', 'L', 'S', 'M', 'L', 'S', 'M', 'L']) print(s2.value_counts()) print(pd.value_counts(s2)) df2 = pd.DataFrame({'JP': [1, 3, 4, 1, 4, 5], 'KR': [2, 3, 3, 2, 4, 2], 'TW': [5, 3, 1, 2, 4, 4]}) print(df2) result = df2.apply(pd.value_counts) print(result) result2 = df2.apply(pd.value_counts).fillna(0) print(result2) ``` ## cd C:\Python36\Python36\Scripts ``` import pandas as pd import numpy as np df1 = pd.DataFrame(np.random.randn(6,7),index=list(range(0,12,2)),columns=list(range(0,7,1))) df1 df1[:] df1[2:] df1[:2] df1.iloc[:,:] df1.iloc[:2,:] df1.iloc[2:,:] df1.iloc[:,:2] df1.iloc[:,2:] df1.iloc[1:3, 2:4] ``` ## worldbank * [worldbank](https://data.worldbank.org/) ```python import pandas as pd data1 = pd.read_csv('data/data71.csv', skiprows=4) print(type(data1)) print(data1.columns) print(data1.shape) print(data1.head()) print(data1['Country Name'].values) data2 = data1[data1['Country Name']=='Caribbean small states'] print(data2.shape) ``` ## demo72_worldbank_demo ```python import pandas as pd from pandas_datareader import wb import numpy as np data = wb.download( indicator='SE.PRM.TENR', country=['all'], start=2002, end=2020 ) print(data.shape) print(data.head()) data2 = data.reset_index() print(data2.shape) data3 = data2[data2.country == 'Caribbean small states'] print(data3.shape) print(data3) print(np.unique(data2.country, return_counts=True)) print(np.unique(data2.year, return_counts=True)) maxData = data2.groupby(['country'])['SE.PRM.TENR'].transform(max) print(maxData.head()) index = maxData == data2['SE.PRM.TENR'] print(index) 
print(np.unique(index, return_counts=True)) data4 = data2[index] print(data4) data5 = data4.sort_values(by='SE.PRM.TENR', ascending=False) print(data5.head(n=20)) ``` # Day4 ## get world bank GDP Data * [GDP Data](https://data.worldbank.org/indicator/NY.GDP.PCAP.KD.ZG) ### demo73_worldbank_lab2 ```python import pandas as pd import numpy as np df1 = pd.read_csv("data/demo73.csv", skiprows=4) print(df1.columns) print(df1['Country Name'].values) countries = ['Portugal', 'Italy', 'Ireland', 'Greece', 'Spain'] df2 = df1[df1['Country Name'].isin(countries)] print(df2.head()) years = [str(x) for x in list(range(1960, 2020))] print(years) df3 = pd.melt(df2, 'Country Name', var_name='year', value_name='GDPPerCapitalGrowth', value_vars=years) print(df3.head()) flattenGDPTable = pd.melt(df1, 'Country Name', var_name='year', value_name='GDPPerCapitalGrowth', value_vars=years) print(flattenGDPTable.columns) print(flattenGDPTable.shape) MaxFlattenGDPTable \ = flattenGDPTable.groupby(['Country Name'])['GDPPerCapitalGrowth'].transform(max) print(MaxFlattenGDPTable.head()) index = MaxFlattenGDPTable == flattenGDPTable['GDPPerCapitalGrowth'] print(np.unique(index, return_counts=True)) maxGDPTable = flattenGDPTable[index] print(maxGDPTable.head(n=20)) sortedMaxGDP = maxGDPTable.sort_values(by='GDPPerCapitalGrowth', ascending=False) print(sortedMaxGDP.head(n=20)) ``` ## demo74 * [net enrollment rate](https://data.worldbank.org/indicator/SE.PRM.TENR) ### demo74_corr_from_enrollment ```python import pandas as pd data1 = pd.read_csv('data/demo74.csv', skiprows=4) print(data1.shape) print(data1.columns) years = [str(x) for x in list(range(1960, 2020))] flattenData1 = pd.melt(data1, 'Country Name', var_name='year', value_name='SE.PRM.TENR', value_vars=years) print(flattenData1.shape) print(flattenData1.head()) franceData = flattenData1[flattenData1['Country Name'] == 'France'] franceData.index = franceData['year'] print(franceData) germanData = flattenData1[flattenData1['Country Name'] == 
'Germany'] germanData.index = germanData['year'] print(germanData) df1 = pd.DataFrame({'fr': franceData['SE.PRM.TENR'], 'de': germanData['SE.PRM.TENR']}) print(df1) print(df1['fr'].corr(df1['de'])) print(df1['fr'].cov(df1['de'])) print(df1.corr()) ``` ## demo75 * demo75_write_out ``` import sqlite3 import pandas as pd data1 = pd.read_csv('data/demo74.csv', skiprows=4) print(data1.shape) print(data1.columns) years = [str(x) for x in list(range(1960, 2020))] flattenData1 = pd.melt(data1, 'Country Name', var_name='year', value_name='SE.PRM.TENR', value_vars=years) flattenData1.to_csv('data/demo75.csv') flattenData1.to_excel('data/demo75.xlsx', sheet_name='net_enrollment') connection1 = sqlite3.connect('data/demo75.sqlite') flattenData1.to_sql(name='net_enrollment',con=connection1) connection1.close() ``` * pip show openpyxl * https://sqlitebrowser.org/ ## demo76 * demo76_cov19 * [github_cov19](https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv) ```python import pandas as pd from pprint import pprint import pycountry import plotly.express as px PATH = 'data/demo76_cov19.csv' df1 = pd.read_csv(PATH) print(df1.head()) print('before change') print(df1.loc[df1['Country'] == 'Taiwan*']) # change Taiwan* to Taiwan df1.loc[df1['Country'] == 'Taiwan*', 'Country'] = "Taiwan" print(df1.loc[df1['Country'] == 'Taiwan']) list_countries = df1['Country'].unique().tolist() pprint(list_countries) d_country_code = {} for country in list_countries: try: country_data = pycountry.countries.search_fuzzy(country) country_code = country_data[0].alpha_3 d_country_code.update({country: country_code}) except: print('could not add ISO 3 code for ==>', country) d_country_code.update({country: ' '}) ``` ## demo77 ``` import numpy as np from matplotlib import pyplot as plt l1 = [1, 2, 3, 4, 5] l2 = [5, 4, 3, 2, 1] print(l1 + l2) a1 = np.array(l1) a2 = np.array(l2) print(a1 + a2) print(a1 - a2, a1 * a2, a1 / a2, a1 ** a2) print(6 * a1 + 4 * a2 ** 2) # 'o', 
'.','^','s' # plt.plot(l1, 's') # plt.plot(l1, l1, 's') # plt.plot(l1, l2, 's') plt.plot(a1, a2, 'rs-') plt.plot(a1, a2 ** 2 - 2 ** a2 + 5, 'go-') plt.xlabel('quantity') plt.ylabel('probability') plt.title("demo77 numpy to matplotlib") plt.grid() plt.show() ``` ## demo78 * demo78_plot2 ``` import numpy as np from matplotlib import pyplot as plt # a1 = np.array(range(10)) a1 = np.arange(0, 10, 0.25) a2 = a1 ** 2 plt.plot(a1, a2, 'o-') plt.show() b1 = np.arange(0, 2, 0.1) b2 = b1 ** 2 b3 = b1 ** 3 b4 = b1 ** 4 plt.plot(b1, b2, 'ro-') plt.plot(b1, b3, 'g.--') plt.plot(b1, b4, 'b^-') plt.axis([-2, 2, -2, 2]) plt.show() ``` ## keep python up2date ### under pyconsole(inside pycharm) * inside SC ```python import sys !{sys.executable} -m pip install --upgrade pip -i https://artifactory.global.standardchartered.com/artifactory/api/pypi/pypi/simple/ ``` * outside SC使用自己電腦外網 ```python import sys !{sys.executable} -m pip install --upgrade pip ``` ## demo79 ``` import numpy as np from matplotlib import pyplot as plt b1 = np.arange(0, 2, 0.1) plt.plot(b1, b1, 'r--', b1, b1 ** 2, 'g.-', b1, b1 ** 3, 'b*--', linewidth=3) plt.show() ``` ## demo80_subplot ``` import matplotlib.pyplot as plt import numpy as np fig = plt.figure() ax1 = fig.add_subplot(2, 2, 1) ax2 = fig.add_subplot(2, 2, 2) ax3 = fig.add_subplot(2, 2, 3) ax4 = fig.add_subplot(2, 2, 4) plt.plot(np.random.randn(20).cumsum(), 'r--') ax1.hist(np.random.randn(200), bins=20, color='c', alpha=1) ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30)) plt.show() fig, axes = plt.subplots(2, 2) print(type(axes), axes) axes[0, 0].hist(np.random.randn(50), bins=20, color='m', alpha=1) axes[1, 1].scatter(np.arange(30), np.arange(30) + 3 * np.log(10) * np.random.randn(30)) plt.show() ``` ## demo81_pandas_to_plot ```python import numpy as np import pandas as pd import matplotlib.pyplot as plt randomSequences = pd.DataFrame(np.random.normal(1.0, 0.07, (100, 8))) print(randomSequences.head()) accumulates = 
randomSequences.cumprod() accumulates.plot() #randomSequences.plot() plt.show() ``` ## demo82_animation ```python import matplotlib.animation as animation import matplotlib.pyplot as plt import numpy as np fig, ax = plt.subplots() x = np.arange(0, 2 * np.pi, 0.01) line, = ax.plot(x, np.sin(x)) def animate(i): line.set_ydata(np.sin(x + i / 5.0)) return line, def init(): line.set_ydata(np.ma.array(x, mask=True)) return line, anim = animation.FuncAnimation(fig, animate, np.arange(1, 400, 0.1), init_func=init, interval=50) plt.show() ``` ## demo83_pdf ``` import matplotlib.pyplot as plt import numpy as np from scipy.stats import norm mu = 80 sigma = 8 x = mu + sigma * np.random.randn(10000) print(len(x)) num_bins = 50 n, bins, patches = plt.hist(x, num_bins, density=1, facecolor='blue', alpha=0.5) y = norm.pdf(bins, mu, sigma) plt.plot(bins, y, "r*-") plt.show() ``` ## demo84_population * [population](https://data.worldbank.org/indicator/SP.POP.TOTL) ``` import pandas import matplotlib.pyplot as plt from matplotlib import rc data = pandas.read_csv('data/demo84.csv', skiprows=4) print(data.head()) print(data.shape) data.to_excel('data/output84.xlsx', sheet_name='pandas_generated') print(data.describe()) data['created'] = data['1960'] + data['1980'] print(data.columns) ausData = data[data['Country Code'] == 'AUS'] print(ausData.shape) print(ausData['1980']) selected_years = ['1960', '1970', '1980'] print(plt.style.available) font1 = {'family': 'Source Code Pro'} rc('font', **font1) plt.style.use('fast') ausData.plot(kind='bar', y=selected_years, fontsize=12) plt.show() ``` * Practice * [Taipei realtime](https://taipeicity.github.io/traffic_realtime/) * [json api](https://tcgbusfs.blob.core.windows.net/blobyoubike/YouBikeTP.json) ## demo85_youbike ``` # 手動建一個目錄map import folium import json taipei = [25.034132, 121.564153] zoom = 15 map_osm = folium.Map(location=taipei, zoom_start=zoom) file1 = open('data/demo85.json', encoding='utf8') resultText = file1.read() 
file1.close() print(type(resultText), resultText[:50]) result = json.loads(resultText) print(type(result)) data = result["retVal"] # for k in data: # print(k) for v in data.values(): coord = [v['lat'], v['lng']] print(coord) folium.Marker(coord, icon=None, popup=None).add_to(map_osm) map_osm.save('map/demo85.html') ``` ## https://data.taipei/ * [勞基法](https://data.taipei/#/dataset/detail?id=23630879-4926-4877-a48a-a0ae6cc2f7d5) ### lab86_read_labor_law ``` import pandas data1 = pandas.read_csv('data/demo86.csv') print(data1.head()) print(data1.columns) print(data1.info()) data1Grouped = data1[['處分字號', '違反勞動基準法條款']] \ .groupby(['違反勞動基準法條款']).count() print(data1Grouped.head(10)) result = data1Grouped.sort_values('處分字號', ascending=False) print(result.head(10)) data2Grouped = data1[['處分字號', '違反勞動基準法條款','違反法規內容']] \ .groupby(['違反勞動基準法條款','違反法規內容']).count() print(data2Grouped.head(10)) result2 = data2Grouped.sort_values('處分字號', ascending=False) print(result2.head(10)) ``` ### https://data.taipei/#/dataset/detail?id=a835f3ba-7f50-4b0d-91a6-9df128632d1c * demo87_recycle_bin ``` import folium import pandas as pd sample_data = pd.read_csv('data/shilin.csv', sep=',') print(sample_data.shape) print(sample_data.columns) sample_data.columns = ['section', 'road', 'road_detail', 'lon', 'lat', 'extra'] print(sample_data.columns) taipei = [25.095911, 121.518301] zoom = 15 map_osm = folium.Map(location=taipei, zoom_start=zoom) for i in range(len(sample_data)): coord = [sample_data.iloc[i, 4], sample_data.iloc[i, 3]] message = f"({i})[{sample_data.iloc[i,1]}]{sample_data.iloc[i,2]}" icon1 = folium.Icon(color='red', icon='info-sign') folium.Marker(coord, icon=icon1, popup=message).add_to(map_osm) spot1 = [24.773071, 121.009916] folium.CircleMarker(spot1, radius=500, popup='Science Park', fill_color='#C0FFEE').add_to(map_osm) spot2 = [24.777657, 121.043790] folium.Circle(spot2, radius=500, popup='ITRI', fill_color='#FFC0EE').add_to(map_osm) map_osm.save('map/demo87.html') ``` # spark 
installation ## download 2.4.6 ## download winutil * [origin](https://github.com/steveloughran/winutils) * [newer](https://github.com/cdarlint/winutils) ## unzip spark2.4.6 ## copy winutil hadoop 2.7 bin ==> spark \bin ## https://confluence.jetbrains.com/display/JBR/JetBrains+Runtime ``` scala> val textFile = sc.textFile("aa.txt") textFile: org.apache.spark.rdd.RDD[String] = aa.txt MapPartitionsRDD[1] at textFile at <console>:24 scala> textFile.count() res0: Long = 5 ``` ## demo87 ```python import sys import pyspark from pyspark import SparkContext from pprint import pprint pprint(sys.path) print(pyspark.__version__) sc = SparkContext("local","simple app") print(sc) ``` ## demo88_open_sqlite ``` import sqlite3 connection1 = sqlite3.connect('data/demo88.sqlite') DROP_DDL = "DROP TABLE IF EXISTS EMPLOYEE" CREATE_DDL = ''' CREATE TABLE EMPLOYEE (ID INTEGER PRIMARY KEY, NAME TEXT NOT NULL, AGE INT NOT NULL, DEPT INT, ADDRESS CHAR(50)); ''' connection1.execute(DROP_DDL) connection1.execute(CREATE_DDL) connection1.close() ``` ## demo89.py ``` import sqlite3 import time connection1 = sqlite3.connect('data/demo88.sqlite') emp1 = {'NAME': "Mark", 'AGE': 43, 'DEPT': 1, "ADDR": 'Taipei'} emp2 = {'NAME': "John", 'AGE': 42, 'DEPT': 2, "ADDR": 'Hsinchu'} emp3 = {'NAME': "Ken", 'AGE': 44, 'DEPT': 1, "ADDR": 'Taipei'} emp4 = {'NAME': "Tim", 'AGE': 45, 'DEPT': 2, "ADDR": 'Kaohsiung'} employees = [emp1, emp2, emp3, emp4] INSERT_DML = '''INSERT INTO EMPLOYEE(NAME, AGE, DEPT, ADDRESS) VALUES(?,?,?,?)''' start_time = time.time() for i in range(1000): for e in employees: connection1.execute(INSERT_DML, (e['NAME'], e['AGE'], e['DEPT'], e['ADDR'])) connection1.commit() end_time = time.time() connection1.close() print(f"total spend {end_time-start_time} seconds") ``` #### data prepare ```python import tensorflow as tf from keras import datasets print(tf.__version__) datasets.mnist.load_data() datasets.boston_housing.load_data() datasets.imdb.load_data() datasets.reuters.load_data() 
datasets.imdb.get_word_index() datasets.reuters.get_word_index() ``` # new 4 days ## [MLDL](https://hackmd.io/@O0oTwRN6TESqcS_-MQZDVQ/BJPVjRS7w)