# SC_0813
## `demo1_env_check_warm_up.py`
```python
import os, sys
import numpy
import pandas
import matplotlib
# ctrl+shift+F10
print(os.getcwd())
print(sys.executable)
print(numpy.__version__)
print(pandas.__version__)
print(matplotlib.__version__)
# ctrl+alt+L
# alt+Enter
demoObj = {'course': "Data Processing", 'duration': 28, 'instructor': "MarkHo"}
```
## `demo2_arithmatic.py`
```python
import numpy as np
print("hello" + ",world")
l1 = [1, 2, 3]
l2 = [4, 5, 6]
print(l1 + l2)
a1 = np.array(l1)
a2 = np.array(l2)
print(a1 + a2, a1 - a2, a1 * a2, a1 / a2)
print(a2 / a1, a2 // a1, a2 % a1)
print(a1 ** a2, a2 ** a1)
```
## `demo3_complex.py`
```python
c1 = 2 + 3j
c2 = 5
c3 = 4j
c4 = -5
print(type(c1), type(c2), type(c3))
print(c1.conjugate())
print(c1*5, c1*5j)
print(abs(c1), abs(c2), abs(c3), abs(c4), (2**2+3**2)**0.5)
print(c1.real, c1.imag)
```
## `demo4_fraction.py`
```Python
from fractions import Fraction
print(Fraction(250, 72))
print(Fraction(5, 2) + Fraction(19, 6) + Fraction(27, 3))
frac1 = Fraction(250, 70)
print(frac1.denominator, frac1.numerator)
print(f"除數是{frac1.denominator}, 被除數是{frac1.numerator}")
```
## `demo5_decimal.py`
```python
from decimal import Decimal as Dec
print(Dec(2.968))
print(Dec('2.968'))
print(Dec(0.001)*Dec(2968)-Dec(2.968))
print(Dec('0.001')*Dec(2968)-Dec('2.968'))
```
## `demo6_object_equality.py`
```python
class Person:
    """Simple object holding a single ``age`` attribute."""

    def __init__(self, age):
        """Store ``age`` on the new instance."""
        self.age = age
age = 38
print(f"age id = {hex(id(age))}")
age = 39
print(f"age id = {hex(id(age))}")
v1 = 39
print(f"v1 id={hex(id(v1))}")
print(v1 == age, v1 is age)
person1 = Person(38)
print(f"person1 id={hex(id(person1))}")
person2 = person1
print(f"person2 id={hex(id(person2))}")
person3 = Person(38)
print(f"person3 id={hex(id(person3))}")
person1.age = 39
person3.age = 39
print(f"person1 id={hex(id(person1))}")
print(f"person2 id={hex(id(person2))}")
print(f"person3 id={hex(id(person3))}")
```
## `demo7_equal_identical.py`
```python
l1 = ['apple', 'banana']
l2 = l1
l3 = ['apple', 'banana']
print(l1 is l2, l1 == l2)
print(l1 is l3, l1 == l3)
```
## `demo8_fp1.py`
```python
from pprint import pprint
courses = [{'name': 'poop', 'field': 'python', 'attendee': 10, 'remote': False},
{'name': 'bdpy', 'field': 'python', 'attendee': 15, 'remote': True},
{'name': 'andbiz3', 'field': 'android', 'attendee': 5, 'remote': False}]
pprint(courses)
courses = [{'name': 'poop', 'field': 'python', 'attendee': 10, 'remote': False},
{'name': 'bdpy', 'field': 'python', 'attendee': 15, 'remote': True},
{'name': 'andbiz3', 'field': 'android', 'attendeen': 5, 'remote': False}]
# NOTE(review): the last record of the list defined just above spells its key
# 'attendeen', so this lookup raises KeyError on the final iteration --
# presumably intentional, to show how fragile plain-dict records are; confirm
# before changing the key.
for course in courses:
    print(course['attendee'])
```
## `demo9_fp2.py`
```python
import collections
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
print(poop)
print(poop.name, poop.field, poop.attendee)
poop.name='aiocv'
```
## `demo10_fp3.py`
```python
import collections
from pprint import pprint
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
courses = [poop, bdpy, andbiz]
pprint(courses)
del courses[0]
pprint(courses)
```
## `demo11_fp4.py`
```python=
import collections
from pprint import pprint
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
# using filter
result1 = filter(lambda x: x.remote is True, courses)
print(type(result1))
pprint([x for x in result1])
result2 = filter(lambda x: x.attendee >= 10, courses)
print(next(result2))
print(next(result2))
result3 = filter(lambda x: x.remote is True, courses)
result4 = filter(lambda x: x.attendee > 10, result3)
pprint([x for x in result4])
```
## `demo12_fp5.py`
```Python
import collections
from pprint import pprint
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
filter1 = lambda x: x.remote is True
result1 = tuple(filter(filter1, courses))
print(type(result1))
pprint(result1)
pprint(tuple(filter(filter1, courses)))
```
## `demo13_fp6.py`
```Python
import collections
from pprint import pprint
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
available1 = lambda c: c.attendee >= 8
def available(course):
    """Return True when ``course`` has at least 8 attendees.

    Args:
        course: Record exposing an ``attendee`` count.

    Returns:
        bool: Result of ``course.attendee >= 8``.
    """
    return course.attendee >= 8
def isRemote(course):
    """Return True only when ``course.remote`` is the ``True`` singleton.

    This is an identity check, so truthy values other than ``True``
    (for example ``1``) yield False.

    Args:
        course: Record exposing a ``remote`` flag.
    """
    return course.remote is True
validRemoteCourses = tuple(filter(isRemote, filter(available, courses)))
pprint(validRemoteCourses)
```
## `demo14_fp7.py`
```python
import collections
from pprint import pprint
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
pprint([c for c in courses if c.field == 'python'])
pprint(tuple([c for c in courses if c.field == 'python']))
pprint(tuple(c for c in courses if c.field == 'python'))
```
## `demo15_fp8.py`
```python
import collections
from pprint import pprint
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
name_and_field = map(lambda x: {'name': x.name, 'field': x.field}, courses)
pprint(tuple(name_and_field))
```
## demo16_fp9.py
```python=
import collections
from pprint import pprint
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
pprint([{'name': c.name, 'income': c.attendee * 8000} for c in courses])
pprint(tuple({'name': c.name, 'income': c.attendee * 8000} for c in courses))
```
## demo17_fp10.py
```python=
import collections
from pprint import pprint
from functools import reduce
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
income_tuple = tuple({'name': c.name, 'income': c.attendee * 8000} for c in courses)
total_income = reduce(lambda acc, val: acc+val['income'], income_tuple, 0)
pprint(total_income)
pprint(sum(x['income'] for x in income_tuple))
```
## demo18_fp11.py
```python=
import collections
from pprint import pprint
from functools import reduce
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
def reducer(acc, val):
    """Append ``val.name`` to the list stored under ``val.field`` in ``acc``.

    ``acc`` is mutated in place and returned, which is the shape
    ``functools.reduce`` expects from its folding function.

    Args:
        acc: Mapping from field name to a list of course names.
        val: Record exposing ``field`` and ``name``.

    Returns:
        The same ``acc`` object.

    Raises:
        KeyError: If ``acc`` is a plain dict with no entry for ``val.field``.
    """
    acc[val.field].append(val.name)
    return acc
courses_by_category = reduce(reducer, courses, {'python': [], 'android': []})
pprint(courses_by_category)
```
## demo19_fp12
```python
import collections
from functools import reduce
from pprint import pprint
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
cplus = course(name='cplus', field='c++', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv,cplus)
def reducer(acc, val):
    """Append ``val.name`` under ``val.field``, creating the list on demand.

    ``acc`` is mutated in place and returned, which is the shape
    ``functools.reduce`` expects from its folding function.

    Args:
        acc: Dict from field name to a list of course names; may start empty.
        val: Record exposing ``field`` and ``name``.

    Returns:
        The same ``acc`` object.
    """
    # setdefault returns the existing list or the freshly inserted one, so a
    # single call covers both the "new field" and "known field" cases.
    acc.setdefault(val.field, []).append(val.name)
    return acc
courses_by_category = reduce(reducer, courses, {})
pprint(courses_by_category)
```
## demo20_fp13
```python=
import collections
from functools import reduce
from pprint import pprint
from collections import defaultdict
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
def reducer(acc, val):
    """Append ``val.name`` to the list stored under ``val.field`` in ``acc``.

    No key handling happens here: any missing-key behaviour comes from the
    accumulator itself (the call site below passes ``defaultdict(list)``).
    ``acc`` is mutated in place and returned.

    Args:
        acc: Mapping from field name to a list of course names.
        val: Record exposing ``field`` and ``name``.

    Returns:
        The same ``acc`` object.
    """
    acc[val.field].append(val.name)
    return acc
courses_by_category = reduce(reducer, courses, defaultdict(list))
pprint(courses_by_category)
pprint(dict(courses_by_category))
```
## demo21_fp14
```python=
import collections
import itertools
from pprint import pprint

course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)

poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)


def _field_of(record):
    """Return the grouping key (the ``field`` attribute) of a course record."""
    return record.field


# groupby only merges *adjacent* items with equal keys, so the input is
# sorted by the same key first.
sorted_courses = sorted(courses, key=_field_of)
pprint(sorted_courses)

# NOTE(review): the names say "remote" but every grouping below is keyed by
# ``field``; the names are kept unchanged here.

# field -> list of full course records
courses_by_remote = {
    field: list(members)
    for field, members in itertools.groupby(sorted_courses, _field_of)
}
pprint(courses_by_remote)

# field -> list of course names
courses_by_remote2 = {
    field: [member.name for member in members]
    for field, members in itertools.groupby(sorted_courses, _field_of)
}
pprint(courses_by_remote2)

# same grouping as a list of (field, names) pairs
courses_by_remote3 = [
    (field, [member.name for member in members])
    for field, members in itertools.groupby(sorted_courses, _field_of)
]
pprint(courses_by_remote3)
```
## demo22_fp15
```python=
import collections
from functools import reduce
from pprint import pprint

# A dict literal that repeats a key keeps the last value given for that key.
print({'a': 0, 'b': 1, 'a': 0 + 1, 'b': 1 + 2})

course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)

poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)


def reducer(acc, val):
    """Return a copy of ``acc`` with ``val.name`` appended under ``val.field``.

    Neither ``acc`` nor the lists it holds are modified: the entry for
    ``val.field`` is replaced by a new, longer list in the copy.
    """
    updated = dict(acc)
    updated[val.field] = acc[val.field] + [val.name]
    return updated


courses_by_category = reduce(reducer, courses, {'python': [], 'android': []})
pprint(courses_by_category)
```
## demo23_fp16.py
```python=
import collections
from pprint import pprint
import time
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
def transform(x, delay=3):
    """Build the revenue record for one course, pausing ``delay`` seconds.

    Args:
        x: Course record exposing ``name`` and ``attendee``.
        delay: Seconds to sleep between the two progress messages.
            Defaults to 3, the pause that was previously hard-coded.

    Returns:
        dict: ``{'name': x.name, 'revenue': x.attendee * 5000}``.
    """
    print(f"process record:{x.name}")
    time.sleep(delay)
    result = {'name': x.name, 'revenue': x.attendee * 5000}
    print(f"done process record:{x.name}")
    return result
start = time.time()
result = tuple(map(transform, courses))
end = time.time()
print(f"total time:{end-start:.2f} seconds")
pprint(result)
```
## demo24_fp17
```python=
import collections
from pprint import pprint
import time
import multiprocessing
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
import os
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
def transform(x, delay=3):
    """Build the revenue record for one course, pausing ``delay`` seconds.

    Both progress messages include ``os.getpid()`` so the output shows which
    process handled the record.

    Args:
        x: Course record exposing ``name`` and ``attendee``.
        delay: Seconds to sleep between the two progress messages.
            Defaults to 3, the pause that was previously hard-coded.

    Returns:
        dict: ``{'name': x.name, 'revenue': x.attendee * 5000}``.
    """
    print(f"process:{os.getpid()} record:{x.name}")
    time.sleep(delay)
    result = {'name': x.name, 'revenue': x.attendee * 5000}
    print(f"done process:{os.getpid()} record:{x.name}")
    return result
if __name__ == '__main__':
    # The guard matters with multiprocessing: worker processes may import this
    # module again, and they must not start pools of their own.
    workers = 4
    start = time.time()
    # The context manager shuts the pool down on exit; previously the pool was
    # never closed. The worker count is reported from the local variable
    # instead of the private ``pool._processes`` attribute.
    with multiprocessing.Pool(processes=workers, maxtasksperchild=1) as pool:
        print(f"now we run with process:{workers}")
        result = pool.map(transform, courses)
        end = time.time()
    print(f"total time:{end-start:.2f} seconds")
    pprint(result)
```
## demo25_fp18
```python=
import collections
from pprint import pprint
import time
import concurrent.futures
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
import os
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
def transform(x, delay=3):
    """Build the revenue record for one course, pausing ``delay`` seconds.

    Both progress messages include ``os.getpid()`` so the output shows which
    process handled the record.

    Args:
        x: Course record exposing ``name`` and ``attendee``.
        delay: Seconds to sleep between the two progress messages.
            Defaults to 3, the pause that was previously hard-coded.

    Returns:
        dict: ``{'name': x.name, 'revenue': x.attendee * 5000}``.
    """
    print(f"process:{os.getpid()} record:{x.name}")
    time.sleep(delay)
    result = {'name': x.name, 'revenue': x.attendee * 5000}
    print(f"done process:{os.getpid()} record:{x.name}")
    return result
if __name__ == '__main__':
    start = time.time()
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # Executor.map returns a lazy iterator; materialise it so the final
        # pprint shows the computed records rather than a generator object.
        result = list(executor.map(transform, courses))
    end = time.time()
    print(f"total time:{end-start:.2f} seconds")
    pprint(result)
```
## demo25_fp18 (change to thread)
```python=
import collections
from pprint import pprint
import time
import concurrent.futures
course = collections.namedtuple('course', ['name', 'field', 'attendee', 'remote'])
print(type(course))
print(course)
import os
poop = course(name='poop', field='python', attendee=10, remote=False)
bdpy = course(name='bdpy', field='python', attendee=15, remote=True)
pykt = course(name='python', field='python', attendee=9, remote=False)
andbiz = course(name='andbiz', field='android', attendee=18, remote=False)
aiocv = course(name='aiocv', field='python', attendee=10, remote=True)
courses = (poop, bdpy, andbiz, pykt, aiocv)
def transform(x, delay=3):
    """Build the revenue record for one course, pausing ``delay`` seconds.

    Both progress messages include ``os.getpid()``.

    Args:
        x: Course record exposing ``name`` and ``attendee``.
        delay: Seconds to sleep between the two progress messages.
            Defaults to 3, the pause that was previously hard-coded.

    Returns:
        dict: ``{'name': x.name, 'revenue': x.attendee * 5000}``.
    """
    print(f"process:{os.getpid()} record:{x.name}")
    time.sleep(delay)
    result = {'name': x.name, 'revenue': x.attendee * 5000}
    print(f"done process:{os.getpid()} record:{x.name}")
    return result
if __name__ == '__main__':
    start = time.time()
    # Swap in concurrent.futures.ProcessPoolExecutor() on the next line to
    # compare threads against processes.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Executor.map returns a lazy iterator; materialise it so the final
        # pprint shows the computed records rather than a generator object.
        result = list(executor.map(transform, courses))
    end = time.time()
    print(f"total time:{end-start:.2f} seconds")
    pprint(result)
```
## demo26_library
```python=
def foo(a, b):
    """Return the tag ``[demo26][foo]result=`` followed by ``str(a + b)``."""
    return "[demo26][foo]result=" + str(a + b)
def bar(a, b):
    """Return the tag ``[demo26][bar]result=`` followed by ``str(a * b)``."""
    return "[demo26][bar]result=" + str(a * b)
print("inside demo26, foo(1,2)=", foo(1, 2))
print("inside demo26, bar(3,4)=", bar(3, 4))
```
## demo27_use_library
```python=
import demo26_library
print(demo26_library.foo(1, 2))
print(demo26_library.bar(3, 4))
import demo26_library as library
print(library.foo(5, 6))
print(library.bar(7, 8))
from demo26_library import foo, bar
print(foo(9, 10))
print(bar(11, 12))
from demo26_library import foo as f1, bar as f2
print(f1(13, 14))
print(f2(15, 16))
```
## demo28_variable_function_call.py
```python=
def sample_call(fix1, fix2, *args):
    """Print the two fixed arguments, then the extra positionals as a list.

    Args:
        fix1: First required positional argument.
        fix2: Second required positional argument.
        *args: Any further positional arguments; printed as ``list(args)``,
            so ``[]`` is printed when none are given.
    """
    print(fix1)
    print(fix2)
    print(list(args))
sample_call("hello", "world")
sample_call("hihi", "welcome", "ABC123")
sample_call("multiple", "input", 5, 300, None, 5 + 4j)
l1 = ['Sunday', 'Monday', 'Tuesday']
sample_call("pass a", "list", l1)
sample_call("pass a", "list", *l1)
```
## demo29_key_value_function_call.py
```python=
def sample_key_value_arguments(fix1, fix2, **kwargs):
    """Print the two fixed arguments, then one line per keyword argument.

    Args:
        fix1: First required argument.
        fix2: Second required argument.
        **kwargs: Any further keyword arguments; each is printed as
            ``parameter name=<key>, value=<value>``.
    """
    print(f"fix1={fix1}, fix2={fix2}")
    for key, value in kwargs.items():
        print(f"parameter name={key}, value={value}")
sample_key_value_arguments(500, "hi", name="mark", location="Taipei")
sample_key_value_arguments("hello", 'world')
sample_key_value_arguments("hihi", "welcome",
course="BDPY",
duration=35,
period=['Sun', 'Mon', 'Tue'])
course = {'name': "poop", "level": "entry", "duration": 35}
sample_key_value_arguments("unpack"," a dict", **course)
```
## demo30_fix_argument1.py
```python=
def lab30(fix1, fix2, fix3):
    """Print the three arguments on one line between two ``---`` rules.

    Exactly three arguments are required; the calls below feed them by
    unpacking different iterables with ``*``.
    """
    print("---")
    print(f"arg1={fix1}, arg2={fix2}, arg3={fix3}")
    print("---")
args = ("two", 3.0)
lab30(1, *args)
lab30(*args, 5)
args2 = ['two', 3.0]
lab30('OK', *args2)
args3 = {'Hi', "OK", 'Welcome'}
lab30(*args3)
args4 = {'course':"PYKT",'level':'advanced',"duration":35}
lab30(*args4)
```
## demo31_fix_argument2
```python=
def dumpEmployee(id, name, department):
    """Print an employee's id, name and department, one per line.

    The parameter name ``id`` shadows the builtin inside this function; it is
    kept because the calls below pass it by keyword.
    """
    print(f"id={id}")
    print(f"name={name}")
    print(f"department={department}")
dumpEmployee(5, "mark", "R&D")
dumpEmployee(id=5, name="Mark Ho", department="Research&Development")
dumpEmployee(5, name="Mark Lin", department="FAE")
emp1 = {'id': 1, 'name': "Kelvin", 'department': 'Sales'}
dumpEmployee(**emp1)
emp2 = (2, "James", "legal")
dumpEmployee(*emp2)
```
## demo32_str_repr.py
```python=
from datetime import datetime
now = datetime.now()
print("repr", repr(now))
print("str", str(now))
print(now)
print('inside list:', [now])
print('inside tuple', (now,))
print('not inside tuple', (now))
print('inside dict',{'k1':now})
print('inside dict, but keep str',{'k1':str(now)})
```
## demo32_system_command.py
```python=
import os
import sys
print(os.getcwd())
print(sys.executable)
print(sys.argv)
PATH1 = "logs"
os.mkdir(PATH1)
os.chdir(PATH1)
print(os.getcwd())
os.chdir("..")
os.rmdir(PATH1)
```
## demo33_math_random.py
```python=
import math
import random
print(math.pi, math.log10(10), math.log10(5))
print(math.sqrt(5))
for x in range(50):
print(random.randint(10, 20))
stores = ['7-11', 'fami', 'Ok', "Hi-Life"]
for x in range(20):
print(random.choice(stores))
cards = ['A', 'K', 'Q', 'J', 10]
for x in range(10):
random.shuffle(cards)
print(cards)
```
## demo34_itertools.py
```python=
import itertools
r1 = itertools.chain('abc', "123", 'PQRST')
t1 = tuple(r1)
print(t1)
print(t1)
TEAMS = ['A', 'B', 'C', 'D', 'E']
r2 = itertools.permutations(TEAMS, 2)
t2 = tuple(r2)
print(len(t2), t2)
r3 = itertools.combinations(TEAMS, 3)
t3 = tuple(r3)
print(len(t3), t3)
```
## `demo35_shutil.py`
```python=
import shutil
import os
ORIGINAL = "data1"
DUPLICATE = "data2"
shutil.copytree(ORIGINAL, DUPLICATE)
os.chdir(DUPLICATE)
print("now inside:", os.getcwd())
os.chdir("..")
shutil.rmtree(DUPLICATE)
```
## `demo36_json.py`
```python=
import json
v1 = ["Sunday", "monday", 100, 3.14, None, '中文', {"name": "Mark"}]
print(type(v1))
r1 = json.dumps(v1)
print(type(r1))
print(r1)
v2 = {"course": "BDPY", "instructor": "Mark",
"period": ['Thursday', 'Friday']}
r2 = json.dumps(v2)
print(type(r2))
print(r2)
```
## `demo37_bugzilla.py`
* [https://bugzilla.mozilla.org/rest/bug/35](https://bugzilla.mozilla.org/rest/bug/35)
* [json viewer](https://chrome.google.com/webstore/detail/json-viewer/gbmdgpbipfallnflgajpaliibnhdgobh?utm_source=chrome-ntp-icon)
```python=
import requests
proxies = {
"http": "10.239.9.190:443",
"https": "10.239.9.190:443",
}
URL = 'https://bugzilla.mozilla.org/rest/bug/35'
# Behind the proxy, use this call instead:
# response = requests.get(URL, proxies=proxies, verify=False)
# NOTE(review): verify=False disables TLS certificate verification; confirm it
# is still needed when no proxy is in use.
response = requests.get(URL, proxies=None, verify=False)
print(response.status_code)
# Decode the JSON body once and reuse the result.
result = response.json()
print(type(result))
for k, v in result.items():
    print(f"key={k}, value={v}")
# The payload holds a "bugs" list; report who created the first entry.
bugs = result["bugs"]
firstBug = bugs[0]
firstBugCreator = firstBug["creator"]
print(firstBugCreator)
```
## demo38_ucom_http.py
```python=
import requests
from bs4 import BeautifulSoup
proxies = {
"http": "10.239.9.190:443",
"https": "10.239.9.190:443",
}
URL = 'https://www.uuu.com.tw/'
# r = requests.get(URL, proxies=proxies, verify=False)
r = requests.get(URL, proxies=None, verify=False)
soup = BeautifulSoup(r.content, "html.parser")
print(type(soup))
print(soup.title.name)
print(soup.title.string)
hot_courses = soup.find('div', {'id': 'course_list'})
print(type(hot_courses))
print(hot_courses)
items = hot_courses.find_all('a')
for item in items:
print(item)
```
## demo39_urllib_get_image
* [cwb image](https://www.cwb.gov.tw/Data/satellite/LCC_IR1_CR_2750/LCC_IR1_CR_2750-2020-08-14-10-30.jpg)
```python=
from urllib.request import urlopen
from urllib import request
from PIL import Image
URL = 'https://www.cwb.gov.tw/Data/satellite/LCC_IR1_CR_2750/LCC_IR1_CR_2750-2020-08-14-10-30.jpg'
# for SC proxy settings
# proxy = request.ProxyHandler({'https': '10.239.9.190:443'})
# opener = request.build_opener(proxy)
# request.install_opener(opener)
# end of SC proxy setting
# Download the image; the HTTP response is closed when the block exits.
with urlopen(URL) as fileToSave:
    image = Image.open(fileToSave)
    # Force the pixel data to be read while the response is still open.
    image.load()
# Create the ``images`` directory by hand before running this script.
image.save('images/demo39.jpg')
# get half size
halfSize = (image.size[0] // 2, image.size[1] // 2)
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
halfImage = image.resize(halfSize, Image.LANCZOS)
halfImage.save('images/demo39_small.jpg')
# transpose() handles the right-angle rotations.
rot1 = halfImage.transpose(Image.ROTATE_90)
rot1.save('images/r90.jpg')
rot2 = halfImage.transpose(Image.ROTATE_180)
rot2.save('images/r180.jpg')
rot3 = halfImage.transpose(Image.ROTATE_270)
rot3.save('images/r270.jpg')
# rotate() accepts an arbitrary angle.
rot4 = halfImage.rotate(60)
rot4.save('images/r60.jpg')
```
## README.md
````markdown
# Python and Data Wrangling 4 days
## `requests` under proxy usage
```python
proxies = {
"http": "10.239.9.190:443",
"https": "10.239.9.190:443",
}
URL = 'https://www.uuu.com.tw/'
# r = requests.get(URL, proxies=proxies, verify=False)
r = requests.get(URL, proxies=None, verify=False)
```
## `urlopen` under proxy usage
```python
# for SC proxy settings
# proxy = request.ProxyHandler({'https': '10.239.9.190:443'})
# opener = request.build_opener(proxy)
# request.install_opener(opener)
# end of SC proxy setting
```
````
## demo40_file_io
```txt (Python_Introduction)
Python(英國發音:/ˈpaɪθən/ 美國發音:/ˈpaɪθɑːn/)是一種廣泛使用的直譯式、進階程式、通用型程式語言,由吉多·范羅蘇姆創造,第一版釋出於1991年。Python是ABC語言的後繼者,也可以視之為一種使用傳統中綴表達式的LISP方言[4]。Python的設計哲學強調代碼的可讀性和簡潔的語法(尤其是使用空格縮排劃分代碼塊,而非使用大括號或者關鍵詞)。相比於C++或Java,Python讓開發者能夠用更少的代碼表達想法。不管是小型還是大型程式,該語言都試圖讓程式的結構清晰明瞭。
Python(パイソン)インタープリタ型の高水準汎用プログラミング言語である。グイド・ヴァン・ロッサムにより創り出され、1991年に最初にリリースされたPythonの設計哲学は、その顕著なホワイトスペースの使用によってコードの可読性が重視されている。その言語構成とオブジェクト指向のアプローチは、プログラマが小規模なプロジェクトから大規模なプロジェクトまで、明確で論理的なコードを書くのを支援することを目的としている。
Python is an interpreted, high-level, general-purpose programming language. Created by Guido van Rossum and first released in 1991, Python's design philosophy emphasizes code readability with its notable use of significant whitespace. Its language constructs and object-oriented approach aim to help programmers write clear, logical code for small and large-scale projects.[28]
```
```python
# mkdir data
# make a file Python_Introduction.txt

# Variant 1: open and close by hand. If read() raised, close() would be
# skipped and the file handle would stay open.
file1 = open('data/Python_Introduction.txt',
             encoding='utf8')
readme_txt = file1.read()
print(type(readme_txt))
print(readme_txt)
file1.close()

# Variant 2: the context manager closes the file when the block exits,
# including when an exception is raised inside it.
with open('data/Python_Introduction.txt',
          encoding='utf8') as file2:
    readme_txt2 = file2.read()
print(type(readme_txt2))
print(readme_txt2)

# Write the text back out, again once by hand and once with ``with``.
file3 = open('data/clone1', 'w', encoding='utf8')
file3.write(readme_txt)
file3.close()

with open('data/clone2', 'w', encoding='utf-8') as file4:
    file4.write(readme_txt2)
```
## demo41_csv_read.py
```python
import csv
# newline='' lets the csv module handle line endings itself, which it needs
# for quoted fields that contain newlines. The ``with`` block closes the file
# even if parsing fails.
with open('data/demo41.csv', newline='', encoding='utf8') as sampleFile:
    sampleReader = csv.reader(sampleFile)
    # Read every row while the file is still open.
    sampleData = list(sampleReader)
print(type(sampleData))
print(sampleData)
## demo42_tk1.py
```python
import tkinter
from tkinter import font
import tkinter.font
counter = 10
counter2 = [10]
def callback1():
    """Show the current click count on ``label1``, then increment it.

    ``counter`` is a module-level int, so rebinding it here requires the
    ``global`` statement.
    """
    global counter
    label1.config(text=f'button clicked {counter} times')
    counter += 1
def callback2():
    """Show the current click count on ``label2``, then increment it.

    The count lives in the one-element list ``counter2``; the list is mutated
    in place, so no ``global`` statement is needed.
    """
    label2.config(text=f"button2 clicked {counter2[0]} times")
    counter2[0] += 1
def callback3():
    """Show the current click count on ``label1``, then increment it.

    The count is held in the ``tkinter.IntVar`` ``counter3`` and is read and
    written through ``get()`` / ``set()``.
    """
    label1.config(text=f'button3 clicked {counter3.get()} times')
    counter3.set(counter3.get() + 1)
top = tkinter.Tk()
counter3 = tkinter.IntVar()
counter3.set(10)
# query available font
for f in font.families():
print(f)
myFont1 = font.Font(family="Tahoma", size=24)
label1 = tkinter.Label(top, text='Hello Tk@Python',
font=myFont1, fg='#C0FFEE',
bg='#000000', padx=20, pady=10)
label2 = tkinter.Label(top, text="mark@patristar.com",
font=myFont1, fg='#000000',
bg='#FFC0EE', padx=10, pady=20)
button1 = tkinter.Button(top, text='click me1',
font=myFont1, fg='#C0FFEE',
bg='#440000', command=callback1)
button2 = tkinter.Button(top, text='click me2',
font=myFont1, fg='#FFEEC0',
bg='#004400', command=callback2)
button3 = tkinter.Button(top, text='clicked me3',
font=myFont1, fg='#EEFFC0',
bg='#000044', command=callback3)
label2.pack()
label1.pack()
button1.pack()
button2.pack()
button3.pack()
top.mainloop()
```
## demo43_tk2.py
```python
import tkinter
from tkinter import font
import tkinter.font
def function1(ev):
    """Handle the ``<Button-1>`` binding: report the event on ``label1``.

    Args:
        ev: Event object passed by tkinter; interpolated into the label text.
    """
    label1.config(text=f'left single clicked:{ev}', bg='#880000')
def function2(ev):
    """Handle the ``<Double-3>`` binding: report the event on ``label1``.

    Args:
        ev: Event object passed by tkinter; interpolated into the label text.
    """
    label1.config(text=f'right double clicked{ev}', bg='#008800')
def function3(ev):
    """Handle the ``<B2-Motion>`` binding: report the event on ``label1``.

    Args:
        ev: Event object passed by tkinter; interpolated into the label text.
    """
    label1.config(text=f"middle click drag:{ev}", bg='#000088')
top = tkinter.Tk()
myFont1 = font.Font(family="Tahoma", size=48)
label1 = tkinter.Label(top, text="status", font=myFont1, fg='#C0FFEE', bg='#000000', padx=30, pady=20)
button1 = tkinter.Button(top, text='click', font=myFont1, fg='#000000', bg='#FFC0EE', padx=20, pady=30)
label1.pack()
button1.pack()
button1.bind('<Button-1>', function1)
button1.bind('<Double-3>', function2)
button1.bind('<B2-Motion>', function3)
top.mainloop()
```
## demo44_tk3.py
```python
import tkinter
from tkinter import font
import tkinter.font
def func1(ev):
    """Handle the ``<Motion>`` binding: show the pointer position.

    Args:
        ev: Event object passed by tkinter; its ``x`` and ``y`` attributes
            are written into ``message1``.
    """
    message1.config(text=f'move to [{ev.x},{ev.y}]')
top = tkinter.Tk()
myFont1 = font.Font(family="Tahoma", size=48)
message1 = tkinter.Message(top, text="detect mose move", font=myFont1)
label1 = tkinter.Label(top, text="detect area", bg='#C0FFEE', font=myFont1,
padx=30, pady=30)
message1.pack()
label1.pack()
label1.bind('<Motion>', func1)
top.mainloop()
```
## demo45_tk4.py
```python
import tkinter
from tkinter import font
import tkinter.font
def func1():
    """Set ``label1`` to the pixel5 message.

    NOTE(review): no widget in this demo references this function.
    """
    label1.config(text="you will buy pixel5")
def func2():
    """Set ``label1`` to the iphon12 message.

    NOTE(review): no widget in this demo references this function.
    """
    label1.config(text="you will buy iphon12")
def func3():
    """Update ``label1`` to match the radio button selected through ``var1``.

    Values other than 1 and 2 leave the label unchanged.
    """
    choice = var1.get()
    # Compare by value. ``is`` tests object identity, which is not guaranteed
    # for equal ints and raises a SyntaxWarning with literals on Python 3.8+.
    if choice == 1:
        label1.config(text="you will buy pixel5")
    elif choice == 2:
        label1.config(text="you will buy iphon12")
top = tkinter.Tk()
myFont1 = font.Font(family="Tahoma", size=48)
var1 = tkinter.IntVar()
var1.set(2)
label1 = tkinter.Label(top, font=myFont1, text="you will choose from below:")
button1 = tkinter.Radiobutton(top, font=myFont1, text='Google', value=1,
variable=var1, command=func3)
button2 = tkinter.Radiobutton(top, font=myFont1, text='Apple', value=2,
variable=var1, command=func3)
label1.pack()
button1.pack()
button2.pack()
top.mainloop()
```
## demo46_tk5.py
```python
import tkinter
from tkinter import font
import tkinter.font
def func(scale):
    """Scale ``command`` callback: show the slider value on ``label1``.

    Args:
        scale: New slider value as handed over by the widget; converted with
            ``int()`` before formatting (presumably a numeric string -- verify
            against the tkinter documentation).
    """
    label1.config(text=formattedString % int(scale))
top = tkinter.Tk()
formattedString = 'value=%d'
value = tkinter.IntVar()
value.set(0)
myFont1 = font.Font(family="Tahoma", size=36)
label1 = tkinter.Label(top, text=formattedString % value.get(), font = myFont1)
scale = tkinter.Scale(top, label='Scale', font=myFont1, orient='h',from_=0, to=100,
showvalue=True, variable=value, command=func)
label1.pack()
scale.pack()
top.minsize(300,300)
top.maxsize(300,300)
top.mainloop()
```
## demo47_tk6
```python
import tkinter
from tkinter import font
import tkinter.font
def display(ev):
    """Copy the current text of ``entry1`` into ``label1`` and print the event.

    Bound below to ``<Return>`` on the entry and ``<Button-1>`` on the button.

    Args:
        ev: Event object passed by tkinter; only printed.
    """
    label1.config(text=entry1.get())
    print(ev)
top = tkinter.Tk()
myFont1 = font.Font(family="Tahoma", size=36)
label1 = tkinter.Label(top, text="Input Text", font=myFont1)
entry1 = tkinter.Entry(top, font=myFont1)
button1 = tkinter.Button(top, text='submit', font=myFont1)
label1.pack()
entry1.pack()
button1.pack()
entry1.bind('<Return>', display)
button1.bind('<Button-1>', display)
top.mainloop()
```
## demo48
```python
import graphviz as gv
print(gv.__version__)
g1 = gv.Digraph(format='svg')
g1.node('A')
g1.node('B')
g1.edge('A', 'A')
g1.edge('A', 'A')
g1.edge('A', 'A')
g1.edge('A', 'B')
g1.edge('B', 'B')
g1.edge('B', 'B')
g1.render('graph/demo48')
```
## demo49_gv2
* Download this archive and unzip it:
* [https://www2.graphviz.org/Packages/stable/windows/10/msbuild/Release/Win32/graphviz-2.38-win32.zip](https://www2.graphviz.org/Packages/stable/windows/10/msbuild/Release/Win32/graphviz-2.38-win32.zip)
```python
import functools
import graphviz as gv
from itertools import combinations
graph = functools.partial(gv.Graph, format='svg')
digraph = functools.partial(gv.Digraph, format='svg')
g3 = graph()
g4 = digraph()
# 'A','Apple','Google'
def add_nodes(graph, nodes):
    """Add every entry of ``nodes`` to ``graph`` and return ``graph``.

    Args:
        graph: Object exposing ``node(name, **attrs)``.
        nodes: Iterable whose items are either a bare node name or a tuple
            ``(name, attrs_dict)``; the dict is passed as keyword attributes.

    Returns:
        The same ``graph`` object, so calls can be chained.
    """
    for n in nodes:
        if isinstance(n, tuple):
            graph.node(n[0], **n[1])
        else:
            graph.node(n)
    return graph
def add_edges(graph, edges):
    """Add every entry of ``edges`` to ``graph`` and return ``graph``.

    An entry is either a plain tuple of positional arguments for
    ``graph.edge`` (e.g. ``(tail, head)``) or ``((tail, head), attrs)``,
    where ``attrs`` is forwarded as keyword arguments.
    """
    for e in edges:
        if isinstance(e[0], tuple):
            graph.edge(*e[0], **e[1])
        else:
            graph.edge(*e)
    return graph
# Undirected graph: one edge for every pair of teams.
teams = ['apple', 'google', 'facebook']
races = tuple(combinations(teams, 2))
g3 = add_nodes(g3, teams)
g3 = add_edges(g3, races)
print(g3.source)
g3.render('images/demo50_g3')

# Directed graph: nodes and edges carry optional attribute dicts.
nodes = [
    ('A', {'label': 'Apple'}),
    ('B', {'label': 'Google'}),
    ('C', {'label': '臉書'}),
    ('D', {}),
]
edges = [
    (('A', 'B'), {'label': 'phone maker compete'}),
    (('A', 'C'), {'label': 'game platform'}),
    (('B', 'C'), {'label': 'AI develop'}),
    (('B', 'D'), {}),
]
g4 = add_edges(add_nodes(g4, nodes), edges)
g4.render('graph/demo50_g4')

# Graph-level attributes, keyed by the section they apply to.
styles = {
    'graph': dict(
        label='GAFA',
        fontsize='36',
        fontcolor='#C0FFEE',
        bgcolor='#99CCFF',
        rankdir='BT',
        fillcolor='#FFC0EE',
    ),
}
def apply_styles(graph, styles):
    """Merge ``styles['graph']`` into ``graph.graph_attr`` and return ``graph``.

    A missing or empty ``'graph'`` entry leaves the attributes untouched.
    """
    graph.graph_attr.update(styles.get('graph') or {})
    return graph
# Apply the graph-level styles to g4, then render it again under a new file name.
g4 = apply_styles(g4, styles)
g4.render('graph/demo50_g4_complete')
```
## demo51_and_or_explain.py
```python
x1 = True
x2 = False
x3 = False
print(x1 and x2, x1 and x1, x2 and x2)
x2s = [3.14, 500, 'hello world', 3 + 4j, None, True, False]
# x1 is truthy, so `x1 and x2` evaluates to x2 itself (not to a bool).
for x2 in x2s:
    print(x1 and x2)
print("@@@@@@@@@")
# x3 is falsy, so `x3 or x2` also evaluates to x2 itself.
for x2 in x2s:
    print(x3 or x2)
```
## demo52_class_object_creation
```python
# ctrl+H
class MyClass(object):
    """Empty class used to inspect type/class metadata."""
    pass


inst1 = MyClass()
print(f"MyClass type:{type(MyClass)}, generate instance type:{type(inst1)}")
print(f"inst1 class = {inst1.__class__}, class name={inst1.__class__.__name__}")
print(f"inst1 bases = {inst1.__class__.__bases__}")
print(f"inst1 class equal to type? {type(inst1)==MyClass}")


class MySubClass(MyClass):
    """Empty subclass; its only base is MyClass."""
    pass


inst2 = MySubClass()
print(f"my subclass bases = {inst2.__class__.__bases__}")
```
## demo53_object_class_property
```python
class Car:
    """Demo class whose ``vendor`` and ``valid`` are class attributes."""
    vendor = "lexus"
    valid = True


c1 = Car()
c2 = Car()
print(c1.vendor, c2.vendor, Car.vendor)
print(c1.valid, c2.valid, Car.valid)
# Rebinding on the class is seen through every instance that has no value of its own.
Car.valid = False
print(c1.valid, c2.valid, Car.valid)
# Assigning through an instance creates an instance attribute that shadows the class one.
c1.valid = True
print(c1.valid, c2.valid, Car.valid)
# Instances can grow attributes the class never declared.
c1.capacity = 7
c2.color = 'RED'
print(c1.capacity, c1.valid, c1.vendor)
print(c2.color, c2.vendor, c2.valid)
# A class attribute added later is visible through existing instances too.
Car.max = 10000
print(c1.max, c2.max, Car.max)
```
## demo54_class_instance_property
```python
class Team:
    """Demo class with a single class attribute, ``name``."""
    name = 'Normal Team'


team1 = Team()
print(team1.name)
# This creates an instance attribute on team1 only; Team.name is unchanged.
team1.name = 'Big Data Team'
team2 = Team()
print(team1.name, team2.name, Team.name, sep='||')
# Deleting the instance attribute uncovers the class attribute again.
del team1.name
print(team1.name, team2.name, Team.name, sep='||')
```
## demo55_whole_class
```python
class Team:
    """Demo of a class variable, instance field, and the three method kinds."""
    member = 7  # class variable

    def __init__(self):
        # Define the field up front so working_hour() works on a fresh instance
        # instead of raising AttributeError until all_working_hour() has run.
        self.day = 7  # field

    def working_hour(self):  # method
        """Return the instance's ``day`` field."""
        return self.day

    def all_working_hour(self):  # method
        """Reset ``day`` to 7 and return ``day * member``."""
        self.day = 7  # field
        return self.day * self.member

    @classmethod
    def get_member(cls):
        """Return the class-level ``member`` count."""
        return cls.member

    @staticmethod
    def calculate(x, y):
        """Return ``x ** y + 200``; uses no class or instance state."""
        return x ** y + 200


print(Team.calculate(3, 5))
print(Team.get_member())
t1 = Team()
print(t1.all_working_hour())
print(t1.working_hour())
print(Team.get_member(), t1.get_member())
```
## demo56_field_practice.py
```python
class Rectangle:
    """Rectangle described by its width and height."""

    def __init__(self, width, height):
        self.width = width
        self.height = height

    def calculate(self):
        """Return the area (width * height)."""
        return self.width * self.height


r1 = Rectangle(3, 5)
r2 = Rectangle(7, 9)
print(r1.calculate(), r2.calculate())
```
## demo56_inherit
```python
class Emp:
    """Base employee with a shared grade level and no-op work hooks."""
    gradeLevel = 6

    def startWork(self):
        pass

    def endWork(self):
        pass


class PM(Emp):
    """Overrides startWork to report the (inherited) grade level."""

    def startWork(self):
        print('pm with grade=', self.gradeLevel, "start to work")


class RD(Emp):
    """Inherits everything from Emp unchanged."""
    pass


pm1 = PM()
pm1.startWork()
rd1 = RD()
rd1.startWork()
print(Emp.gradeLevel, RD.gradeLevel, PM.gradeLevel)
# Assigning on the subclass gives RD its own attribute; Emp and PM are unaffected.
RD.gradeLevel = 7
print(Emp.gradeLevel, RD.gradeLevel, PM.gradeLevel)
# Changing the base class reaches PM (no override there) but not RD.
Emp.gradeLevel = 8
print(Emp.gradeLevel, RD.gradeLevel, PM.gradeLevel)
# Removing RD's own attribute makes it fall back to Emp's value.
del RD.gradeLevel
print(Emp.gradeLevel, RD.gradeLevel, PM.gradeLevel)
```
## demo57_instance
```
l1 = [10, "300", 35, None, 24, "---", 29, "Hello", 31, 5]
# Add up only the int entries; strings and None are skipped.
# The accumulator is named `total` so the builtin sum() is not shadowed.
total = sum(item for item in l1 if isinstance(item, int))
print(f'sum={total}')
```
## demo58_series1
```python
import pandas as pd
import numpy as np
# The same numbers as a list, an ndarray, and two Series built from each.
l1 = [3, 1, 4, 5, 9, -2, 8]
a1 = np.array(l1)
pd1, pd2 = pd.Series(l1), pd.Series(a1)
print(type(l1), type(a1), type(pd1), type(pd2))
print(pd1 == pd2)
# check pd1 attributes
for attribute in (pd1.values, pd1.index):
    print(type(attribute), attribute)
station_labels = ['Nangang', 'Taipei', 'Banqiao', 'Taoyuan']
pd3 = pd.Series([4, 7, -5, 3], index=station_labels)
print(pd3)
print(pd3.values)
print(type(pd3.index), pd3.index)
# access: one label gives a scalar, a list of labels gives a Series
first_value, third_value = pd1[0], pd1[2]
print('access 1 by 1==>', type(first_value), first_value, third_value)
picked = pd1[[0, 2]]
print('access multiple==>', type(picked), picked, sep='\n')
print(pd3['Taipei'])
wanted = ['Nangang', 'Banqiao']
print(pd3[wanted])
```
## demo59_series2
```Python
import pandas as pd
d1 = {'poop': 35, 'bdpy': 35, 'andbiz2': 28, 'testit': 14}
s1 = pd.Series(d1)
print(d1)
print(s1)
# `in` checks dict keys and Series index labels alike.
print('c#' in d1, 'C#' in s1)
print('poop' in d1, 'poop' in s1)
# An explicit index picks the order in which the dict entries appear.
l1 = ['andbiz2', 'testit', 'poop', 'bdpy']
s2 = pd.Series(d1, index=l1)
print(s2)
# add index more than dictionary
l2 = ['arduino', 'andbiz2', 'testit', 'andbiz3', 'poop', 'bdpy', 'pykt']
s3 = pd.Series(d1, index=l2)
print(s3)
# Four spellings of the same missing-value test.
missing_checks = (
    ('method1 use pd\n', pd.isna(s3)),
    ('method2 use pd\n', pd.isnull(s3)),
    ('method3 use series instance\n', s3.isna()),
    ('method4 use series instance\n', s3.isnull()),
)
for banner, mask in missing_checks:
    print(banner, mask)
d2 = {'poop': "Mark", 'bdpy': None, 'andbiz2': None, 'testit': 'Frank'}
s4 = pd.Series(d2, index=l2)
print(s4)
for mask in (s4.isna(), s4.isnull()):
    print(mask)
```
## demo60_series3
```
import pandas as pd
s1 = pd.Series([1000, 800, 500, 300],
               index=['nangang', 'taipei', 'banqiao', 'taoyuan'])
s2 = pd.Series([500, 300, 400],
               index=['hsinchu', 'taichung', 'tainan'])
s3 = pd.Series([900, 1200, 1500, 1300],
               index=['nangang', 'taipei', 'banqiao', 'taoyuan'])
s4 = pd.Series([1500, 350, 480],
               index=['hsinchu', 'taichung', 'tainan'])
print(s1)
print(s2)
# Addition aligns on index labels: s1 and s2 share none, so every result is NaN.
print(s1 + s2)
print(s1 + s3)
print(s2 + s4)
# append
l1 = [1, 2, 3]
l1.append(4)
print(l1)
# Series.append() was removed in pandas 2.0; pd.concat() is the replacement
# and, like append() did, returns a new Series without touching s1 or s2.
s5 = pd.concat([s1, s2])
s5.index.name = 'station'
s5.name = 'sold(k)'
print(s1)
print(s3)
print(s5)
# change index name (but keep values unchanged)!!
s5.index = ['taipei', 'taichung', 'tainan', 'banqiao', 'nangang',
            'hsinchu', 'taoyuan']
print(s5)
# use index to generate a new Series with index order
s6 = pd.Series(pd.concat([s1, s2]),
               index=['taipei', 'taichung', 'tainan', 'banqiao', 'nangang',
                      'hsinchu', 'taoyuan'])
print(s6)
```
## demo61_series4
```python
import pandas as pd
dict1 = {
    'course': ['poop', 'bdpy', 'pytk', 'aiocv'],
    'year': [2018, 2017, 2019, 2020],
    'slide': [200, 250, 230, 300],
}
dataFrame1 = pd.DataFrame(dict1)
print(type(dict1), type(dataFrame1))
print(dataFrame1.head(n=2))
# `columns` fixes the column order; 'instructor' is not a key of dict1.
column_order = ['course', 'slide', 'year', 'instructor']
dataFrame2 = pd.DataFrame(dict1, columns=column_order)
print(dataFrame2)
row_labels = ['c1', 'c2', 'c3', 'c4']
dataFrame3 = pd.DataFrame(dict1, index=row_labels, columns=column_order)
print(dataFrame3)
print(dataFrame3.columns)
print(dataFrame3.index)
# slice data (choose columns)
single_column = dataFrame3['course']
column_pair = dataFrame3[['course', 'slide']]
print(type(dataFrame3), type(single_column), type(column_pair))
# slice data (choose rows)
single_row = dataFrame3.loc['c1']
print(type(single_row), single_row, sep='\n')
row_pair = dataFrame3.loc[['c1', 'c3']]
print(type(row_pair), row_pair, sep='\n')
# A scalar fills the whole column; a list supplies one value per row.
dataFrame3['year'] = 2020
print(dataFrame3)
dataFrame3['year'] = [2020, 2020, 2020, 2021]
print(dataFrame3)
```
## demo62_series5
```python
import pandas as pd
d1 = {
    'course': ['poop', 'bdpy', 'pykt', 'aiocv'],
    'year': [2018, 2017, 2019, 2020],
    'slide': [200, 250, 230, 300],
}
pd1 = pd.DataFrame(d1)
print(pd1)
print("----")
# A Series assigned as a column is matched to rows by index label.
cities = ['Taipei', 'Hsinchu', 'Taichung', 'Kaohsiung']
s1 = pd.Series(cities, index=[0, 1, 2, 3])
pd1['location'] = s1
# s2 only has labels 0 and 3, so rows 1 and 2 get no value.
s2 = pd.Series(['remote', 'local'], index=[0, 3])
pd1['method'] = s2
# Boolean column derived from an existing one.
is_heavy = pd1['slide'] >= 250
pd1['heavy'] = is_heavy
print(pd1)
del pd1['slide']
print(pd1)
```
## demo63_pandas1
```python
import pandas as pd
# Nested dict: outer keys become columns, inner keys become row labels.
dict1 = {
    'poop': {2019: 5, 2020: 8},
    'bdpy': {2018: 5, 2019: 7, 2020: 10},
}
df1 = pd.DataFrame(dict1)
print(df1)
df2 = df1.T
print(df2)
year_labels = [2018, 2019, 2020, 2021]
df3 = pd.DataFrame(dict1, index=year_labels)
print(df3)
df3_values = df3.values
print(type(df3_values))
print(df3_values)
# Same layout, but one cell holds a string instead of a number.
dict2 = {
    'poop': {2019: 5, 2020: 8},
    'bdpy': {2018: 5, 2019: 7, 2020: 'not yet'},
}
df4 = pd.DataFrame(dict2, index=year_labels)
print(df4)
print(type(df4.values), type(df4.values[0, 0]), type(df4.values[2, 1]))
print(df4.values[0, 0], df4.values[2, 1])
print(df4.values)
# Overwrite two cells by integer position (row, column).
df4.iloc[0, 0] = -1
df4.iloc[2, 1] = 50
print(df4)
```
## demo64_index1
```python
import pandas as pd
import numpy as np
object1 = pd.Series(range(5), index=['p', 'q', 'r', 's', 't'])
print(object1)
index1 = pd.Index(['C', 'D', 'E', 'F', 'G', 'H'], dtype='object')
print(type(index1), index1)
head_part, tail_part = index1[:3], index1[4:]
print(head_part, tail_part)
# The same labels 0..2, once as a pandas Index and once as a plain list.
i1 = pd.Index(np.arange(3))
i2 = list(np.arange(3))
print(i1, i2)
data1 = ['Nangang', 'Taipei', 'Banqiao']
s1 = pd.Series(data1, index=i1)
print(s1)
s2 = pd.Series(data1, index=i2)
print(s2)
print(2 in i1, 2 in i2)
# The index here repeats one label three times; looking it up returns all three rows.
index3 = pd.Index(['Taipei', 'Taipei', 'Taipei'])
print(index3)
s3 = pd.Series(data1, index=index3)
print(s3)
taipei_rows = s3['Taipei']
print("result=\n", taipei_rows)
```
## demo65_index2
```python
import pandas as pd
import numpy as np
s1 = pd.Series([20, 15, 18, 37, 25],
               index=['mar', 'jan', 'feb', 'may', 'apr'])
# reindex() reorders by label; 'jun' is not in s1, so it comes back as NaN.
month_order = ['jan', 'feb', 'mar', 'apr', 'may', 'jun']
s2 = s1.reindex(month_order)
print(s2)
s3 = pd.Series(["L", "M", "S"], index=[0, 5, 10])
print(s3)
# method='ffill' fills each new label from the previous existing one.
s4 = s3.reindex(range(15), method='ffill')
print(s4)
grid = np.arange(16).reshape(4, 4)
s5 = pd.DataFrame(grid, index=[1, 2, 3, 4],
                  columns=['Kotlin', 'Swift', 'C++', 'Java'])
print(s5)
# Reindexing columns works the same way; 'objC' is new and therefore all NaN.
s6 = s5.reindex(columns=['objC', 'Kotlin', 'Swift', 'Java', 'C++'])
print(s6)
```
## demo66_drop
```python
import pandas as pd
dict1 = {
    'station': ['Nangang', 'Taipei', 'Banqiao', 'Taoyuan', 'Hsinchu'],
    'order': [1, 2, 3, 4, 5],
    'backOrder': [5, 4, 3, 2, 1],
}
df1 = pd.DataFrame(dict1)
print(df1)
# drop() returns a new frame each time; df1 itself is untouched by these calls.
dropped_views = (
    df1.drop(2),
    df1.drop([1, 3]),
    df1.drop('order', axis=1),
    df1.drop(['order', 'backOrder'], axis=1),
    df1.drop(['order', 'backOrder'], axis='columns'),
)
for view in dropped_views:
    print(view)
# inplace=True modifies df1 directly.
df1.drop([1, 4], inplace=True)
print(df1)
```
## demo67_functional
```python
import pandas as pd
import numpy as np
df1 = pd.DataFrame(np.random.randn(6, 7),
                   index=list(range(0, 12, 2)),
                   columns=list(range(0, 7, 1)))
print(df1)


def func1(x):
    """Return the spread (max - min) of the Series passed in by apply()."""
    return x.max() - x.min()


# Default axis: func1 receives one column at a time.
result1 = df1.apply(func1)
print(type(result1), result1)
# axis=1 and axis='columns' are the same thing: func1 receives one row at a time.
result2 = df1.apply(func1, axis=1)
print(type(result2), result2)
result3 = df1.apply(func1, axis='columns')
print(type(result3), result3)


def func2(x):
    """Return the min and max of the Series passed in, labelled 'min'/'max'."""
    return pd.Series([x.min(), x.max()], index=['min', 'max'])


# When the function returns a Series, apply() assembles a DataFrame.
result4 = df1.apply(func2)
print(type(result4), result4, sep='\n')
result5 = df1.apply(func2, axis=1)
print(type(result5), result5, sep="\n")
print(df1)
# sort_index() returns a new frame and leaves df1 alone, so print its result
# (calling it and discarding the return value has no effect).
print(df1.sort_index())
print("after sort")
# Use column 1's values as the row labels, then sort by them.
df1.index = df1.iloc[:, 1]
print(df1.sort_index())
# Use row 2's values as the column labels, then sort columns by them.
df1.columns = df1.iloc[2, :]
print("sort by axis=1")
df2 = df1.sort_index(axis=1)
print(df2)
```
## demo68_function2
```python
import pandas as pd
import numpy as np
df3 = pd.DataFrame(np.random.randn(6, 7),
                   index=list(range(0, 12, 2)),
                   columns=list(range(0, 7, 1)))
# Order the rows by the values in the column labelled 3.
by_column_3 = df3.sort_values(by=3)
print(by_column_3)
s1 = pd.Series([4, -15, 7, 7, 2, 2, 0, 0, 4])
# Tie handling: 'average' is the default, 'first' ranks ties by order of
# appearance, 'min'/'max' give every tie the lowest/highest rank of its group.
for tie_method in ('average', 'first', 'min', 'max'):
    print(s1.rank(method=tie_method))
```
## demo69_function3
```python
import pandas as pd
import numpy as np
df1 = pd.DataFrame(np.random.randn(6, 7),
                   index=list(range(0, 12, 2)),
                   columns=list(range(0, 7, 1)))
print(df1)
print(df1.sum())
print(df1.sum(axis='columns'))
# Inject one missing value. np.nan is the supported spelling; the np.NaN
# alias was removed in NumPy 2.0.
df1.iloc[2, 3] = np.nan
# By default NaN is skipped; skipna=False lets it propagate into the result.
print(df1.sum())
print(df1.sum(skipna=False))
print(df1.sum(axis='columns', skipna=False))
# Labels at which each column (or, with axis='columns', each row) peaks/bottoms out.
print(df1.idxmax())
print(df1.idxmin())
print(df1.idxmax(axis='columns'))
print(df1.idxmin(axis='columns'))
print(df1.cumsum())
print(df1.cummax())
print(df1.describe())
```
## demo70_functional4
```python
import pandas as pd
s1 = pd.Series([1, 1, 2, 3, 5, 7, 9])
print(s1.pct_change())
df1 = pd.DataFrame({'JP': [100, 200, 300, 400, 500],
                    'KR': [250, 200, 150, 100, 50]})
print(df1)
# Correlation / covariance between two columns, then the full correlation matrix.
print(df1['JP'].corr(df1['KR']))
print(df1['JP'].cov(df1['KR']))
print(df1.corr())
s2 = pd.Series(['S', 'S', 'S', 'M', 'L', 'S', 'M', 'L', 'S', 'M', 'L'])
print(s2.value_counts())
# The top-level pd.value_counts() is deprecated; pd.Series.value_counts is
# the same operation in its supported form.
print(pd.Series.value_counts(s2))
df2 = pd.DataFrame({'JP': [1, 3, 4, 1, 4, 5],
                    'KR': [2, 3, 3, 2, 4, 2],
                    'TW': [5, 3, 1, 2, 4, 4]})
print(df2)
# Count values per column; a value absent from a column shows up as NaN ...
result = df2.apply(pd.Series.value_counts)
print(result)
# ... which fillna(0) turns into a zero count.
result2 = df2.apply(pd.Series.value_counts).fillna(0)
print(result2)
```
## cd C:\Python36\Python36\Scripts
```
import pandas as pd
import numpy as np
# Interactive-console session: each bare expression is meant to be echoed by the REPL.
# 6x7 frame of random values; row labels 0,2,...,10 and column labels 0..6.
df1 = pd.DataFrame(np.random.randn(6,7),index=list(range(0,12,2)),columns=list(range(0,7,1)))
df1
# Plain [] with a slice selects rows.
df1[:]
df1[2:]
df1[:2]
# iloc takes [row_slice, column_slice], both by integer position.
df1.iloc[:,:]
df1.iloc[:2,:]
df1.iloc[2:,:]
df1.iloc[:,:2]
df1.iloc[:,2:]
df1.iloc[1:3, 2:4]
```
## worldbank
* [worldbank](https://data.worldbank.org/)
```python
import pandas as pd
# The first 4 lines of the file are skipped before the header row is read.
data1 = pd.read_csv('data/data71.csv', skiprows=4)
print(type(data1))
for overview in (data1.columns, data1.shape, data1.head()):
    print(overview)
country_names = data1['Country Name']
print(country_names.values)
# Keep only the rows for one aggregate region.
data2 = data1[country_names == 'Caribbean small states']
print(data2.shape)
```
## demo72_worldbank_demo
```python
import pandas as pd
from pandas_datareader import wb
import numpy as np
data = wb.download(
    indicator='SE.PRM.TENR',
    country=['all'],
    start=2002,
    end=2020
)
print(data.shape)
print(data.head())
# Turn the index levels into ordinary columns so they can be filtered on
# (`country` and `year` are used as columns below).
data2 = data.reset_index()
print(data2.shape)
data3 = data2[data2.country == 'Caribbean small states']
print(data3.shape)
print(data3)
print(np.unique(data2.country, return_counts=True))
print(np.unique(data2.year, return_counts=True))
# Per-country maximum, broadcast back onto every row of that country.
# The string 'max' is used because passing the builtin max to transform()
# is deprecated in recent pandas.
maxData = data2.groupby(['country'])['SE.PRM.TENR'].transform('max')
print(maxData.head())
# True on the rows where a country reaches its own maximum.
index = maxData == data2['SE.PRM.TENR']
print(index)
print(np.unique(index, return_counts=True))
data4 = data2[index]
print(data4)
data5 = data4.sort_values(by='SE.PRM.TENR', ascending=False)
print(data5.head(n=20))
```
# Day4
## get world bank GDP Data
* [GDP Data](https://data.worldbank.org/indicator/NY.GDP.PCAP.KD.ZG)
### demo73_worldbank_lab2
```python
import pandas as pd
import numpy as np
df1 = pd.read_csv("data/demo73.csv", skiprows=4)
print(df1.columns)
print(df1['Country Name'].values)
countries = ['Portugal', 'Italy', 'Ireland', 'Greece', 'Spain']
df2 = df1[df1['Country Name'].isin(countries)]
print(df2.head())
# Year columns are named by their string form: '1960' .. '2019'.
years = [str(x) for x in range(1960, 2020)]
print(years)
# Wide -> long: one row per (country, year) instead of one column per year.
df3 = pd.melt(df2, 'Country Name',
              var_name='year', value_name='GDPPerCapitalGrowth',
              value_vars=years)
print(df3.head())
flattenGDPTable = pd.melt(df1, 'Country Name',
                          var_name='year', value_name='GDPPerCapitalGrowth',
                          value_vars=years)
print(flattenGDPTable.columns)
print(flattenGDPTable.shape)
# Per-country maximum, repeated on every row of that country. The string
# 'max' is used because passing the builtin max to transform() is deprecated
# in recent pandas.
MaxFlattenGDPTable = (
    flattenGDPTable.groupby(['Country Name'])['GDPPerCapitalGrowth'].transform('max')
)
print(MaxFlattenGDPTable.head())
# True on the rows where a country reaches its own maximum.
index = MaxFlattenGDPTable == flattenGDPTable['GDPPerCapitalGrowth']
print(np.unique(index, return_counts=True))
maxGDPTable = flattenGDPTable[index]
print(maxGDPTable.head(n=20))
sortedMaxGDP = maxGDPTable.sort_values(by='GDPPerCapitalGrowth',
                                       ascending=False)
print(sortedMaxGDP.head(n=20))
```
## demo74
* [net enrollment rate](https://data.worldbank.org/indicator/SE.PRM.TENR)
### demo74_corr_from_enrollment
```python
import pandas as pd
data1 = pd.read_csv('data/demo74.csv', skiprows=4)
print(data1.shape)
print(data1.columns)
years = [str(x) for x in list(range(1960, 2020))]
# Wide -> long: one row per (country, year).
flattenData1 = pd.melt(data1, 'Country Name',
                       var_name='year', value_name='SE.PRM.TENR',
                       value_vars=years)
print(flattenData1.shape)
print(flattenData1.head())
country_column = flattenData1['Country Name']
# Index each country's rows by year so the two series line up year by year.
franceData = flattenData1[country_column == 'France']
franceData.index = franceData['year']
print(franceData)
germanData = flattenData1[country_column == 'Germany']
germanData.index = germanData['year']
print(germanData)
fr_rate = franceData['SE.PRM.TENR']
de_rate = germanData['SE.PRM.TENR']
df1 = pd.DataFrame({'fr': fr_rate, 'de': de_rate})
print(df1)
print(df1['fr'].corr(df1['de']))
print(df1['fr'].cov(df1['de']))
print(df1.corr())
```
## demo75
* demo75_write_out
```
import sqlite3
import pandas as pd
data1 = pd.read_csv('data/demo74.csv', skiprows=4)
print(data1.shape)
print(data1.columns)
years = [str(x) for x in range(1960, 2020)]
# Wide -> long: one row per (country, year).
flattenData1 = pd.melt(data1, 'Country Name',
                       var_name='year', value_name='SE.PRM.TENR',
                       value_vars=years)
# Write the same table out three ways: CSV, Excel sheet, SQLite table.
flattenData1.to_csv('data/demo75.csv')
flattenData1.to_excel('data/demo75.xlsx', sheet_name='net_enrollment')
connection1 = sqlite3.connect('data/demo75.sqlite')
try:
    # if_exists='replace' keeps the script re-runnable; the default ('fail')
    # raises as soon as the table exists from a previous run.
    flattenData1.to_sql(name='net_enrollment', con=connection1, if_exists='replace')
finally:
    # Close the connection even when the export fails.
    connection1.close()
```
* pip show openpyxl
* https://sqlitebrowser.org/
## demo76
* demo76_cov19
* [github_cov19](https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv)
```python
import pandas as pd
from pprint import pprint
import pycountry
import plotly.express as px
PATH = 'data/demo76_cov19.csv'
df1 = pd.read_csv(PATH)
print(df1.head())
print('before change')
print(df1.loc[df1['Country'] == 'Taiwan*'])
# change Taiwan* to Taiwan
df1.loc[df1['Country'] == 'Taiwan*', 'Country'] = "Taiwan"
print(df1.loc[df1['Country'] == 'Taiwan'])
list_countries = df1['Country'].unique().tolist()
pprint(list_countries)
# Map each country name to an ISO alpha-3 code (' ' when no match is found).
d_country_code = {}
for country in list_countries:
    try:
        country_data = pycountry.countries.search_fuzzy(country)
    except LookupError:
        # search_fuzzy raises LookupError when nothing matches; catching only
        # that keeps unrelated errors (and Ctrl+C) from being swallowed.
        print('could not add ISO 3 code for ==>', country)
        d_country_code[country] = ' '
    else:
        # Take the first match returned by the fuzzy search.
        d_country_code[country] = country_data[0].alpha_3
```
## demo77
```
import numpy as np
from matplotlib import pyplot as plt
l1 = [1, 2, 3, 4, 5]
l2 = [5, 4, 3, 2, 1]
# '+' on lists concatenates ...
print(l1 + l2)
a1 = np.array(l1)
a2 = np.array(l2)
# ... while arithmetic on ndarrays works element by element.
print(a1 + a2)
print(a1 - a2, a1 * a2, a1 / a2, a1 ** a2)
print(6 * a1 + 4 * a2 ** 2)
# 'o', '.','^','s'
# plt.plot(l1, 's')
# plt.plot(l1, l1, 's')
# plt.plot(l1, l2, 's')
# Format strings: colour letter + marker + line style ('rs-' = red squares, solid line).
plt.plot(a1, a2, 'rs-')
plt.plot(a1, a2 ** 2 - 2 ** a2 + 5, 'go-')
plt.xlabel('quantity')
plt.ylabel('probability')
plt.title("demo77 numpy to matplotlib")
plt.grid()
plt.show()
```
## demo78
* demo78_plot2
```
import numpy as np
from matplotlib import pyplot as plt
# a1 = np.array(range(10))
# First figure: y = x**2 sampled every 0.25 on [0, 10).
a1 = np.arange(0, 10, 0.25)
a2 = a1 ** 2
plt.plot(a1, a2, 'o-')
plt.show()
# Second figure: x**2, x**3 and x**4 on [0, 2) drawn together.
b1 = np.arange(0, 2, 0.1)
b2 = b1 ** 2
b3 = b1 ** 3
b4 = b1 ** 4
plt.plot(b1, b2, 'ro-')
plt.plot(b1, b3, 'g.--')
plt.plot(b1, b4, 'b^-')
# Axis limits as [xmin, xmax, ymin, ymax].
plt.axis([-2, 2, -2, 2])
plt.show()
```
## keep python up2date
### under pyconsole(inside pycharm)
* inside SC
```python
import sys
# IPython-console syntax, not plain Python: the leading "!" runs the rest of
# the line as a shell command, and {sys.executable} is filled in with the
# path of the running interpreter.
!{sys.executable} -m pip install --upgrade pip -i https://artifactory.global.standardchartered.com/artifactory/api/pypi/pypi/simple/
```
* outside SC使用自己電腦外網
```python
import sys
# IPython-console syntax, not plain Python: the leading "!" runs the rest of
# the line as a shell command, and {sys.executable} is filled in with the
# path of the running interpreter.
!{sys.executable} -m pip install --upgrade pip
```
## demo79
```
import numpy as np
from matplotlib import pyplot as plt
b1 = np.arange(0, 2, 0.1)
# One plot() call can take several (x, y, format) triples; linewidth applies to all of them.
plt.plot(b1, b1, 'r--', b1, b1 ** 2, 'g.-', b1, b1 ** 3, 'b*--', linewidth=3)
plt.show()
```
## demo80_subplot
```
import matplotlib.pyplot as plt
import numpy as np
# Style 1: create the figure first, then add the four cells of a 2x2 grid one by one.
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax4 = fig.add_subplot(2, 2, 4)
# plt.plot() draws on the current axes, i.e. the one added last (ax4); ax3 is left empty.
plt.plot(np.random.randn(20).cumsum(), 'r--')
ax1.hist(np.random.randn(200), bins=20, color='c', alpha=1)
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))
plt.show()
# Style 2: subplots() creates the figure and a 2x2 array of axes in one call.
fig, axes = plt.subplots(2, 2)
print(type(axes), axes)
axes[0, 0].hist(np.random.randn(50), bins=20, color='m', alpha=1)
axes[1, 1].scatter(np.arange(30), np.arange(30) + 3 *
np.log(10) * np.random.randn(30))
plt.show()
```
## demo81_pandas_to_plot
```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# 100 rows x 8 columns of draws from a normal distribution (mean 1.0, std 0.07).
randomSequences = pd.DataFrame(np.random.normal(1.0, 0.07, (100, 8)))
print(randomSequences.head())
# Running product down each column.
accumulates = randomSequences.cumprod()
# DataFrame.plot() draws one line per column.
accumulates.plot()
#randomSequences.plot()
plt.show()
```
## demo82_animation
```python
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
x = np.arange(0, 2 * np.pi, 0.01)
line, = ax.plot(x, np.sin(x))


def animate(i):
    """Shift the sine wave's phase by ``i / 5.0`` and return the updated line."""
    line.set_ydata(np.sin(x + i / 5.0))
    return line,


def init():
    """Replace the line's data with a fully masked array and return the line."""
    line.set_ydata(np.ma.array(x, mask=True))
    return line,


# Frame values run from 1 to just under 400 in steps of 0.1, one every 50 ms.
# The result is kept in a variable so the animation object stays alive
# while the window is open.
anim = animation.FuncAnimation(fig, animate, np.arange(1, 400, 0.1),
                               init_func=init,
                               interval=50)
plt.show()
```
## demo83_pdf
```
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
# 10000 samples from a normal distribution with mean mu and standard deviation sigma.
mu = 80
sigma = 8
x = mu + sigma * np.random.randn(10000)
print(len(x))
num_bins = 50
# density=1 normalises the histogram so it is comparable with a probability density.
n, bins, patches = plt.hist(x, num_bins, density=1,
facecolor='blue', alpha=0.5)
# Theoretical density evaluated at the bin edges, drawn over the histogram.
y = norm.pdf(bins, mu, sigma)
plt.plot(bins, y, "r*-")
plt.show()
```
## demo84_population
* [population](https://data.worldbank.org/indicator/SP.POP.TOTL)
```
import pandas
import matplotlib.pyplot as plt
from matplotlib import rc
# The first 4 lines of the file are skipped before the header row is read.
data = pandas.read_csv('data/demo84.csv', skiprows=4)
print(data.head())
print(data.shape)
data.to_excel('data/output84.xlsx', sheet_name='pandas_generated')
print(data.describe())
# New column: element-wise sum of the '1960' and '1980' columns.
data['created'] = data['1960'] + data['1980']
print(data.columns)
# Rows whose country code is AUS.
ausData = data[data['Country Code'] == 'AUS']
print(ausData.shape)
print(ausData['1980'])
selected_years = ['1960', '1970', '1980']
print(plt.style.available)
# NOTE(review): assumes the 'Source Code Pro' font is installed on this machine.
font1 = {'family': 'Source Code Pro'}
rc('font', **font1)
plt.style.use('fast')
# Bar chart with one bar per selected year column.
ausData.plot(kind='bar', y=selected_years, fontsize=12)
plt.show()
```
* Practice
* [Taipei realtime](https://taipeicity.github.io/traffic_realtime/)
* [json api](https://tcgbusfs.blob.core.windows.net/blobyoubike/YouBikeTP.json)
## demo85_youbike
```
# Create a directory named "map" by hand before running (the HTML is saved there).
import folium
import json
taipei = [25.034132, 121.564153]
zoom = 15
map_osm = folium.Map(location=taipei, zoom_start=zoom)
# `with` closes the file even if reading fails.
with open('data/demo85.json', encoding='utf8') as file1:
    resultText = file1.read()
print(type(resultText), resultText[:50])
result = json.loads(resultText)
print(type(result))
# "retVal" holds a dict of station records; only its values are used here.
data = result["retVal"]
# for k in data:
#     print(k)
# One marker per station, placed at its lat/lng.
for v in data.values():
    coord = [v['lat'], v['lng']]
    print(coord)
    folium.Marker(coord, icon=None, popup=None).add_to(map_osm)
map_osm.save('map/demo85.html')
```
## https://data.taipei/
* [勞基法](https://data.taipei/#/dataset/detail?id=23630879-4926-4877-a48a-a0ae6cc2f7d5)
### lab86_read_labor_law
```
import pandas
data1 = pandas.read_csv('data/demo86.csv')
print(data1.head())
print(data1.columns)
print(data1.info())
# Count rows per value of the first grouping column.
by_clause = data1[['處分字號', '違反勞動基準法條款']].groupby(['違反勞動基準法條款'])
data1Grouped = by_clause.count()
print(data1Grouped.head(10))
# Largest groups first.
result = data1Grouped.sort_values('處分字號', ascending=False)
print(result.head(10))
# Same count, grouped by two columns this time.
by_clause_and_content = (
    data1[['處分字號', '違反勞動基準法條款', '違反法規內容']]
    .groupby(['違反勞動基準法條款', '違反法規內容'])
)
data2Grouped = by_clause_and_content.count()
print(data2Grouped.head(10))
result2 = data2Grouped.sort_values('處分字號', ascending=False)
print(result2.head(10))
```
### https://data.taipei/#/dataset/detail?id=a835f3ba-7f50-4b0d-91a6-9df128632d1c
* demo87_recycle_bin
```
import folium
import pandas as pd
sample_data = pd.read_csv('data/shilin.csv', sep=',')
print(sample_data.shape)
print(sample_data.columns)
# Replace the original headers with short ASCII names.
sample_data.columns = ['section', 'road', 'road_detail',
                       'lon', 'lat', 'extra']
print(sample_data.columns)
taipei = [25.095911, 121.518301]
zoom = 15
map_osm = folium.Map(location=taipei, zoom_start=zoom)
# One red marker per row; the popup shows the row number, road and road detail.
for i, row in enumerate(sample_data.itertuples(index=False)):
    coord = [row.lat, row.lon]
    message = f"({i})[{row.road}]{row.road_detail}"
    icon1 = folium.Icon(color='red', icon='info-sign')
    folium.Marker(coord, icon=icon1, popup=message).add_to(map_osm)
# Two extra spots: one drawn with CircleMarker, one with Circle.
spot1 = [24.773071, 121.009916]
folium.CircleMarker(spot1, radius=500, popup='Science Park',
                    fill_color='#C0FFEE').add_to(map_osm)
spot2 = [24.777657, 121.043790]
folium.Circle(spot2, radius=500, popup='ITRI',
              fill_color='#FFC0EE').add_to(map_osm)
map_osm.save('map/demo87.html')
```
# spark installation
## download 2.4.6
## download winutil
* [origin](https://github.com/steveloughran/winutils)
* [newer](https://github.com/cdarlint/winutils)
## unzip spark2.4.6
## copy winutil hadoop 2.7 bin ==> spark \bin
## https://confluence.jetbrains.com/display/JBR/JetBrains+Runtime
```
scala> val textFile = sc.textFile("aa.txt")
textFile: org.apache.spark.rdd.RDD[String] = aa.txt MapPartitionsRDD[1] at textFile at <console>:24
scala> textFile.count()
res0: Long = 5
```
## demo87
```python
import sys
import pyspark
from pyspark import SparkContext
from pprint import pprint
# Show the module search path and the installed pyspark version.
pprint(sys.path)
print(pyspark.__version__)
# First argument is the master ("local"), second is the application name.
sc = SparkContext("local","simple app")
print(sc)
```
## demo88_open_sqlite
```
import sqlite3
connection1 = sqlite3.connect('data/demo88.sqlite')
# Drop-then-create makes the script re-runnable: every run starts with an empty table.
DROP_DDL = "DROP TABLE IF EXISTS EMPLOYEE"
CREATE_DDL = '''
CREATE TABLE EMPLOYEE
(ID INTEGER PRIMARY KEY,
NAME TEXT NOT NULL,
AGE INT NOT NULL,
DEPT INT,
ADDRESS CHAR(50));
'''
try:
    connection1.execute(DROP_DDL)
    connection1.execute(CREATE_DDL)
finally:
    # Close the connection even when one of the statements fails.
    connection1.close()
```
## demo89.py
```
import sqlite3
import time
connection1 = sqlite3.connect('data/demo88.sqlite')
emp1 = {'NAME': "Mark", 'AGE': 43, 'DEPT': 1, "ADDR": 'Taipei'}
emp2 = {'NAME': "John", 'AGE': 42, 'DEPT': 2, "ADDR": 'Hsinchu'}
emp3 = {'NAME': "Ken", 'AGE': 44, 'DEPT': 1, "ADDR": 'Taipei'}
emp4 = {'NAME': "Tim", 'AGE': 45, 'DEPT': 2, "ADDR": 'Kaohsiung'}
employees = [emp1, emp2, emp3, emp4]
# "?" placeholders let sqlite3 bind the values; no SQL is built by string formatting.
INSERT_DML = '''INSERT INTO EMPLOYEE(NAME, AGE, DEPT, ADDRESS)
VALUES(?,?,?,?)'''
start_time = time.time()
try:
    # Insert the four employees 1000 times (4000 rows).
    for i in range(1000):
        for e in employees:
            connection1.execute(INSERT_DML, (e['NAME'], e['AGE'], e['DEPT'],
                                             e['ADDR']))
    # A single commit after all inserts, so the whole batch is one transaction.
    connection1.commit()
    end_time = time.time()
finally:
    # Close the connection even when an insert fails.
    connection1.close()
print(f"total spend {end_time-start_time} seconds")
```
#### data prepare
```python
import tensorflow as tf
from keras import datasets
print(tf.__version__)
# Each call's return value is discarded.
# NOTE(review): presumably these run only to fetch and cache the datasets ahead of time — confirm.
datasets.mnist.load_data()
datasets.boston_housing.load_data()
datasets.imdb.load_data()
datasets.reuters.load_data()
datasets.imdb.get_word_index()
datasets.reuters.get_word_index()
```
# new 4 days
## [MLDL](https://hackmd.io/@O0oTwRN6TESqcS_-MQZDVQ/BJPVjRS7w)