# Linux LLC專題
### 基礎專案1
1. [HTTP]使用 noip.com 服務,建立各組主網域位置(如 dv105.ddns.net),透過主網域可看到各組網頁內容(PHP套件可架設之網站)
2. [DB]啟用 MariaDB 服務。
3. [FS]啟用 NFS&SAMBA 伺服器。
#### 第一題:
* 架設Apache與調整首頁
1. 在Ubuntu安裝Apache、設置防火牆
```
$ sudo apt update && sudo apt install apache2
$ #防火牆先不要做 sudo apt install ufw
$ #防火牆先不要做sudo ufw allow OpenSSH
$ #防火牆先不要做sudo ufw allow in "Apache Full"
$ #防火牆先不要做sudo ufw enable
$ #防火牆先不要做sudo ufw status #檢查當前的防火牆狀態
$ sudo service apache2 status #若已啟動是綠色的Active
```
2. 在文件夾 /var/www/html 中找到這個 Apache 默認歡迎頁面
```
$ sudo vi /var/www/html/index.html
```
3. 替換index.html就可以換成自己想要的網頁
4. 到noip註冊免費網域指向主機ip

* 在同一台主機上用Virtual Host做兩個資料夾給兩個不同網域指向。
1. 在 /var/www/html/ 目錄中為兩個網域建立兩個新目錄
```
$ sudo mkdir -p /var/www/html/sundial.ddns.net/public_html
$ sudo mkdir -p /var/www/html/mapleyu.ddns.net/public_html
$ sudo chmod -R 755 /var/www
```
2. 為兩個目錄新增兩個index.html
```
$ sudo vi /var/www/html/mapleyu.ddns.net/public_html/index.html
```
```
$ sudo vi /var/www/html/sundial.ddns.net/public_html/index.html
```
3. 建立Virtual Host虛擬主機文件(位於 /etc/apache2/sites-available/ 中),告訴Apache網頁伺服器如何響應各種網域請求;修改 VirtualHost 標籤的兩段
```
$ sudo vi /etc/apache2/sites-available/000-default.conf
<VirtualHost *:8080>
ServerName mapleyu.ddns.net
DocumentRoot /var/www/html/mapleyu.ddns.net/public_html
ErrorDocument 404 http://mapleyu.ddns.net/error.html
<Directory "/var/www/html/mapleyu.ddns.net/public_html">
AuthType Basic
AuthName "Restricted Area"
AuthUserFile /etc/apache2/.htpasswd
Require valid-user
</Directory>
<Directory /var/www/mapleyu.ddns.net/public_html>
Options Indexes FollowSymLinks
AllowOverride All
Require all granted
</Directory>
<Directory /var/www/mapleyu.ddns.net/public_html/bob>
Options Indexes FollowSymLinks
AllowOverride All
Require all granted
</Directory>
</VirtualHost>
<VirtualHost *:80>
ServerName sundial.ddns.net
DocumentRoot /var/www/html/sundial.ddns.net/public_html
</VirtualHost>
<VirtualHost *:80>
#後面不變
```
* 404轉址如上ErrorDocument
* 改埠號:如上將 000-default.conf 的 VirtualHost 改為 *:8080,並在 /etc/apache2/ports.conf 加上 `Listen 8080`
* 增加使用者驗證,要改同上面000-default.conf第一段Directory標籤
```
$sudo apt-get install apache2-utils
$sudo htpasswd -c /etc/apache2/.htpasswd username001
```
* 建立使用者網站如http://你的主機名稱/~bob/ 呈現網頁內容,要改同上面000-default.conf第二與第三段Directory標籤
```
#啟動userdir模組
$ sudo a2enmod userdir
$ sudo adduser bob
$ mkdir -p /home/bob/public_html
$ echo Hi bob > /home/bob/public_html/index.html
$ chmod 777 /home/bob
$ sudo systemctl restart apache2
```
4. 啟用新的虛擬主機文件
```
sudo a2ensite 000-default.conf
sudo systemctl reload apache2
```
#### 第二題:
```
$ systemctl start mariadb.service
```
#### 第三題:
```
https://www.linuxprobe.com/ubuntu-configure-nfs.html
https://www.myfreax.com/how-to-install-and-configure-samba-on-ubuntu-20-04/
https://magiclen.org/ubuntu-server-samba/
```
### 進階專案1
1. [DB]滿足各組主網站資料庫需求
2. [SCRIPT]使用 python 撰寫針對某帳號每日自動換密碼腳本。(需定義密碼表規範)
3. [PWC]將日前 python 爬蟲專案一Technews獲取的資料,匯入各組主網站資料庫以自己命名的資料庫中
* 第二題:Ubuntu 修改密碼
1. 在python程式中要執行linux的指令可以import subprocess。
2. 產生亂數可以 import random,使用 random.choice() 的方法。
3. 要產生n位數的字串可以使用.join
```py=
#changepswd.py
import subprocess
import random
import string
import json
def generatePassword(len):
    """Return a random password that is ``len`` characters long.

    Every character is drawn independently from the ASCII letters and
    digits. The draw uses ``random.SystemRandom`` (backed by the operating
    system's entropy source) rather than the module-level ``random``
    functions, whose Mersenne Twister output is predictable and therefore
    unsuitable for secrets.

    Args:
        len: Number of characters to generate. Zero or a negative value
            produces an empty string. The name shadows the builtin; it is
            kept so that existing callers are unaffected.

    Returns:
        The generated password as a ``str``.
    """
    alphabet = string.ascii_letters + string.digits
    rng = random.SystemRandom()
    return ''.join(rng.choice(alphabet) for _ in range(len))
def changePassword(user, newPasswd):
    """Set the password of ``user`` to ``newPasswd`` through ``sudo chpasswd``.

    The ``user:password`` record is sent to ``chpasswd`` on standard input
    and the command is started without a shell, so neither value is
    interpreted by a shell or exposed on a command line.

    Args:
        user: Account name whose password is changed.
        newPasswd: The new password.

    Raises:
        ValueError: If ``user`` is empty or contains ``:``, or if either
            value contains a newline. ``chpasswd`` reads one ``user:password``
            record per line, so such values would corrupt the record.
        subprocess.CalledProcessError: If ``sudo chpasswd`` exits with a
            non-zero status, so a failed change is never mistaken for success.
    """
    if not user or ':' in user or '\n' in user or '\n' in newPasswd:
        raise ValueError('user/password cannot be encoded as a chpasswd record')
    subprocess.run(
        ['sudo', 'chpasswd'],
        input=f'{user}:{newPasswd}\n',
        text=True,
        check=True,
    )
def genPasswdTable(num, passwdLen):
    """Build a table of ``num`` random passwords, each ``passwdLen`` long.

    Args:
        num: Number of passwords to generate.
        passwdLen: Length passed to ``generatePassword`` for every entry.

    Returns:
        A dict whose keys are ``"Password 1"`` .. ``"Password <num>"`` and
        whose values are the generated passwords.
    """
    return {
        f"Password {i}": generatePassword(passwdLen)
        for i in range(1, num + 1)
    }
username = 'user1'  # account whose password is rotated
num = 7             # number of passwords in the table
passwdLen = 6       # length of each password
# Append-only record of password changes (adjust the path for your host).
logPath = '/home/admin1/changepasswd/passwdlog.json'


def main():
    """Generate a password table, pick one entry and apply it to ``username``.

    The table is written to ``passwd.json`` in the current working directory
    (overwriting any previous table), one entry is picked at random and set
    as the account password, and a line describing the change is appended to
    ``logPath``.
    """
    passwordTable = genPasswdTable(num, passwdLen)
    with open('passwd.json', 'w', encoding='utf-8') as fp:
        json.dump(passwordTable, fp, indent=4, ensure_ascii=False)

    selectedPasswd = random.choice(list(passwordTable.values()))
    changePassword(username, selectedPasswd)

    # NOTE(review): the log line contains the plaintext password; make sure
    # the log file is readable only by the administrator.
    result = f"使用者 {username} 的密碼已改為: {selectedPasswd}"
    with open(logPath, 'a', encoding='utf-8') as fp:
        json.dump(result, fp, indent=4, ensure_ascii=False)
        fp.write('\n')


# Only rotate the password when run as a script (e.g. from cron), not when
# the module is imported.
if __name__ == '__main__':
    main()
```
Output:

接著把此python檔案改成執行檔,並執行 crontab -e 將排程寫入


* 第三題:先到Mariadb建立資料庫、資料表
```
CREATE SCHEMA `technews` ;
CREATE TABLE `technews`.`news_contents` (
`index` INT NOT NULL AUTO_INCREMENT,
`category` VARCHAR(45) NULL,
`title` VARCHAR(100) NULL,
`url` VARCHAR(200) NULL,
`sum` INT NULL,
`spotlist` INT NULL,
PRIMARY KEY (`index`));
```
```py=
import requests
from bs4 import BeautifulSoup
import json
import pymysql
url = "https://technews.tw/"
OUTPUT_FILE = 'TechNews2.json'
MAX_BLOCKS = 8     # number of category blocks taken from the page
MAX_SPOTLIST = 3   # number of secondary headlines taken from each block

# Connection settings for the MariaDB instance that receives the rows.
db_settings = {
    "host": "127.0.0.1",
    "port": 3366,
    "user": "root",
    "password": "123456",
    "db": "technews",
    "charset": "utf8"
}

# Placeholders let the driver quote the values, so titles containing quotes
# cannot break (or inject into) the statement.
INSERT_SQL = (
    "INSERT INTO technews.news_contents "
    "(category, title, url, sum, spotlist) VALUES (%s, %s, %s, %s, %s)"
)


def parse_block(block):
    """Extract one category block (an ``li.block2014`` tag) into a dict.

    Returns:
        A dict with the keys ``category``, ``sum_title``, ``sum_title_url``
        and ``spotlist``; ``spotlist`` is a list of ``{"title", "url"}``
        dicts for the first ``li.spotlist`` item and the ``li`` siblings
        that follow it, at most ``MAX_SPOTLIST`` in total.
    """
    first_spot = block.find("li", class_="spotlist")
    spot_items = []
    if first_spot is not None:
        spot_items = [
            first_spot,
            *first_spot.find_next_siblings("li", limit=MAX_SPOTLIST - 1),
        ]

    spotlist = []
    for item in spot_items:
        link = item.find("a")
        if link is not None:
            spotlist.append({"title": link.text, "url": link.get("href")})

    return {
        "category": block.find("div", class_="cat01").text,
        "sum_title": block.find("h3").text,
        "sum_title_url": block.find("a").get("href"),
        "spotlist": spotlist,
    }


def build_rows(entry):
    """Turn one parsed entry into rows for ``technews.news_contents``.

    The main headline becomes a row with ``sum=1, spotlist=0``; every
    spotlist item becomes a row with ``sum=0`` and its 1-based position in
    the ``spotlist`` column.

    Returns:
        A list of ``(category, title, url, sum, spotlist)`` tuples.
    """
    category = entry["category"]
    rows = [(category, entry["sum_title"], entry["sum_title_url"], 1, 0)]
    rows.extend(
        (category, spot["title"], spot["url"], 0, position)
        for position, spot in enumerate(entry["spotlist"], start=1)
    )
    return rows


def main():
    """Scrape the front page, insert the rows and dump them to JSON."""
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # The whole region to scrape.
    container = soup.find("ul", class_="screen")
    if container is None:
        raise SystemExit('no <ul class="screen"> found on the page')

    # Take the first block and its following siblings; stopping early is
    # fine when the page has fewer than MAX_BLOCKS blocks.
    first_block = container.find("li", class_="block2014")
    blocks = []
    if first_block is not None:
        blocks = [
            first_block,
            *first_block.find_next_siblings(
                "li", class_="block2014", limit=MAX_BLOCKS - 1
            ),
        ]
    entries = [parse_block(block) for block in blocks]

    connect_db = pymysql.connect(**db_settings)
    try:
        with connect_db.cursor() as cursor:
            for entry in entries:
                cursor.executemany(INSERT_SQL, build_rows(entry))
        connect_db.commit()
    finally:
        # Release the connection even when an insert fails.
        connect_db.close()

    with open(OUTPUT_FILE, 'w', encoding="utf-8") as f:
        json.dump(entries, f, ensure_ascii=False, indent=4)


if __name__ == '__main__':
    main()
```
Output:

### 進階專案2:
1. 將[海量圖檔](https://drive.google.com/drive/folders/1vWe_UioA0vzZwKeMfUWLAeqQLgLkAbWN?usp=sharing)進行路徑編列,於資料庫中建立表格,提供檔名及路徑查詢(台灣金融單位票據憑證專案皆須使用)
2. 使用python將[IDP日誌](https://drive.google.com/drive/folders/1z7R4ep_G0cIxWLesv3Noe9IyMHjpcnmf?usp=sharing)進行數據處理,整理日期、時間、來源ip、來源port、目標ip、目標port、事件名稱等欄位,自動化讀取目標資料夾檔案後,增加至資料庫中(常見於大型企業資料分析師實機面試題目)
3.
第一題
```
#在Mariadb建立資料表
create database img;
use img;
create table img1(name VARCHAR(30),path VARCHAR(50));
```
```py=
#img_to_mariadb.py
import os
import pymysql
# Directory whose images are catalogued; stored verbatim in the `path` column.
image_path = '/home/admin1/Vol_001/'


def collect_images(directory):
    """Return ``(filename, directory)`` rows for the JPEG files in ``directory``.

    Only regular files located directly in ``directory`` are considered
    (sub-directories are not descended into). The ``.jpg`` suffix is matched
    case-insensitively so that ``.JPG`` files are catalogued too.

    Returns:
        A list of ``(name, directory)`` tuples sorted by file name; empty
        when the directory holds no matching file.
    """
    with os.scandir(directory) as entries:
        names = sorted(
            entry.name
            for entry in entries
            if entry.is_file() and entry.name.lower().endswith('.jpg')
        )
    return [(name, directory) for name in names]


def main():
    """Insert one row per image found under ``image_path`` into ``img.img1``."""
    rows = collect_images(image_path)

    # Connect to MariaDB.
    conn = pymysql.connect(
        user='admin1',
        password='123456',
        host='127.0.0.1',
        database='img'
    )
    try:
        with conn.cursor() as cursor:
            cursor.executemany(
                "INSERT INTO img1 (name, path) VALUES (%s, %s)", rows
            )
        conn.commit()
    finally:
        # Close the connection even if the insert fails.
        conn.close()
    print(f"{len(rows)} image(s) inserted")


if __name__ == '__main__':
    main()
```
Output:

第二題:
先建立資料庫、資料表
```
CREATE database `diarylog` ;
CREATE TABLE `diarylog`.`idp` (
`index` INT NOT NULL AUTO_INCREMENT,
`priority` VARCHAR(45) NULL,
`datetime` DATETIME NULL,
`user` VARCHAR(45) NULL,
`event` VARCHAR(200) NULL,
`source_ip` VARCHAR(45) NULL,
`insert_datetime` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`index`)
);
```
```py=
import csv
import pymysql
import pandas as pd
import re
# CSV file to import.
source_file = 'syslog_2023-5-17-9_2_54.csv'

# An IPv4 address written as " [a.b.c.d]" inside the event text; the group
# captures the bare address.
IP_PATTERN = r' \[(\d+\.\d+\.\d+\.\d+)\]'

# Placeholders let the driver quote every value and turn None into NULL.
INSERT_SQL = (
    "INSERT INTO diarylog.idp (priority, datetime, user, event, source_ip) "
    "VALUES (%s, %s, %s, %s, %s)"
)

# Connection settings for the target MariaDB instance.
db_settings = {
    "host": "127.0.0.1",
    "port": 3366,
    "user": "root",
    "password": "123456",
    "db": "diarylog",
    "charset": "utf8"
}


def load_log(path):
    """Read the log CSV at ``path`` into a cleaned ``DataFrame``.

    The first line of the file is skipped, the second line supplies the
    column names, and the last column name is dropped before the frame is
    built. The frame then gets an ``ip`` column holding the first
    ``[a.b.c.d]`` address found in each event (missing when there is none),
    the bracketed addresses are removed from the event text, the time column
    is parsed into datetimes and the ``日誌`` column is dropped.

    Unlike a per-row ``find()`` slice, the address is taken from every row
    that contains one, so no address is stripped from an event without
    being stored.
    """
    with open(path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        next(reader)            # first line is not part of the table
        header = next(reader)   # second line holds the column names
        records = list(reader)
    header.pop()                # drop the last column name

    df = pd.DataFrame(records, columns=header)
    df['ip'] = df['事件'].str.extract(IP_PATTERN, expand=False)
    df['事件'] = df['事件'].str.replace(IP_PATTERN, '', regex=True)
    df['時間'] = pd.to_datetime(df['時間'])
    return df.drop("日誌", axis=1)


def to_rows(df):
    """Convert the cleaned frame into parameter tuples for ``INSERT_SQL``.

    Missing timestamps and missing addresses become ``None`` so that they
    are stored as SQL NULL; timestamps are converted to plain ``datetime``
    objects.

    Returns:
        A list of ``(priority, datetime, user, event, source_ip)`` tuples.
    """
    columns = (df['優先層級'], df['時間'], df['使用者'], df['事件'], df['ip'])
    return [
        (
            priority,
            None if pd.isna(when) else when.to_pydatetime(),
            user,
            event,
            None if pd.isna(ip) else ip,
        )
        for priority, when, user, event, ip in zip(*columns)
    ]


def main():
    """Load ``source_file`` and insert its rows into ``diarylog.idp``."""
    rows = to_rows(load_log(source_file))

    connect_db = pymysql.connect(**db_settings)
    try:
        with connect_db.cursor() as cursor:
            cursor.executemany(INSERT_SQL, rows)
        connect_db.commit()
    finally:
        # Release the connection even when an insert fails.
        connect_db.close()


if __name__ == '__main__':
    main()
```
Output:

第三題
---
### 補充
#### 如何在Apache上增加不同網域,指向不同網頁。
https://pala.tw/subdomain-apache-virtual-host/
https://ui-code.com/archives/271
https://hackmd.io/@Flion/SJzQrURX2#%E5%8F%83%E8%80%83%E7%B6%B2%E7%AB%99
###### tags: `python` `爬蟲` `apache` `專案一` `mariadb` `LinuxCCP`