ENV
===
set hostnames
---
```
# vim /etc/hosts
10.18.30.43 atw-lab-pc1-vm1
10.18.30.42 atw-lab-pc1-vm2
```
config ssh
---
*ssh-keygen*
*ssh-copy-id -i ~/.ssh/id_rsa.pub hdoop@atw-lab-pc1-vm1* (repeat for every node so each host can SSH to the others without a password)
(open the required ports or disable the firewall between the nodes)
---
JAVA
===
java-8-openjdk-amd64
install
---
```
sudo apt install openjdk-8-jdk
```
set JAVA home path
---
```
# ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
```
HADOOP
===
Hadoop 3.2.2
set hadoop-env.sh
---
```
# /usr/local/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib/native"
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
```
set hdfs-site.xml
---
*mkdir -p -m 777 /usr/local/hadoop/data/nameNode*
*mkdir -p -m 777 /usr/local/hadoop/data/dataNode*
```
# /usr/local/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/data/nameNode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop/data/dataNode</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/usr/local/hadoop/etc/hadoop/excludes</value>
</property>
</configuration>
```
set core-site.xml
---
*mkdir -m 777 /home/hdoop/tmpdata*
```
# /usr/local/hadoop/etc/hadoop/core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hdoop/tmpdata</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://atw-lab-pc1-vm1:9000</value>
</property>
</configuration>
```
set workers
---
```
# /usr/local/hadoop/etc/hadoop/workers
atw-lab-pc1-vm1
atw-lab-pc1-vm2
```
set yarn-site.xml
---
```
# /usr/local/hadoop/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>atw-lab-pc1-vm1</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
```
set Hadoop env path
---
```
# ~/.bashrc
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_CLASSPATH=$JAVA_HOME/lib/tools.jar
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
```
format and start
---
*source ~/.bashrc*
*hdfs namenode -format*
*start-all.sh*
SPARK
===
spark 3.1.1
scala 2.12.13
Download Scala and Spark
---
Spark 3.1.1 is pre-built for Hadoop 3.2+
Spark 3.0+ is pre-built with Scala 2.12
```
cd /usr/local/spark_source
# scala
wget https://downloads.lightbend.com/scala/2.12.13/scala-2.12.13.tgz
# spark
wget https://downloads.apache.org/spark/spark-3.1.1/spark-3.1.1-bin-hadoop3.2.tgz
```
install Scala (ALL NODES)
---
uncompress
```
# scala
tar -zxvf scala-2.12.13.tgz
# spark
tar -zxvf spark-3.1.1-bin-hadoop3.2.tgz
```
create a soft link
```
sudo ln -f -s /usr/local/spark_source/scala-2.12.13 /usr/lib/scala
```
Install Spark (ALL NODES)
---
move spark folder to /usr/local/spark
```
sudo mv /usr/local/spark_source/spark-3.1.1-bin-hadoop3.2 /usr/local/spark
```
set config
```
cd /usr/local/spark/conf/
mv spark-env.sh.template spark-env.sh
mv workers.template workers
mv spark-defaults.conf.template spark-defaults.conf
```
```
# vim spark-env.sh
export SCALA_HOME=/usr/lib/scala
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
export SPARK_MASTER_HOST=atw-lab-pc1-vm1
export SPARK_WORKER_MEMORY=32G
export SPARK_DAEMON_MEMORY=4G
```
```
# vim workers
localhost (delete this line)
atw-lab-pc1-vm1
atw-lab-pc1-vm2
```
```
# vim spark-defaults.conf
spark.master spark://atw-lab-pc1-vm1:7077
spark.eventLog.enabled true
spark.eventLog.dir hdfs://atw-lab-pc1-vm1:9000/user/hdoop/log
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.driver.memory 4g
spark.executor.memory 6g
spark.executor.cores 2
spark.dynamicAllocation.maxExecutors 2
spark.ui.enabled true
```
Set Spark env path
---
```
# vim ~/.bashrc
export SCALA_HOME=/usr/lib/scala
export PATH=$PATH:$SCALA_HOME/bin
export SPARK_HOME=/usr/local/spark
export PATH=$PATH:$SPARK_HOME/bin
# source ~/.bashrc
```
Start Spark
---
*cd /usr/local/spark*
*./sbin/start-all.sh*
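To sanity-check the cluster after `start-all.sh`, a small PySpark job can be pointed at the standalone master from `spark-defaults.conf`. This is only a minimal sketch: it assumes `pyspark` is installed (see the Python section below) and that the HDFS event-log directory from `spark-defaults.conf` already exists.
```
# smoke_test.py - minimal check that the standalone master accepts applications
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .master("spark://atw-lab-pc1-vm1:7077")
         .appName("cluster-smoke-test")
         .getOrCreate())

# distribute a trivial computation across the workers
total = spark.sparkContext.parallelize(range(1000), numSlices=4).sum()
print(total)  # expected: 499500

spark.stop()
```
The standalone master web UI (port 8080 by default) should also list both workers.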
HIVE
===
install hive
---
*tar -zxvf apache-hive-3.1.2-bin.tar.gz*
*sudo mv apache-hive-3.1.2-bin /usr/local/hive*
Setup environment variables
---
```
#vim ~/.bashrc
export HIVE_HOME=/usr/local/hive
export PATH=$HIVE_HOME/bin:$PATH
export HIVE_CONF_DIR=/usr/local/hive/conf
#source ~/.bashrc
```
Setup Hive HDFS folders
---
```
hadoop fs -mkdir /tmp
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -chmod g+w /tmp
hadoop fs -chmod g+w /user/hive/warehouse
hdfs dfs -mkdir -p /user/hdoop/log
```
install MySQL (master only)
---
```
sudo apt update
sudo apt install mysql-server (-y)
sudo mysql
mysql> CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive' password expire never;
mysql> GRANT ALL ON *.* TO 'hive'@'localhost';
mysql> exit;
mysql -u hive -phive
mysql> create database hive_metastore;
# Query OK, 1 row affected (0.00 sec)
mysql> show databases;
# After these steps, the following database objects are created:
# user: hive
# password: hive
# database: hive_metastore
```
Download MySQL JDBC driver
---
```
wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.22/mysql-connector-java-8.0.22.jar
mv mysql-connector-java-8.0.22.jar /usr/local/hive/lib/
cp /usr/local/hive/conf/hive-default.xml.template /usr/local/hive/conf/hive-site.xml
```
guava lib issue
---
```
rm $HIVE_HOME/lib/guava-19.0.jar
cp $HADOOP_HOME/share/hadoop/common/lib/guava-27.0-jre.jar $HIVE_HOME/lib/
```
config and run hive
---
```
# vim /usr/local/hive/conf/hive-site.xml
javax.jdo.option.ConnectionDriverName: com.mysql.cj.jdbc.Driver
javax.jdo.option.ConnectionURL: jdbc:mysql://localhost/hive_metastore
javax.jdo.option.ConnectionUserName: hive
javax.jdo.option.ConnectionPassword: hive
hive.metastore.uris: thrift://127.0.0.1:9083
hive.metastore.db.type: mysql
<property>
<name>system:java.io.tmpdir</name>
<value>/tmp/hive/java</value>
</property>
<property>
<name>system:user.name</name>
<value>${user.name}</value>
</property>
```
```
schematool -dbType mysql -initSchema
/usr/local/hive/bin/hive --service metastore &
```
if the NameNode location changes
---
```
hive --service metatool -updateLocation hdfs://atw-lab-pc1-vm3:9000 hdfs://atw-lab-pc1-vm1:9000
```
Docker
===
```
sudo apt-get update
sudo apt-get install \
apt-transport-https \
ca-certificates \
curl \
gnupg \
lsb-release
curl -x "http://user:password@10.18.131.1:8080" -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo \
"deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update
sudo apt-get install docker-ce docker-ce-cli containerd.io
sudo systemctl start docker
# set proxy (create the drop-in directory first: sudo mkdir -p /etc/systemd/system/docker.service.d)
sudo vim /etc/systemd/system/docker.service.d/http-proxy.conf
[Service]
Environment="HTTP_PROXY=http://labpc18:hmy4q6s01KyqR6L7hWh3@10.18.131.1:8080"
Environment="HTTPS_PROXY=http://labpc18:hmy4q6s01KyqR6L7hWh3@10.18.131.1:8080"
sudo systemctl daemon-reload
sudo systemctl restart docker
```
KAFKA
===
```
# vim docker-compose.yaml
environment:
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,CONNECTIONS_FROM_HOST://atw-lab-pc1-vm3:19092
```
```
docker-compose up -d
docker exec -it poc_test_kafka_1 bash
/opt/kafka/bin/kafka-topics.sh --bootstrap-server localhost:9092 --create --replication-factor 1 --partitions 4 --topic poc
./src/sendRMQSrcConfig.sh localhost:8083
```
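With the `poc` topic created, messages can be exchanged from outside the containers through the CONNECTIONS_FROM_HOST listener. A minimal sketch using kafka-python; the payload is illustrative:
```
# kafka_check.py - produce and consume a test message on the 'poc' topic
# (assumes `pip3 install kafka-python` and the listener advertised above)
from kafka import KafkaProducer, KafkaConsumer

BOOTSTRAP = "atw-lab-pc1-vm3:19092"  # CONNECTIONS_FROM_HOST listener from docker-compose.yaml

producer = KafkaProducer(bootstrap_servers=BOOTSTRAP)
producer.send("poc", value=b'{"message_type": "qa", "payload": "test"}')
producer.flush()

consumer = KafkaConsumer(
    "poc",
    bootstrap_servers=BOOTSTRAP,
    auto_offset_reset="earliest",
    consumer_timeout_ms=5000,  # stop iterating after 5 s without new messages
)
for record in consumer:
    print(record.partition, record.offset, record.value)
```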
Python
===
kafka-python
hdfs
pyspark
~~watchdog~~
~~pika~~
uvicorn
fastapi
```
sudo apt install python3-pip
pip3 install pyspark --proxy=http://labpc18:hmy4q6s01KyqR6L7hWh3@10.18.131.1:8080
# kafka-python, hdfs, fastapi and uvicorn install the same way
sudo apt install docker-compose
vim consumer.py
vim consumer_hive.py
```
```
from hdfs import InsecureClient

hdfsclient = InsecureClient('http://atw-lab-pc1-vm1:9870', user='hdoop')
```
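consumer.py then only has to pull records from Kafka and append them to HDFS through this client. A rough sketch of that loop; the topic, listener, paths and payload format are illustrative assumptions:
```
# consumer.py (sketch): drain the 'poc' topic into an HDFS file via WebHDFS
from hdfs import InsecureClient
from kafka import KafkaConsumer

hdfsclient = InsecureClient('http://atw-lab-pc1-vm1:9870', user='hdoop')
consumer = KafkaConsumer('poc', bootstrap_servers='atw-lab-pc1-vm3:19092',
                         auto_offset_reset='earliest')

hdfsclient.makedirs('/user/hdoop/raw')            # no-op if it already exists
target = '/user/hdoop/raw/poc_messages.jsonl'

for record in consumer:
    line = record.value.decode('utf-8') + '\n'
    # first write creates the file, later writes append to it
    if hdfsclient.status(target, strict=False) is None:
        hdfsclient.write(target, data=line, encoding='utf-8')
    else:
        hdfsclient.write(target, data=line, encoding='utf-8', append=True)
```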
DB
---
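All of the `spark.sql(...)` snippets below assume a PySpark session with Hive support enabled, so the tables land in the MySQL-backed metastore set up above. A minimal sketch; the metastore is assumed reachable on the master node and the warehouse path is the HDFS folder created in the Hive section:
```
# session used for the spark.sql(...) calls in this section
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .appName("poc-db-setup")
         .config("hive.metastore.uris", "thrift://atw-lab-pc1-vm1:9083")
         .config("spark.sql.warehouse.dir", "/user/hive/warehouse")
         .enableHiveSupport()
         .getOrCreate())
```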
Metadata
```
spark.sql("CREATE TABLE message_type (message_type STRING)")
spark.sql("INSERT INTO message_type VALUES ('motion'), ('qa'), ('predict')")
```

```
# spark.sql("CREATE TABLE motion (equipment_model STRING, module STRING, column_num INT, definition STRING)")
spark.sql("CREATE TABLE motion_meta (version INT, equip_model STRING, equip_module STRING, definition STRING)")
#spark.sql("INSERT OVERWRITE motion VALUES ('wb', 'bs', 292, 'feature6 int,feature7 string,feature8 int,feature9 double,feature10 double,feature11 double,feature12 double,feature13 double,feature14 double,feature15 double,feature16 double,feature17 double,feature18 double,feature19 double,feature20 double,feature21 double,feature22 double,feature23 double,feature24 double,feature25 double,feature26 double,feature27 int,feature28 double,feature29 int,feature30 int,feature31 int,feature32 int,feature33 int,feature34 int,feature35 int,feature36 int,feature37 int,feature38 int,feature39 int,feature40 string,feature41 double,feature42 double,feature43 int,feature44 int,feature45 int,feature46 int,feature47 double,feature48 int,feature49 int,feature50 int,feature51 int,feature52 int,feature53 int,feature54 int,feature55 int,feature56 int,feature57 int,feature58 int,feature59 int,feature60 int,feature61 int,feature62 int,feature63 int,feature64 int,feature65 int,feature66 int,feature67 int,feature68 int,feature69 int,feature70 int,feature71 int,feature72 int,feature73 int,feature74 int,feature75 int,feature76 int,feature77 int,feature78 int,feature79 int,feature80 int,feature81 int,feature82 int,feature83 int,feature84 int,feature85 int,feature86 int,feature87 int,feature88 double,feature89 double,feature90 double,feature91 double,feature92 double,feature93 double,feature94 double,feature95 double,feature96 double,feature97 double,feature98 int,feature99 int,feature100 int,feature101 int,feature102 int,feature103 int,feature104 int,feature105 int,feature106 int,feature107 int,feature108 int,feature109 int,feature110 int,feature111 int,feature112 int,feature113 double,feature114 double,feature115 int,feature116 int,feature117 int,feature118 int,feature119 int,feature120 int,feature121 int,feature122 int,feature123 int,feature124 int,feature125 double,feature126 double,feature127 double,feature128 double,feature129 double,feature130 double,feature131 double,feature132 double,feature133 double,feature134 double,feature135 int,feature136 int,feature137 int,feature138 int,feature139 int,feature140 int,feature141 int,feature142 int,feature143 int,feature144 int,feature145 int,feature146 int,feature147 int,feature148 int,feature149 int,feature150 double,feature151 double,feature152 int,feature153 int,feature154 int,feature155 int,feature156 int,feature157 int,feature158 int,feature159 int,feature160 int,feature161 int,feature162 double,feature163 double,feature164 double,feature165 double,feature166 double,feature167 double,feature168 double,feature169 double,feature170 double,feature171 double,feature172 int,feature173 int,feature174 int,feature175 int,feature176 int,feature177 int,feature178 int,feature179 int,feature180 int,feature181 int,feature182 int,feature183 int,feature184 int,feature185 int,feature186 int,feature187 double,feature188 double,feature189 int,feature190 int,feature191 int,feature192 int,feature193 int,feature194 int,feature195 int,feature196 int,feature197 int,feature198 int,feature199 double,feature200 double,feature201 double,feature202 double,feature203 double,feature204 double,feature205 double,feature206 double,feature207 double,feature208 double,feature209 int,feature210 int,feature211 int,feature212 int,feature213 int,feature214 int,feature215 int,feature216 int,feature217 int,feature218 int,feature219 int,feature220 int,feature221 int,feature222 int,feature223 int,feature224 double,feature225 double,feature226 int,feature227 int,feature228 int,feature229 int,feature230 
int,feature231 int,feature232 int,feature233 int,feature234 int,feature235 int,feature236 double,feature237 double,feature238 double,feature239 double,feature240 double,feature241 double,feature242 double,feature243 double,feature244 double,feature245 double,feature246 int,feature247 int,feature248 int,feature249 int,feature250 int,feature251 int,feature252 int,feature253 int,feature254 int,feature255 int,feature256 int,feature257 int,feature258 int,feature259 int,feature260 int,feature261 double,feature262 double,feature263 int,feature264 int,feature265 int,feature266 int,feature267 int,feature268 int,feature269 int,feature270 int,feature271 int,feature272 double,feature273 double,feature274 int,feature275 int,feature276 int,feature277 double,feature278 double,feature279 int,feature280 int,feature281 int,feature282 double,feature283 double,feature284 double,feature285 double,feature286 double,feature287 double,feature288 double,feature289 double')")
spark.sql("INSERT OVERWRITE motion_meta VALUES (0, 'testing', 'testing','{'grp_1':{'feat1': 'int'}}')")
spark.sql("select * from motion").show()
```

```
spark.sql("CREATE TABLE IF NOT EXISTS row_strip_detail (strip_detail_uid STRING, lf_uid STRING, unit_no INT, row_no INT, col_no INT, group_no INT, die_no INT, wire_no INT)")
spark.sql("CREATE TABLE IF NOT EXISTS measure_item (ms_item_uid int, measure_item string, type string)")
spark.sql("CREATE TABLE IF NOT EXISTS leadframe (lf_uid string, lf_id string, im_uid string)")
spark.sql("CREATE TABLE IF NOT EXISTS input_magazine (im_uid string, input_mag_no int, l_uid string)")
spark.sql("CREATE TABLE IF NOT EXISTS lot (l_uid string, lot_id string, rp_uid string, mc_uid string)")
spark.sql("CREATE TABLE IF NOT EXISTS machine (mc_uid string, machine_id string)")
spark.sql("CREATE TABLE IF NOT EXISTS recipe (rp_uid string, recipe_name string)")
```
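These dimension tables key into each other through the *_uid columns (lot → machine and recipe, leadframe → input_magazine → lot), so typical lookups join them back together. An illustrative query, assuming the tables have been populated:
```
# resolve each lot to its machine and recipe
spark.sql("""
    SELECT l.lot_id, m.machine_id, r.recipe_name
    FROM lot l
    JOIN machine m ON l.mc_uid = m.mc_uid
    JOIN recipe  r ON l.rp_uid = r.rp_uid
""").show()
```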
```
#delete
spark.sql("drop table bs_motion")
spark.sql("drop table row_strip_detail ")
spark.sql("drop table measure_item")
spark.sql("drop table leadframe ")
spark.sql("drop table input_magazine ")
spark.sql("drop table lot ")
spark.sql("drop table machine ")
spark.sql("drop table recipe ")
```
```
spark.sql("INSERT INTO measure_item VALUES (0,'ball_shear','wire'),(1,'wire_pull','wire'),(2,'neck_pull','wire'),(3,'stitch_pull','wire'),(4,'inner_ball_size_x','wire'),(5,'inner_ball_size_y','wire'),(6,'placement','die'),(7,'tilt','die')")
```
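measure_item then acts as a lookup from ms_item_uid to the measurement name and its level (wire vs die), for example:
```
# list the wire-level measurement items seeded above
spark.sql("SELECT ms_item_uid, measure_item FROM measure_item WHERE type = 'wire'").show()
```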
```
spark.sql("CREATE TABLE IF NOT EXISTS qa_item (
STRING, ms_item_uid STRING, strip_detail_uid STRING, rp_uid STRING, l_uid STRING, mc_uid STRING, value INT, result STRING, spec_lcl INT, spec_ucl INT, spec_lsl INT, spec_usl INT) STORED AS ORC")
spark.sql("CREATE TABLE IF NOT EXISTS inner_ball_size_x (date STRING, strip_detail_uid STRING, rp_uid STRING, l_uid STRING, mc_uid STRING, value INT, result STRING, spec_lcl INT, spec_ucl INT, spec_lsl INT, spec_usl INT) STORED AS ORC")
spark.sql("CREATE TABLE IF NOT EXISTS inner_ball_size_y (date STRING, strip_detail_uid STRING, rp_uid STRING, l_uid STRING, mc_uid STRING, value INT, result STRING, spec_lcl INT, spec_ucl INT, spec_lsl INT, spec_usl INT) STORED AS ORC")
spark.sql("CREATE TABLE IF NOT EXISTS ball_shear (date STRING, strip_detail_uid STRING, rp_uid STRING, l_uid STRING, mc_uid STRING, value INT, result STRING, spec_lcl INT, spec_ucl INT, spec_lsl INT, spec_usl INT) STORED AS ORC")
```
```
# delete
spark.sql("drop table inner_ball_size_x ")
spark.sql("drop table inner_ball_size_y ")
spark.sql("drop table ball_shear ")
```
```
spark.sql("CREATE TABLE IF NOT EXISTS ball_shear (value INT, result STRING, spec_lcl INT, spec_ucl INT, spec_lsl INT, spec_usl INT) PARTITIONED BY (date STRING, strip_detail_uid STRING, rp_uid STRING, l_uid STRING, mc_uid STRING) CLUSTERED BY (value, result) INTO 4 BUCKETS STORED AS ORC")
spark.sql("CREATE TABLE IF NOT EXISTS qa_wire_based_stats (wire_no INT, pass_num INT, reject_num INT, min FLOAT, max FLOAT, bin_list ARRAY<ARRAY<FLOAT>>, bin_count ARRAY<FLOAT>) PARTITIONED BY (measure_item STRING, date INT, hour INT, machine_id STRING, recipe_name STRING, lot_id STRING) CLUSTERED BY (wire_no) INTO 100 BUCKETS STORED AS ORC")
spark.sql("CREATE TABLE IF NOT EXISTS qa_unit_based_stats (unit_no INT, pass_num INT, reject_num INT, min FLOAT, max FLOAT, bin_list ARRAY<ARRAY<FLOAT>>, bin_count ARRAY<FLOAT>) PARTITIONED BY (measure_item STRING, date INT, hour INT, machine_id STRING, recipe_name STRING, lot_id STRING) CLUSTERED BY (unit_no) INTO 100 BUCKETS STORED AS ORC")
spark.sql("CREATE TABLE IF NOT EXISTS predict_wire_based_stats (wire_no INT, pass_num INT, reject_num INT, min FLOAT, max FLOAT, bin_list ARRAY<ARRAY<FLOAT>>, bin_count ARRAY<FLOAT>) PARTITIONED BY (model_version INT, measure_item STRING, date INT, hour INT, machine_id STRING, recipe_name STRING, lot_id STRING) CLUSTERED BY (wire_no) INTO 100 BUCKETS STORED AS ORC")
spark.sql("CREATE TABLE IF NOT EXISTS predict_unit_based_stats (unit_no INT, pass_num INT, reject_num INT, min FLOAT, max FLOAT, bin_list ARRAY<ARRAY<FLOAT>>, bin_count ARRAY<FLOAT>) PARTITIONED BY (model_version INT, measure_item STRING, date INT, hour INT, machine_id STRING, recipe_name STRING, lot_id STRING) CLUSTERED BY (unit_no) INTO 100 BUCKETS STORED AS ORC")
spark.sql("CREATE TABLE IF NOT EXISTS anomaly_bb_stats (pass_num INT, reject_num INT) PARTITIONED BY (model_version INT, date INT, hour INT, machine_id STRING, recipe_name STRING, lot_id STRING) STORED AS ORC")
spark.sql("CREATE TABLE IF NOT EXISTS anomaly_bs_stats (pass_num INT, reject_num INT) PARTITIONED BY (model_version INT, date INT, hour INT, machine_id STRING, recipe_name STRING, lot_id STRING) STORED AS ORC")
```
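Writes into these tables need dynamic partitioning enabled, since the partition values come from the data rather than a static spec. A minimal sketch against the (non-bucketed) anomaly_bs_stats table; the values are illustrative:
```
# allow fully dynamic partition values on insert
spark.sql("SET hive.exec.dynamic.partition.mode=nonstrict")

# column order: regular columns first, then partition columns in declaration order
spark.sql("""
    INSERT INTO anomaly_bs_stats
    PARTITION (model_version, date, hour, machine_id, recipe_name, lot_id)
    VALUES (95, 5, 1, 20210601, 10, 'mc-001', 'recipe-a', 'lot-123')
""")

spark.sql("SHOW PARTITIONS anomaly_bs_stats").show(truncate=False)
```
Note that Spark does not produce Hive-compatible bucketed output, so inserts into the CLUSTERED BY tables above may be rejected or written without bucketing unless hive.enforce.bucketing/hive.enforce.sorting are relaxed.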
Migrate