# FL 觀察檔案生成
跑了兩個訓練(hello-pt 和 hello-pt-tb),MONAI 跟 hello-tf2 環境需要另外安裝
```
pip3 install monai
pip3 install tensorflow
```
## Server:
訓練的時候會生成一個資料夾(名稱是job_id),結束訓練時會**自動銷毀**
主要的檔案路徑:
```
/poc/server/transfer/job-id/workspace
```
* FL_global_model.pt -- 全域模型(/app_server)
* cross_val_results.json -- 紀錄各站點訓練的準確率(待確認)(/cross_site_val)


## Client:
訓練的時候會生成一個資料夾(名稱是job_id),結束訓練時**不會**銷毀,有自己的本地model
主要的檔案路徑:
```
/poc/site-n/transfer/job-id
```
* local_model.pt -- 本地模型(/models)

## Admin:
需先使用download_job指令將server的模型下載到admin的資料夾內
主要的檔案路徑: (訓練完的檔案內容和server的完全一樣)
```
/poc/admin/transfer/job-id/workspace/
```

## 新的nvflare_reservation_slurm.sh
```shell
# Usage example:
#   bash nvflare_reservation.sh hpc-site-1,hpc-site-2 hospital-ncku-1,hospital-ncku-2
# Positional arguments (each a comma-separated list of site names; the
# literal value "none" makes the script skip that kind of site):
#   $1 - HPC sites
#   $2 - hospital sites
hpcList="${1}"
hospitalList="${2}"
## slurmQueue
# Write the Slurm "#SBATCH" header that is later prepended to a generated
# node script.
# Globals:
#   FILE  (read) - path of the header file to create or overwrite
#   queue (read) - "gpu" selects partition ngs1gpu with one GPU; any other
#                  value selects the CPU partition ngs7G
# Returns: 0 on success, non-zero if FILE is empty or cannot be written.
function slurmQueue(){
  if [ -z "${FILE:-}" ]; then
    echo "slurmQueue: FILE is not set" >&2
    return 1
  fi

  # The header is assembled as an array of literal lines and written with a
  # single printf, so nothing in it is expanded or executed by the shell.
  local -a header_lines=(
    '#!/bin/bash'
    '#SBATCH -A MST110490 # Account name/project number'
    '#SBATCH -J nvflare_job # Job name'
  )

  # Only the resource request differs between the two queues.
  if [ "$queue" == "gpu" ]; then
    header_lines+=(
      '#SBATCH -p ngs1gpu # Partition Name 等同PBS裡面的 -q Queue name'
      '#SBATCH -c 6 # 使用的core數 請參考Queue資源設定'
      '#SBATCH --mem=90g # 使用的記憶體量 請參考Queue資源設定'
      '#SBATCH --gres=gpu:1 # 使用的GPU數 請參考Queue資源設定'
    )
  else
    header_lines+=(
      '#SBATCH -p ngs7G # Partition Name 等同PBS裡面的 -q Queue name'
      '#SBATCH -c 2 # 使用的core數 請參考Queue資源設定'
      '#SBATCH --mem=7g # 使用的記憶體量 請參考Queue資源設定'
    )
  fi

  header_lines+=(
    '#SBATCH --mail-user=summerhill001@gmail.com # email'
    '#SBATCH --mail-type=BEGIN,END # 指定送出email時機 可為NONE, BEGIN, END, FAIL, REQUEUE, ALL'
    '#SBATCH -o log/%j.logi # Path to the standard output and error files relative to the working directory'
  )

  printf '%s\n' "${header_lines[@]}" > "$FILE"
}
## session num
# A single timestamp identifies this reservation both remotely (S3 prefix)
# and locally (output folder), so the two names always match.
nvflare_session=$(date '+%Y%m%d%H%M%S')
reservationFolder="/work/$(whoami)/nvflare/reservation_${nvflare_session}"
mkdir -p "${reservationFolder}" || { echo "cannot create ${reservationFolder}" >&2; exit 1; }
touch "${reservationFolder}/serverDomain.txt"
# Local folder, named after the timestamp, that collects the generated scripts
local_folder_name="nvflare_${nvflare_session}"
mkdir "$local_folder_name" || { echo "cannot create ${local_folder_name}" >&2; exit 1; }

# Shared settings for every S3 call and template download below.
aws_s3=("$HOME/local/bin/aws" --endpoint-url=http://s3.twcc.ai s3)
s3_prefix="s3://nvflare2/reservation_${nvflare_session}"
template_base="https://covid-19.nchc.org.tw/mynvflare"
presign_ttl=604800 # lifetime of each presigned URL, in seconds

# Presigned URLs of all generated scripts, printed at the end.
nodeArr=()

# Private temp file for the Slurm header: a fixed name under /tmp can clash
# with other users (or other runs) on a shared machine.
queue_header=$(mktemp) || { echo "mktemp failed" >&2; exit 1; }
trap 'rm -f -- "$queue_header"' EXIT

#######
## S3 function
# Publish the reservation folder and presign serverDomain.txt; that URL is
# injected as the first line ("url=...") of every generated script.
"${aws_s3[@]}" sync "${reservationFolder}" "${s3_prefix}"
url_serverDomain=$("${aws_s3[@]}" presign "${s3_prefix}/serverDomain.txt" --expires-in "$presign_ttl")
#echo $url_serverDomain

## make server script
wget "${template_base}/template_server.sh" -O server.sh \
  || { echo "download of template_server.sh failed" >&2; exit 1; }
sed -i "1i url=\"$url_serverDomain\"" server.sh
sed -i "s|nvflare/res|nvflare2/res|g" server.sh
# Prepend the CPU-queue Slurm header to the server script.
FILE="$queue_header"; queue="cpu"; slurmQueue
mv server.sh server.sh.tmp
cat "$queue_header" server.sh.tmp > server.sh
dos2unix server.sh
rm server.sh.tmp
"${aws_s3[@]}" cp server.sh "${s3_prefix}/server.sh"
url_server_script=$("${aws_s3[@]}" presign "${s3_prefix}/server.sh" --expires-in "$presign_ttl")
#echo $url_server_script
nodeArr+=("$url_server_script")
# Keep a local copy: move the file into the output folder
mv server.sh "$local_folder_name"

## make admin script
wget "${template_base}/template_admin.sh" -O admin.sh \
  || { echo "download of template_admin.sh failed" >&2; exit 1; }
sed -i "1i url=\"$url_serverDomain\"" admin.sh
dos2unix admin.sh
"${aws_s3[@]}" cp admin.sh "${s3_prefix}/admin.sh"
url_admin_script=$("${aws_s3[@]}" presign "${s3_prefix}/admin.sh" --expires-in "$presign_ttl")
echo "$url_admin_script"
nodeArr+=("$url_admin_script")
# Keep a local copy: move the file into the output folder
mv admin.sh "$local_folder_name"

## make hpc-site script
if [ "$hpcList" != "none" ]; then
  # Intentional word splitting: turn the comma-separated list into an array.
  # shellcheck disable=SC2206
  hpcArray=(${hpcList//,/ })
  #hpcArray=("hpc-site-1" "hpc-site-2" "hpc-site-3")
  for Site_name in "${hpcArray[@]}"; do
    #Site_name=hpc-site-1
    wget "${template_base}/template_hpc.sh" -O "${Site_name}.sh" \
      || { echo "download of template_hpc.sh failed" >&2; exit 1; }
    sed -i "1i url=\"$url_serverDomain\"" "${Site_name}.sh"
    # The template is written for hpc-site-1; substitute the real site name.
    sed -i "s|hpc-site-1|$Site_name|g" "${Site_name}.sh"
    # Prepend the GPU-queue Slurm header to the site script.
    FILE="$queue_header"; queue="gpu"; slurmQueue
    mv "${Site_name}.sh" "${Site_name}.sh.tmp"
    cat "$queue_header" "${Site_name}.sh.tmp" > "${Site_name}.sh"
    dos2unix "${Site_name}.sh"
    rm "${Site_name}.sh.tmp"
    "${aws_s3[@]}" cp "${Site_name}.sh" "${s3_prefix}/${Site_name}.sh"
    url_hpcsite_script=$("${aws_s3[@]}" presign "${s3_prefix}/${Site_name}.sh" --expires-in "$presign_ttl")
    #echo ${url_hpcsite_script}
    nodeArr+=("$url_hpcsite_script")
    # Keep a local copy: move the file into the output folder
    mv "${Site_name}.sh" "$local_folder_name"
  done
fi

## make hospital-site script
if [ "$hospitalList" != "none" ]; then
  # Intentional word splitting: turn the comma-separated list into an array.
  # shellcheck disable=SC2206
  hospitalArray=(${hospitalList//,/ })
  #hospitalArray=("hospital-site-1" "hospital-site-2" "hospital-site-3")
  for Site_name in "${hospitalArray[@]}"; do
    #Site_name=hospital-site-1
    wget "${template_base}/template_hospital.sh" -O "${Site_name}.sh" \
      || { echo "download of template_hospital.sh failed" >&2; exit 1; }
    sed -i "1i url=\"$url_serverDomain\"" "${Site_name}.sh"
    # The template is written for hospital-site-1; substitute the real name.
    sed -i "s|hospital-site-1|$Site_name|g" "${Site_name}.sh"
    dos2unix "${Site_name}.sh"
    "${aws_s3[@]}" cp "${Site_name}.sh" "${s3_prefix}/${Site_name}.sh"
    url_hospitalsite_script=$("${aws_s3[@]}" presign "${s3_prefix}/${Site_name}.sh" --expires-in "$presign_ttl")
    #echo $url_hospitalsite_script
    nodeArr+=("$url_hospitalsite_script")
    # NOTE(review): unlike the server/admin/hpc scripts, hospital scripts get
    # no Slurm header and are left in the current directory rather than moved
    # into the output folder - confirm this is intended.
  done
fi

# Print every presigned script URL, one per line.
for nodeurl in "${nodeArr[@]}"; do
  echo "$nodeurl"
done
```