https://vghteams-my.sharepoint.com/personal/ycchu5_vghtpe_gov_tw/_layouts/15/onedrive.aspx?id=%2Fpersonal%2Fycchu5%5Fvghtpe%5Fgov%5Ftw%2FDocuments%2F2024%2C%20%E6%9C%B1%E5%8E%9F%E5%98%89%2C%20%E7%A0%94%E7%A9%B6%E8%A8%88%E7%95%AB%2F2024%2C%20%E9%BB%83%E7%8F%8F%E5%80%AA%2C%20%E5%85%AC%E4%BA%8B%E5%8C%85&ga=1
# ICD & PCS
> 重開機需要重新mount ddn及isilon
> isilon : mount bigdata.vghtpe.local:/ifs/isilond /isilon_share
> ddn : sudo mount -t lustre 172.31.255.200@o2ib0:172.31.255.201@o2ib0:/FK /aifs
## 1. 所有data路徑
dgx
1. ICD : /aifs/isc5223/FL_disc_2020_2021/icd-10/
data : /aifs/isc5223/FL_disc_2020_2021/icd-10/data/
label : /aifs/isc5223/FL_disc_2020_2021/icd-10/label/
2. PCS : /aifs/isc5223/FL_OPCODE/icd10/
data : /aifs/isc5223/FL_OPCODE/icd10/data/
label : /aifs/isc5223/FL_OPCODE/icd10/label/
## 2. 開啟jupyter lab
### 2.1 build docker
> `docker run -it --rm --name ICD_test --gpus all --shm-size=300g -v /aifs/isc5223/:/mount/src -w /mount/src -p 8089:8888 -d fl:cuda12.2_py310 jupyter lab --ip 0.0.0.0 --allow-root`
### 2.2 開啟jupyter lab (jupyterlab pwd:!QAZ2wsx)
`http://<docker build在哪台IP>:8089/lab?`
### 2.3 train指令
> CUDA_VISIBLE_DEVICES : 指定第幾顆GPU
> --model : 要train model名稱
> --task : output路徑名稱
> 
> 修改input路徑 : args.input_dir = args.input_dir / 'CM'
> label路徑 : codelist = np.loadtxt(f'label/CM.csv', str, delimiter='\n') #載入診斷碼清單
> 
> 修改input路徑 : dataset = load_dataset('csv', data_files={'train': 'data/CM/train.csv', 'test': 'data/CM/test.csv'}, delimiter="\t", streaming=True)
> label路徑 : codelist = np.loadtxt(f'label/CM.csv', str) # 載入診斷碼清單
#### 單顆GPU (run_trainer_icdname.py)
> `CUDA_VISIBLE_DEVICES=0 python run_trainer_icdname.py --model BioMegatron345mUncased --fp16 --task CM_BioMegatron345mUncased --train --num_train_epochs 1000 --save_total_limit 1`
#### 多GPU (run_trainer_FL_gpu_large_streaming.py)
> `torchrun --nproc_per_node 8 --nnodes 1 run_trainer_FL_gpu_large_streaming.py --fp16 --model BioMegatron345mUncased --task CM_BioMegatron345mUncased --train --num_train_epochs 1000 --save_total_limit 1 --per_device_train_batch_size 16 --per_device_eval_batch_size 16`
# Parabircks
## 1. 路徑
dgx01(10.221.253.64) : /raid/nvidia/
## 2. docker
```
run_env.sh
run_jupyter.sh
```
jupyter lab : http://10.221.253.64:8089/lab
# Dr.Gupta會使用的server(10.221.252.12)
* 環境 : anaconda
* 資料路徑(isilon) :
liver CT : /data/liver_CT/HCCCT/
# 補值SQL
> PFKEY需查BDCPFILE
1. 2023, 陳涵栩, Acromegaly(median補)
* 計算vital中位數group by 年份
```
with a as (
select *,"RDATE"::date "date",date_part('y',"RDATE")::varchar "y_date" from "BDC_TECH"."BDCVITAL"
where "CHARTID" in (病歷號)
and "ITEM" in ('WT','SBP','DBP') and "OUTCOME" <> 0
and "RDATE">='2008-01-01' and "RDATE"<='2023-12-31'
)
select "CHARTID","y_date","ITEM",percentile_disc(0.5) WITHIN GROUP (ORDER BY "OUTCOME") "q2_OUTCOME"
from a
group by "CHARTID","y_date","ITEM"
order by "CHARTID","y_date","ITEM"
;
```
* 計算lab data中位數group by 年份(從lab找)
```
with a as (
select "CHARTID", date_part('y',"SDATE")::varchar "y_date","ITEM",(regexp_match("OUTCOME",'(\d+(\.\d+)?)'))[1]::numeric "OUTCOME" from "BDC_TECH"."LAB"
where "CHARTID" in (病歷號)
and (("PFKEY" = '90109A06' and "ITEM" = 'HBA1C')
or ("ITEM" = 'CHOL'))
and (regexp_match("OUTCOME",'(\d+(\.\d+)?)'))[1]::numeric <> 0
and "SDATE">='2008-01-01' and "SDATE"<='2023-12-31')
select "CHARTID","y_date","ITEM",percentile_disc(0.5) WITHIN GROUP (ORDER BY "OUTCOME") "q2_OUTCOME" from a
group by "CHARTID","y_date","ITEM"
order by "CHARTID","y_date","ITEM"
;
```
* 計算lab data中位數group by 年份(從個別lab table找)
```
with a as (
select "CHARTID", date_part('y',"SDATE")::varchar "y_date",'HBA1C' "ITEM",(regexp_match("OUTCOME",'(\d+(\.\d+)?)'))[1]::numeric "OUTCOME" from "BDC_TECH"."BDCLHBA1C"
where "CHARTID" in (病歷號)
and "PFKEY" = '90109A06' and (regexp_match("OUTCOME",'(\d+(\.\d+)?)'))[1]::numeric <> 0
and "SDATE">='2008-01-01' and "SDATE"<='2023-12-31'
union
select "CHARTID", date_part('y',"SDATE")::varchar "y_date",'TC' "ITEM",(regexp_match("OUTCOME",'(\d+(\.\d+)?)'))[1]::numeric "OUTCOME" from "BDC_TECH"."BDCLCHOL"
where "CHARTID" in (病歷號)
and (regexp_match("OUTCOME",'(\d+(\.\d+)?)'))[1]::numeric <> 0
and "SDATE">='2008-01-01' and "SDATE"<='2023-12-31'
)
select "CHARTID","y_date","ITEM",percentile_disc(0.5) WITHIN GROUP (ORDER BY "OUTCOME") "q2_OUTCOME" from a
group by "CHARTID","y_date","ITEM"
order by "CHARTID","y_date","ITEM"
;
```