(10/31)Meeting 6

# Meeting 6 :::info Statistica Sinica 7(1997) Article: [**STATISTICAL ANALYSIS OF REMOVAL EXPERIMENTS WITH THE USE OF AUXILLARY VARIABLES**](https://hub.hku.hk/bitstream/10722/45345/1/27454.pdf) 參考(information matrix) https://www.statlect.com/glossary/information-matrix ::: ### :small_blue_diamond: **Date: 10/31** ### :small_blue_diamond: **Time: 13:30 ~ ?** ## 雲端 https://drive.google.com/drive/folders/1_H-hiQwKZdaZzbxsiy8ojrdJ7wTPOMID?usp=sharing ## 函式simu ``` library(Rlab) library(plotrix) library(ggplot2) library(stats4) simu = function(){ #設定變數 #捕捉次數: n n = 8 sex = rbern(300, prob = 0.5) wt = rnorm(300, mean = 3, sd = 1) env = array(rnorm(300*n, mean = 2, sd = 1), c(300, n)) z_i_j = array(0, c(300, n)) population = 0 num = 0 for(i in 1:n){ z_i_j[,i] = -7 - sex + 1.5*wt + env[,i] } #n: 一次Simulation中捕捉 n 次 #z_ij: log linear model Simulation = function(n, z_i_j){ p_ij = exp(z_i_j)/(1 + exp(z_i_j)) Be_Caught = array(0, c(300, n)) num = 0 #每個體被捕捉機率(n次) for(i in 1:300){ for(j in 1:n){ Be_Caught[i, j] = rbern(1, prob = p_ij[i, j]) } } #因為不放回抽樣, 改善Be_Caught[i, j], 使得每列只有一個1, 其餘為0 for(i in 1:300){ for(j in 1:n){ if(Be_Caught[i, j] == 1){ num = num + 1 Be_Caught[i, ] = 0 Be_Caught[i, j] = 1 break } } } result = list(Be_Caught = Be_Caught, Be_Caught_num = num) return(result) } test = Simulation(n, z_i_j) #捕捉數量 num = test$Be_Caught_num #被捕捉的01矩陣 Be_Caught = test$Be_Caught #個體被捕捉的行列值 Index_Be_Caught = 0 for(j in 1:n){ Index_Be_Caught = append(Index_Be_Caught, c(which(Be_Caught[,j]==1))) } Index_Be_Caught = Index_Be_Caught[-1] #排序 Order_Be_Caught = sort(Index_Be_Caught) Order_Be_Caught #mle1 mLogL1 = function(b1, b2, b3, b4){ ans = 0 z_ij = array(0, c(300, n)) for(j in 1:n){ z_ij[,j] = b1 + b2*sex + b3*wt + b4*env[,j] } p_ij = exp(z_ij)/(1 + exp(z_ij)) q_ij = 1 - p_ij q_i = rep(1, num) #個體在實驗中沒被抓到的機率 for(i in 1:num){ index_indvidual = Order_Be_Caught[i] q_i[i] = q_i[i]*prod(q_ij[index_indvidual, ]) ans = ans - log(1 - q_i[i]) } #loglikhihood time_be_Caught = 1 num_order_bc = length(which(Be_Caught[, time_be_Caught] == 1)) for(i in 1:num){ if(num_order_bc == 0){ time_be_Caught = time_be_Caught + 1 # cat("\n",i,"次 time_be_Caught = ", time_be_Caught) num_order_bc = length(which(Be_Caught[, time_be_Caught] == 1)) } n3 = Index_Be_Caught[i] # cat("\n",i,"次 n3 = ", n3) for(j in 1:n){ if(j != time_be_Caught){ ans = ans + log(q_ij[n3, j]) # cat("\n",i,"次 q = ", j) }else{ ans = ans + log(p_ij[n3, j]) # cat("\n",i,"次 p = ", j) break } } num_order_bc = num_order_bc - 1 # cat("\n",i,"次 ans = ", ans) } return(-ans) } mLogL1(-7, -1, 1.5, 1) #mle1 mle.result = mle(minuslog = mLogL1, start = list(b1 = -7, b2 = -1, b3 = 1.5, b4 = 1)) # summary(mle.result) # cat("\n mle.result =") # print(summary(mle.result)) #帶入beta估計 #1 b1 = mle.result@coef[1] b2 = mle.result@coef[2] b3 = mle.result@coef[3] b4 = mle.result@coef[4] # b1 = -7 # b2 = -1 # b3 = 1.5 # b4 = 1 z_i_j_estimate = array(0, c(300, n)) for(i in 1:n){ z_i_j_estimate[,i] = b1 + b2*sex + b3*wt + b4*env[,i] } p_i_j_estimate = exp(z_i_j_estimate)/(1 + exp(z_i_j_estimate)) q_i_j_estimate = 1 - p_i_j_estimate num = length(Order_Be_Caught) population = 0 #horvits for(i in 1:num){ n3 = Order_Be_Caught[i] population = population + (1 - prod(q_i_j_estimate[n3,]))^-1 # cat("\n population =", population ) } # cat("\n population =", population ) v = population result = data.frame(num, v) return(result) } #跑simulation 1000 次 test1 = simu() test1 for(i in 1:1000){ cat("\n 第", i,"次") test1 = rbind(test1, simu()) } #Simulation results for ν = 300 #v num_mean = mean(test1$num) num_sd = sd(test1$num) v_mean = mean(test1$v) v_sd = sd(test1$v) #v(<1000) which(test1$v > 1000) v_1000 = test1$v[-which(test1$v > 1000)] v_1000_mean = mean(v_1000) v_1000_sd = sd(v_1000) result = data.frame(num_mean, num_sd, v_mean, v_sd, v_1000_mean, v_1000_sd) result plot(test$v, xlab = "t = 4", ylab = "v") hist(v_1000, xlim = c(200,1000)) ggplot(test, aes(v), bins = 30)+geom_histogram(bins=30) #-------- #個體在實驗中沒被抓到的機率 for(i in 1:num){ n3 = Order_Be_Caught[i] q_i_estimate[i] = q_i_estimate[i]*prod(q_i_j_estimate[n3,]) } #個體在實驗中沒被抓到的機率 for(i in 1:num){ for(j in 1:n){ n3 = Order_Be_Caught[i] q_i_estimate[i] = q_i_estimate[i]*(q_i_j_estimate[n3,j]) } } ``` ## 函式simu內部 ``` library(Rlab) library(plotrix) library(stats4) #設定變數 #捕捉次數: n n = 8 sex = rbern(300, prob = 0.5) wt = rnorm(300, mean = 3, sd = 1) env = array(rnorm(300*n, mean = 2, sd = 1), c(300, n)) z_i_j = array(0, c(300, n)) population = 0 num = 0 for(i in 1:n){ z_i_j[,i] = -7 - sex + 1.5*wt + env[,i] } #n: 一次Simulation中捕捉 n 次 #z_ij: log linear model Simulation = function(n, z_i_j){ p_ij = exp(z_i_j)/(1 + exp(z_i_j)) Be_Caught = array(0, c(300, n)) num = 0 #每個體被捕捉機率(n次) for(i in 1:300){ for(j in 1:n){ Be_Caught[i, j] = rbern(1, prob = p_ij[i, j]) } } #因為不放回抽樣, 改善Be_Caught[i, j], 使得每列只有一個1, 其餘為0 for(i in 1:300){ for(j in 1:n){ if(Be_Caught[i, j] == 1){ num = num + 1 Be_Caught[i, ] = 0 Be_Caught[i, j] = 1 break } } } result = list(Be_Caught = Be_Caught, Be_Caught_num = num) return(result) } test = Simulation(n, z_i_j) #捕捉數量 num = test$Be_Caught_num #被捕捉的01矩陣 Be_Caught = test$Be_Caught #個體被捕捉的行列值 Index_Be_Caught = 0 for(j in 1:n){ Index_Be_Caught = append(Index_Be_Caught, c(which(Be_Caught[,j]==1))) } Index_Be_Caught = Index_Be_Caught[-1] #排序 Order_Be_Caught = sort(Index_Be_Caught) Order_Be_Caught #mle1 mLogL1 = function(b1, b2, b3, b4){ ans = 0 z_ij = array(0, c(300, n)) for(j in 1:n){ z_ij[,j] = b1 + b2*sex + b3*wt + b4*env[,j] } p_ij = exp(z_ij)/(1 + exp(z_ij)) q_ij = 1 - p_ij q_i = rep(1, num) #個體在實驗中沒被抓到的機率 for(i in 1:num){ index_indvidual = Order_Be_Caught[i] q_i[i] = q_i[i]*prod(q_ij[index_indvidual, ]) ans = ans - log(1 - q_i[i]) } #loglikhihood time_be_Caught = 1 num_order_bc = length(which(Be_Caught[, time_be_Caught] == 1)) for(i in 1:num){ if(num_order_bc == 0){ time_be_Caught = time_be_Caught + 1 # cat("\n",i,"次 time_be_Caught = ", time_be_Caught) num_order_bc = length(which(Be_Caught[, time_be_Caught] == 1)) } n3 = Index_Be_Caught[i] # cat("\n",i,"次 n3 = ", n3) for(j in 1:n){ if(j != time_be_Caught){ ans = ans + log(q_ij[n3, j]) # cat("\n",i,"次 q = ", j) }else{ ans = ans + log(p_ij[n3, j]) # cat("\n",i,"次 p = ", j) break } } num_order_bc = num_order_bc - 1 # cat("\n",i,"次 ans = ", ans) } return(-ans) } mLogL1(-7, -1, 1.5, 1) #mle1 mle.result = mle(minuslog = mLogL1, start = list(b1 = -7, b2 = -1, b3 = 1.5, b4 = 1)) summary(mle.result) # cat("\n mle.result =") # print(summary(mle.result)) #帶入beta估計 #1 b1 = mle.result@coef[1] b2 = mle.result@coef[2] b3 = mle.result@coef[3] b4 = mle.result@coef[4] # b1 = -7 # b2 = -1 # b3 = 1.5 # b4 = 1 z_i_j_estimate = array(0, c(300, n)) for(i in 1:n){ z_i_j_estimate[,i] = b1 + b2*sex + b3*wt + b4*env[,i] } p_i_j_estimate = exp(z_i_j_estimate)/(1 + exp(z_i_j_estimate)) q_i_j_estimate = 1 - p_i_j_estimate num = length(Order_Be_Caught) population = 0 #horvits for(i in 1:num){ n3 = Order_Be_Caught[i] population = population + (1 - prod(q_i_j_estimate[n3,]))^-1 # cat("\n population =", population ) } cat("\n population =", population ) v = population ``` ## 結果 # 8次 ## 1 ![](https://hackmd.io/_uploads/ByM-EGTMa.png) ## 2 ![](https://hackmd.io/_uploads/HkvRGbAGp.png) # 6次 ## 1 ![](https://hackmd.io/_uploads/H1vABzafa.png) ## 2 ![](https://hackmd.io/_uploads/BJBjQWCMp.png) # 4次 ## 1 ![](https://hackmd.io/_uploads/HJLZHMaGp.png) ## 2 ![](https://hackmd.io/_uploads/rkXMNbCMT.png)