Statistics - Part 1 with Lyes LAKHAL

# Statistics - Part 1 with Lyes LAKHAL
###### tags: emPLANT+, 第一年法國 UniLaSalle, 上學期

**Use R Studio**

R程式轉換語言 https://metalanguageinuse.com/how-to-change-language-setting-to-english-in-r-rstudio-a-temporary-measure/

課程時間表

![](https://i.imgur.com/LDocRkO.png)
![](https://i.imgur.com/gYQIGYY.png)

```
#read csv files from my dell computer
x = read.csv(file.choose())
x
```

![](https://i.imgur.com/R7uY6K7.png)

Practice 1
* 題目: [https://www.r-bloggers.com/2016/09/examining-data-exercises](https://www.r-bloggers.com/2016/09/examining-data-exercises/)

Class 7
* 不同檢定方式的差別整理: [https://mropengate.blogspot.com/2015/03/hypothesis-testing-p-value.html](https://mropengate.blogspot.com/2015/03/hypothesis-testing-p-value.html)
* 統計假說檢定參考資料:[https://slidesplayer.com/slide/16909216/](https://slidesplayer.com/slide/16909216/)

Class 11 統計分析
* dnorm, pnorm, and qnorm 的差異: [https://www.jianshu.com/p/a24eb1b94177](https://www.jianshu.com/p/a24eb1b94177)

![](https://i.imgur.com/iX3QuHA.png)
![](https://i.imgur.com/MElaWiW.png)
![](https://i.imgur.com/79hsg14.png)
![](https://i.imgur.com/V0WjS1p.png)
![](https://i.imgur.com/RadEnBZ.png)
![](https://i.imgur.com/oGXbm6X.png)
![](https://i.imgur.com/YAN4HEo.png)
![](https://i.imgur.com/mqoinL1.png)
![](https://i.imgur.com/qxC76aB.png)

**1. 虛無假設 (null hypothesis, H0):** Null = nothing! 虛無假設表示 “什麼也沒發生” 的假設。通常虛無假設是表示觀測值完全反映隨機變異的結果。

**2. 對立假設 (alternative hypothesis, Ha):** 某種效應的確存在! 因此觀測值是此效應加上隨機變異的結果。

![](https://i.imgur.com/HIc4iCC.png)
![](https://i.imgur.com/ZNrSmQu.png)
![](https://i.imgur.com/JRSrF2j.png)

**3. 連續變數資料分析**

![](https://i.imgur.com/2WEk1ZF.png)

D: 單獨；P: 連續。如果要計算連續的，不能用很多個 d 相加，會有誤差。如果要計算連續的區間，要相減的話，要記得減一個數字，不能用要的數值，它會涵蓋在裡面。

quantitative (定量的)
qualitative (定性的)

---

## Code in Class

### **1. 
Class 1 & 2**
```
# 2021-2022 Univariate statistics - emPLANT & 4A Plant Breeding
# Tour of the basic R data structures: vectors, matrices, lists, data frames.

# ---- Vectors and atomic types ----
# Numeric vector
x1 <- c(1, 2, 3)
x1

# Numeric scalar
x2 <- 10.5
x2
class(x2)

# Logical (TRUE / FALSE)
x3 <- 1
y3 <- 2
z3 <- x3 > y3
z3
class(z3)
# "&" means and, "|" means or, and "!" means negation

# Character
fname <- "Judy"
fname
class(fname)
lname <- "Lin"
lname
paste(fname, lname)

# sprintf() fills the placeholders in order:
# %s takes a character string, %d takes a whole number
sprintf("%s has %d dollars", "Sam", 100)

# Extract characters 3 to 12
substr("Mary has a little lamb.", start = 3, stop = 12)

# Replace the first match of a pattern
sub("little", "big", "Mary has a little lamb.")

# Number of elements
length(c("aa", "bb", "cc", "dd", "ee"))

# Combining vectors (the numbers are coerced to character)
n1 <- c("aa", "bb", "cc")
s1 <- c(2, 3, 5)
s2 <- c("aa", "bb", "cc", "dd", "ee")
c1 <- c(n1, s1)
c1

# Multiplication by a scalar
a1 <- c(1, 3, 5, 7)
b1 <- c(1, 2, 4, 8)
d1 <- 5 * a1
d1

# Addition of vectors with different lengths:
# the shorter vector (u1) is recycled until it matches the length of v1
u1 <- c(10, 20, 30)
v1 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
e1 <- u1 + v1
e1

# Indexing
s3 <- c("aa", "bb", "cc", "dd", "ee")
s3[3]
s3[c(2, 3)]
s3[c(2, 3, 3)]
s3[c(2, 1, 3)]  # fixed: the notes indexed an undefined vector `s`
s3[2:4]         # fixed: same, `s` -> `s3`

# A negative index drops that element
s3[-3]
# An out-of-range index returns NA
s3[10]

# ---- Matrix ----
A1 <- matrix(c(2, 4, 3, 1, 5, 7), nrow = 2, ncol = 3, byrow = TRUE)
A1
# nrow = 2 rows, ncol = 3 columns.
# byrow is optional; when omitted the matrix is filled column by column.

# Inside [ , ] the first index is the row, the second is the column
A1[2, 3]        # element at 2nd row, 3rd column
A1[2, ]         # the 2nd row
A1[, 3]         # the 3rd column
A1[, c(1, 3)]   # the 1st and 3rd columns

# Name the rows and columns
dimnames(A1) <- list(c("row1", "row2"), c("col1", "col2", "col3"))
A1                  # print A1
A1["row2", "col3"]  # element at 2nd row, 3rd column

B1 <- matrix(c(2, 4, 3, 1, 5, 7), nrow = 3, ncol = 2)
B1     # B1 has 3 rows and 2 columns
t(B1)  # transpose of B1

C1 <- matrix(c(7, 4, 2), nrow = 3, ncol = 1)
C1     # C1 has 3 rows

# cbind() joins matrices side by side; they must have the same number of rows
f1 <- cbind(B1, C1)
f1

D1 <- matrix(c(6, 2), nrow = 1, ncol = 2)
D1

# rbind() stacks matrices; they must have the same number of columns
g1 <- rbind(B1, D1)
g1

# Flatten a matrix into a vector (column by column)
h1 <- c(B1)
h1

# ---- List ----
n2 <- c(2, 3, 5)
s4 <- c("aa", "bb", "cc", "dd", "ee")
b2 <- c(TRUE, FALSE, TRUE, FALSE, FALSE)
x1 <- list(n2, s4, b2, 3)  # x1 contains copies of n2, s4, b2
x1

# Select list members ([ ] returns a list)
x1[2]
x1[c(2, 4)]

# Replace a value inside a member; character values must be quoted
x1[[2]][1] <- "ta"
x1[[2]]

# Named list members
v1 <- list(bob = c(2, 3, 5), john = c("aa", "bb"))
v1
v1["bob"]
v1$bob
v1[c("john", "bob")]

# ---- Data frame ----
# All columns must have the same length
n3 <- c(2, 3, 5)
s5 <- c("aa", "bb", "cc")
b3 <- c(TRUE, FALSE, TRUE)
df1 <- data.frame(n3, s5, b3)
df1

mtcars
# Data can be addressed by position or by name
nrow(mtcars)  # number of data rows
ncol(mtcars)  # number of columns
head(mtcars)

# [[ ]], $ and [, "name"] extract one column as a vector
mtcars[[9]]
mtcars[["am"]]
mtcars$am
mtcars[, "am"]

# Single brackets [ ] return a data frame of the selected columns
mtcars[1]
mtcars["mpg"]
mtcars[c("mpg", "hp")]

# Row selection by position or by row name
mtcars[24, ]
mtcars[c(3, 24), ]
mtcars["Camaro Z28", ]
mtcars[c("Datsun 710", "Camaro Z28"), ]

# Logical filter: rows where am == 0
L1 <- mtcars$am == 0
L1
mtcars[L1, ]
mtcars[L1, ]$mpg
```
### **2. Class 3**
```
# Frequency distributions of the Old Faithful eruption durations.
head(faithful)
duration <- faithful$eruptions
range(duration)

# Fine-grained bins (width 0.1) padded by one step on both sides of the range
step <- 0.1
r <- range(duration)
minval <- r[1]
maxval <- r[2]
minval
maxval
breaks <- seq(minval - step, maxval + step, by = step)
breaks
duration.cut <- cut(duration, breaks, right = FALSE)
breaks
duration
duration.cut
duration.freq <- table(duration.cut)
duration.freq
cbind(duration.freq)

# Histogram
hist(duration, right = FALSE)
colors <- c("red", "yellow", "green", "violet",
            "orange", "blue", "pink", "cyan")
hist(duration, right = FALSE, col = colors,
     main = "Old Faithful Eruptions", xlab = "Duration minutes")

# Relative frequency with half-minute bins, intervals closed on the left
breaks <- seq(1.5, 5.5, by = 0.5)
duration.cut <- cut(duration, breaks, right = FALSE)
duration.freq <- table(duration.cut)
SS <- nrow(faithful)
duration.relfreq <- duration.freq / SS
duration.relfreq
duration.relfreq
cbind(duration.freq, duration.relfreq)

# Cumulative frequency
duration.cumfreq <- cumsum(duration.freq)
duration.cumfreq
cbind(duration.cumfreq)

# Cumulative frequency graph; a leading 0 aligns the counts with the breaks
duration.cut
duration.cumfreq
cumfreq0 <- c(0, duration.cumfreq)
cumfreq0
plot(breaks, cumfreq0, main = "Old Faithful Eruptions",
     xlab = "Duration minutes", ylab = "Cumulative eruptions")
lines(breaks, cumfreq0)

# Cumulative relative frequency
duration.freq
ss <- nrow(faithful)
duration.cumrelfreq <- duration.cumfreq / ss
duration.cumrelfreq
cbind(duration.cumrelfreq)

# Scatter plot of eruption duration against waiting time
waiting <- faithful$waiting
plot(duration, waiting, xlab = "Eruption duration", ylab = "Time waiting")
```
![](https://i.imgur.com/npPkg0R.png)
### **3. Class 4**
```
# Numerical summaries of the Old Faithful eruption durations.
duration <- faithful$eruptions

# Mean and median
a1 <- mean(duration)
a2 <- median(duration)
a1
a2

# Quartiles and arbitrary percentiles
a3 <- quantile(duration)
a3
quantile(duration, c(.32, .57, .98))

# Range
max(duration)
min(duration)
a4 <- max(duration) - min(duration)
a4

boxplot(duration, horizontal = TRUE)

# Variance and standard deviation
a5 <- var(duration)
a5
a6 <- sd(duration)
a6

# Sample covariance worked by hand: sum of products of deviations / (n - 1)
z <- (1 / 3) * ((4 - 2.5) * (7 - 4.5) + (2 - 2.5) * (4 - 4.5) +
                  (3 - 2.5) * (2 - 4.5) + (1 - 2.5) * (5 - 4.5))
z

# Covariance between duration and waiting time
waiting <- faithful$waiting
a7 <- cov(duration, waiting)
a7

# Third central moment (moment() comes from the e1071 package)
library(e1071)
moment(duration, order = 3, center = TRUE)
```
### **4. 
Class 5**
```
# Probability distributions.

# Binomial: P(X = 4) for X ~ Binomial(12, 0.2)
d1 <- dbinom(4, size = 12, prob = 0.2)
d1
# P(X <= 4): summing the point probabilities matches pbinom()
d2 <- sum(dbinom(0:4, size = 12, prob = 0.2))
pbinom(4, size = 12, prob = 0.2)
d2

# Poisson with lambda = 12: lower tail P(X <= 16) and upper tail P(X > 16)
p1 <- ppois(16, lambda = 12)
p2 <- ppois(16, lambda = 12, lower.tail = FALSE)  # argument name spelled out
p1
p2

# Exponential with rate 1/3: P(X <= 2)
e1 <- pexp(2, rate = 1 / 3)
e1

# Normal with mean 72, sd 15.2: upper tail then lower tail at 84
n1 <- pnorm(84, mean = 72, sd = 15.2, lower.tail = FALSE)
n1
n2 <- pnorm(84, mean = 72, sd = 15.2)
n2

# Quantiles: chi-squared, Student t, F
c1 <- qchisq(.95, df = 7)
c1
t1 <- qt(c(.025, .975), df = 5)
t1
f1 <- qf(.95, df1 = 5, df2 = 2)
f1
```
### **5. Class 6 & 7**
```
# Interval estimation and z/t tests, using the `survey` data from MASS.
library(MASS)
survey
help(survey)

# Point estimate of the mean height (NA values ignored)
height.survey <- survey$Height
height.survey
xbar <- mean(height.survey, na.rm = TRUE)
xbar

# 95% interval for the mean, population standard deviation known
height.response <- na.omit(survey$Height)
n <- length(height.response)
sigma <- 9.48            # population standard deviation
sem <- sigma / sqrt(n)
sem                      # standard error of the mean
alpha <- 0.05
E <- qnorm(1 - alpha / 2) * sem
E                        # margin of error
mean1 <- mean(height.response)
mean1 + c(-E, E)         # sample mean -/+ margin of error

# 95% interval for the mean, population standard deviation unknown
n1 <- length(height.response)
s <- sd(height.response)
n1
s
SE <- s / sqrt(n1)
SE
E <- qt(.975, df = n1 - 1) * SE  # qt() gives Student t quantiles
E
xbar2 <- mean(height.response)
xbar2 + c(-E, E)

# 99% interval for the mean, population standard deviation unknown.
# Fixed: the notes used qt(.99), which does not match alpha2 = 0.01 for a
# two-sided interval, and printed the interval with the 95% margin E.
n2 <- length(height.response)
s2 <- sd(height.response)
n2
s2
SE1 <- s2 / sqrt(n2)
SE1
alpha2 <- 0.01
E2 <- qt(1 - alpha2 / 2, df = n2 - 1) * SE1
E2
xbar3 <- mean(height.response)
xbar3 + c(-E2, E2)

t.test(height.response)

# Sample size needed for a margin of error of 1.2 at 95% confidence
alpha3 <- 0.05
zstar <- qnorm(.975)
sigma <- 9.48
E <- 1.2
zstar^2 * sigma^2 / E^2

# Proportion of female students.
# Fixed: the notes used an undefined `k` and the height sample size `n`.
gender.response <- na.omit(survey$Sex)
n4 <- length(gender.response)
k4 <- sum(gender.response == "Female")
n4
k4
pbar <- k4 / n4
pbar
SE <- sqrt(pbar * (1 - pbar) / n4)
SE
prop.test(k4, n4)

# Sample size for a proportion: planned p = 0.5, margin of error 0.05
zstar <- qnorm(.975)
zstar
p <- 0.5
E <- 0.05
p1 <- zstar^2 * p * (1 - p) / E^2
p1

# Lower-tail z test (known sigma); reject H0 when a5 < -z.alpha5
sigma5 <- 120
mu05 <- 10000
xbar5 <- 9900
alpha5 <- 0.05
n5 <- 30
a5 <- (xbar5 - mu05) / (sigma5 / sqrt(n5))
a5
z.alpha5 <- qnorm(1 - alpha5)
z.alpha5
pval5 <- pnorm(a5)  # lower-tail p-value
pval5

# Upper-tail z test (known sigma); reject H0 when a6 > z.alpha6
sigma6 <- 0.25
mu06 <- 2
xbar6 <- 2.1
alpha6 <- 0.05
n6 <- 35
a6 <- (xbar6 - mu06) / (sigma6 / sqrt(n6))
a6
z.alpha6 <- qnorm(1 - alpha6)
z.alpha6
pval6 <- pnorm(a6, lower.tail = FALSE)  # upper-tail p-value
pval6

# Two-tailed z test (known sigma)
sigma7 <- 2.5
mu07 <- 15.4
xbar7 <- 14.6
alpha7 <- 0.05
n7 <- 35
a7 <- (xbar7 - mu07) / (sigma7 / sqrt(n7))
a7
z.alpha7 <- qnorm(1 - alpha7 / 2)
c(-z.alpha7, z.alpha7)

# Same statistic compared against Student t critical values
sigma8 <- 125
mu08 <- 10000
xbar8 <- 9900
alpha8 <- 0.05
n8 <- 30
a8 <- (xbar8 - mu08) / (sigma8 / sqrt(n8))
a8
t.alpha8 <- qt(1 - alpha8, df = n8 - 1)
t.alpha8
c(-t.alpha8, t.alpha8)
```
### **6. Class 8**
```
# Hypothesis tests using the sample standard deviation (Student t).

# One-tailed test at alpha = 0.05, df = n1 - 1
xbar1 <- 9900
mu01 <- 10000
sd1 <- 125
n1 <- 30
t1 <- (xbar1 - mu01) / (sd1 / sqrt(n1))
t1
alpha1 <- 0.05
t.alpha1 <- qt(1 - alpha1, df = n1 - 1)
t.alpha2 <- c(t.alpha1, -t.alpha1)
t.alpha2

# One-tailed test at alpha = 0.05, df = n2 - 1
xbar2 <- 2.1
mu02 <- 2.0
sd2 <- 0.3
n2 <- 35
t2 <- (xbar2 - mu02) / (sd2 / sqrt(n2))
t2
alpha2 <- 0.05
t.alpha2 <- qt(1 - alpha2, df = n2 - 1)
t.alpha3 <- c(t.alpha2, -t.alpha2)
t.alpha3

# Two-tailed test; note that t.alpha3 is reused here as the significance level
xbar3 <- 14.6
mu03 <- 15.4
sd3 <- 2.5
n3 <- 35
t3 <- (xbar3 - mu03) / (sd3 / sqrt(n3))
t3
t.alpha3 <- 0.05
t.alpha4 <- qt(1 - (t.alpha3 / 2), df = n3 - 1)
t.alpha5 <- c(t.alpha4, -t.alpha4)
t.alpha5

# z test for a proportion; z.alpha6 holds the significance level.
# Fixed: the notes called qnorm(1 - t.alpha6) with an undefined `t.alpha6`.
pbar <- 85 / 148
p04 <- 0.6
n4 <- 148
z4 <- (pbar - p04) / sqrt(p04 * (1 - p04) / n4)
z4
z.alpha6 <- 0.05
z.alpha7 <- qnorm(1 - z.alpha6)
z.alpha8 <- c(z.alpha7, -z.alpha7)
z.alpha8
```
### **7. Class 9**
```
# Comparing two means.
library(MASS)

# Paired t test on the two yield columns of `immer`
head(immer)
t.test(immer$Y1, immer$Y2, paired = TRUE)

# Two-sample t test: mpg for am == 0 versus the remaining cars
mtcars
mtcars$mpg
mtcars$am
L <- mtcars$am == 0
L
mpg.auto <- mtcars[L, ]$mpg
mpg.auto
mpg.manual <- mtcars[!L, ]$mpg
mpg.manual
t.test(mpg.auto, mpg.manual)
# Same comparison through the formula interface
t.test(mpg ~ am, data = mtcars)
```
---
## Code of Exercises
### **1. Exercises 1**
```
# question 1
# Descriptive statistics of the `islands` data
islands
N <- length(islands)
N
M1 <- mean(islands)
M1
M2 <- median(islands)
M2
BP <- boxplot(islands, horizontal = TRUE)
R <- range(islands)
R
max(islands)
min(islands)
SD <- sd(islands)
SD
# NOTE(review): .0005 looks like a typo for .05 - confirm against the
# exercise statement before relying on this value
A <- quantile(islands, c(.0005, .95))
A
```
### **2. Exercises 2**
```
# question 1
# A car dealer knows that from past experience he can make a sale to 20% of the customers that he interacts with. 
# What is the probability that, in five randomly selected interactions, he will make a sale to:
# a.) Exactly three customers?
# b.) At most one customer?
# c.) At least one customer?
# d.) Calculate the probability for each number of sales? (k = 0, 1, 2, 3, 4, 5)
n1 <- 5     # number of interactions
p01 <- 0.2  # probability of a sale
x1 <- 3
x2 <- c(0, 1)
x3 <- c(1, 2, 3, 4, 5)

# a.) P(X = 3)
d1 <- dbinom(x1, size = n1, prob = p01)
d1

# b.) P(X <= 1): sum the point probabilities for 0 and 1
# (computed with sum() instead of retyping the printed decimals)
d2 <- dbinom(x2, size = n1, prob = p01)
d2
d2f <- sum(d2)
d2f

# c.) P(X >= 1): sum the point probabilities for 1..5
d3 <- dbinom(x3, size = n1, prob = p01)
d3
d3f <- sum(d3)
d3f

# d.) P(X = k) for k = 0, ..., 5
d4 <- dbinom(0, size = n1, prob = p01)
d4
d5 <- dbinom(1, size = n1, prob = p01)
d5
d6 <- dbinom(2, size = n1, prob = p01)
d6
d7 <- dbinom(3, size = n1, prob = p01)
d7
d8 <- dbinom(4, size = n1, prob = p01)
d8
d9 <- dbinom(5, size = n1, prob = p01)
d9

# question 2
# A machine is set up such that the average content of juice per bottle equals mu. A sample of 100 bottles yields an average content of 48cl. Calculate a 90% and a 95% confidence interval for the average content:
# (i) Assuming that the population standard deviation sigma = 5 cl.
# (ii) Assuming that the population standard deviation sigma is unknown. We suppose that the standard deviation of the sample is equal to **s = 7**.

# (i) sigma known: normal quantiles
n2 <- 100
xbar2 <- 48
sigma2 <- 5
alpha21 <- 0.1   # 90% confidence
alpha22 <- 0.05  # 95% confidence
sem2 <- sigma2 / sqrt(n2)
sem2
E21 <- qnorm(1 - alpha21 / 2) * sem2
E21
E22 <- qnorm(1 - alpha22 / 2) * sem2
E22
IE21 <- xbar2 + c(-E21, E21)
IE21
IE22 <- xbar2 + c(-E22, E22)
IE22

# (ii) sigma unknown, sample standard deviation s = 7 cl: Student t quantiles.
# Fixed: the standard error must use s2, not the sigma2 left over from part (i).
n2 <- 100
xbar2 <- 48
s2 <- 7
alpha21 <- 0.1
alpha22 <- 0.05
sem2 <- s2 / sqrt(n2)
sem2
E23 <- qt(1 - alpha21 / 2, df = n2 - 1) * sem2
E23
E24 <- qt(1 - alpha22 / 2, df = n2 - 1) * sem2
E24
IE23 <- xbar2 + c(-E23, E23)
IE23
IE24 <- xbar2 + c(-E24, E24)
IE24

# question 3
# A machine is set up such that the average content of juice per bottle equals mu. A sample of 100 bottles yields an average content of 48.8 cl. Test the hypothesis that the average content per bottle is 50 cl. 
# at the 5% significance level: H0: mu = 50
# (i) Assuming that the population standard deviation sigma = 5 cl.
# (ii) Assuming that the population standard deviation sigma is unknown. We suppose that the standard deviation of the sample is equal to **s = 7 cl**.

# (i) two-tailed z test with sigma = 5
xbar3 <- 48.8
mu03 <- 50
n3 <- 100
sigma31 <- 5
z3 <- (xbar3 - mu03) / (sigma31 / sqrt(n3))
z3
alpha3 <- .05
z.alpha3 <- qnorm(1 - alpha3 / 2)
z.alpha3
z.alpha4 <- c(z.alpha3, -z.alpha3)  # critical values
z.alpha4

# (ii) teacher's answer: same statistic with s = 7, Student t critical values
xbar3 <- 48.8  # sample mean
mu03 <- 50     # hypothesized value
n3 <- 100      # sample size
sigma32 <- 7
z3 <- (xbar3 - mu03) / (sigma32 / sqrt(n3))
z3
alpha3 <- .05
z.alpha5 <- qt(1 - alpha3 / 2, df = n3 - 1)
z.alpha5
z.alpha6 <- c(z.alpha5, -z.alpha5)
z.alpha6

# question 4
# A machine is set up such that the average content of juice per bottle equals mu. A sample of 36 bottles yields an average content of 48.5 cl. Can you reject the hypothesis that the average content per bottle is less than or equal to 45 cl (upper tail) in favor of the alternative that it exceeds 45 cl (5% significance level)? Assume that the population standard deviation sigma = 5 cl. H0: mu <= 45
n4 <- 36       # sample size
xbar4 <- 48.5  # sample mean
mu04 <- 45     # hypothesized value
sigma4 <- 5    # population standard deviation
alpha4 <- 0.05
z4 <- (xbar4 - mu04) / (sigma4 / sqrt(n4))
z4             # test statistic (u0)
alpha4 <- 0.05
t.alpha5 <- qnorm(1 - alpha4)
t.alpha5
t.alpha6 <- c(t.alpha5, -t.alpha5)
t.alpha6       # critical values (u)

# question 5
# 60 customers reply they are satisfied with the service they received. The sample size is 80 customers. Calculate a 95% confidence interval for the proportion of satisfied customers.
n5 <- 80
k5 <- 60
pbar <- k5 / n5                       # sample proportion
alpha5 <- 0.05
z.half.alpha7 <- qnorm(1 - alpha5 / 2)
A5 <- sqrt(pbar * (1 - pbar) / n5)    # standard error of the proportion
B5 <- z.half.alpha7 * A5              # margin of error
IE5 <- pbar + c(-B5, +B5)
IE5
```
![](https://i.imgur.com/5bfK4Ao.png)
### **3. Exercises 3**
```
# question 1
# In the past the average length of an outgoing telephone call from a business office has been 143 seconds. 
# A manager wishes to check whether that average has **decreased** after the introduction of policy changes. A sample of 100 telephone calls produced a mean of 133 seconds, with a standard deviation of 35 seconds (from the sample, not from the population). Perform the relevant test at the 1% level of significance.
# Lower-tail test: H0: mu >= mu0 against H1: mu < mu0
n1 <- 100     # sample size
xbar1 <- 133  # sample mean
mu01 <- 143   # hypothesized value
s1 <- 35      # sample standard deviation
z1 <- (xbar1 - mu01) / (s1 / sqrt(n1))
z1
alpha1 <- 0.01
# Reject H0 when z1 < -t.alpha1
t.alpha1 <- qt(1 - alpha1, df = n1 - 1)
t.alpha1

# question 2
# The recommended daily calorie intake for teenage girls is 2 200 calories/day. A nutritionist at a state university believes the average daily caloric intake of girls in that **state to be lower**. Test that hypothesis, at the 5% level of significance, against the null hypothesis that the population average is 2 200 calories/day using the following sample data: n = 36 (sample size); xbar = 2150 (sample mean); s = 203 (sample standard deviation).
# Lower-tail test: H1: mu < mu0 = 2200, H0: mu >= mu0
n2 <- 36
xbar2 <- 2150
mu02 <- 2200
s2 <- 203
z2 <- (xbar2 - mu02) / (s2 / sqrt(n2))
z2
alpha1 <- 0.05
t.alpha1 <- qt(1 - alpha1, df = n2 - 1)
t.alpha2 <- c(-t.alpha1, t.alpha1)
# since z2 > -t.alpha1, H0 is not rejected

# question 3
# A random sample is drawn from a population of known standard deviation 11.3. Construct a 90% confidence interval for the population mean based on the information given (not all the information given need be used).
# a.) n = 36 ; xbar = 105.2 ; s = 11.2 ?
# b.) n = 100 ; xbar = 105.2 ; s = 11.2 ?
# Fixed: the notes called qnorm(0.9, sd31), which passes the standard deviation
# as the mean, and then combined the inputs in a formula that is not an
# interval. Because the population sd is known, the interval is
# xbar -/+ z(0.95) * sigma / sqrt(n); the sample sd is not needed.

# a.)
n31 <- 36
xbar31 <- 105.2
sd31 <- 11.3  # known population standard deviation
se31 <- 11.2  # sample standard deviation (given but not used)
a31 <- qnorm(1 - 0.1 / 2) * sd31 / sqrt(n31)  # margin of error
a31
answer1 <- xbar31 + c(-a31, a31)
answer1

# b.)
n32 <- 100
xbar32 <- 105.2
sd32 <- 11.3  # known population standard deviation
se32 <- 11.2  # sample standard deviation (given but not used)
a32 <- qnorm(1 - 0.1 / 2) * sd32 / sqrt(n32)  # margin of error
a32
answer2 <- xbar32 + c(-a32, a32)
answer2

# question 4
# A government agency was charged by the legislature with estimating the length of time it takes citizens to fill out various forms. 
# Two hundred randomly selected adults were timed as they filled out a particular form. The times required had mean 12.8 minutes with standard deviation 1.7 minutes. Construct a 90% confidence interval for the mean time taken for all adults to fill out this form ?
n4 <- 200
xbar4 <- 12.8
sigma4 <- 1.7
sem4 <- sigma4 / sqrt(n4)  # standard error of the mean
alpha4 <- 0.1
a4 <- qnorm(1 - alpha4 / 2) * sem4  # margin of error
a4
answer4 <- xbar4 + c(a4, -a4)       # upper bound first, then lower bound
answer4

# question 5
# The amount X of beverage in a can labeled 12 ounces is normally distributed with mean 12.1 ounces and standard deviation 0.05 ounce. A can is selected at random.
# a.) Find the probability that the can contains at least 12 ounces ?
# b.) Find the probability that the can contains between 11.9 and 12.1 ounces ?

# a.) P(X >= 12), computed two equivalent ways
d5111 <- 1 - pnorm(12, mean = 12.1, sd = 0.05)
d5112 <- pnorm(12, mean = 12.1, sd = 0.05, lower.tail = FALSE)
d511 <- c(d5111, d5112)
d511

# b.) P(11.9 <= X <= 12.1) as a difference of two lower-tail probabilities
d5121 <- pnorm(12.1, mean = 12.1, sd = 0.05)
d5122 <- pnorm(11.9, mean = 12.1, sd = 0.05)
d512 <- d5121 - d5122
d512
```
*pnorm calculates the left-tail (lower-tail) probability (左尾檢定)
### **4. Exercises 4**
```
# question 1
# The sample size is 80; 60 people reply yes to the project. Find the 95% confidence interval for the proportion ?
n1 <- 80
k1 <- 60
pbar1 <- k1 / n1                       # sample proportion
SE1 <- sqrt(pbar1 * (1 - pbar1) / n1)  # standard error of the proportion
alpha1 <- 0.05
z.alpha1.by2 <- qnorm(1 - alpha1 / 2)
a1 <- pbar1 * (1 - pbar1)
b1 <- sqrt(a1 / n1)
c1 <- z.alpha1.by2 * b1                # margin of error
# Fixed: the notes used `pbar`, which is not defined in this exercise
a11 <- c(pbar1 - c1, pbar1 + c1)
a11

# question 2
# Cereal boxes contain a gift with probability 1/6; the father buys 20 boxes.
# a.) the probability to find 4 in 20 ?
# b.) the probability to find 0 in 20 ?
# c.) 3 toys in the package. the probability to find 2 toys in 5 packages ?
n2 <- 20
pbar2 <- 1 / 6
a21 <- dbinom(4, size = n2, prob = pbar2)
a21
a22 <- dbinom(0, size = n2, prob = pbar2)
a22
# NOTE(review): part c.) uses a success probability of 3/20; the statement
# above is too terse to confirm that value - check against the exercise sheet
n23 <- 5
pbar23 <- 3 / 20
a23 <- dbinom(2, size = n23, prob = pbar23)
a23
```
### **5. Exercises 5**
```
# question 1
# 15 multiple choice need to get 10 or more to find. exactly equal to 10? more than 10 ? 
# Binomial model: 15 questions, success probability 1/4 per question
n1 <- 15
pbar1 <- 1 / 4

# P(X = 10)
a <- dbinom(10, size = n1, prob = pbar1)
a

# P(X > 10), computed three equivalent ways:
# 1) difference of two cumulative probabilities
b1 <- pbinom(15, size = n1, prob = pbar1)
b1
b2 <- pbinom(10, size = n1, prob = pbar1)
b2
b <- b1 - b2
b
# 2) upper tail directly
c <- pbinom(10, size = n1, prob = pbar1, lower.tail = FALSE)
c
# 3) complement of the lower tail
d <- 1 - pbinom(10, size = n1, prob = pbar1)
d

# question 2
# average 10 person in 2 hours. exactly have 9 customers? have 10~15 ?
# Poisson model with lambda = 10

# P(X = 9)
e <- dpois(9, lambda = 10)
e

# P(10 <= X <= 15) = P(X <= 15) - P(X <= 9)
f1 <- ppois(9, lambda = 10)
f1
f2 <- ppois(15, lambda = 10)
f2
f <- f2 - f1
f
```
---
![](https://i.imgur.com/vRENQNE.png)
![](https://i.imgur.com/4eZKUAf.png)
![](https://i.imgur.com/pKFhSMg.png)
![](https://i.imgur.com/MKQiSRK.png)