#모든 시트를 하나의 리스트로 불러오는 함수read_excel_allsheets<-function(file,tibble=FALSE){sheets<-readxl::excel_sheets(file)x<-lapply(sheets,function(X)readxl::read_excel(file,sheet=X))if(!tibble)x<-lapply(x,as.data.frame)names(x)<-sheetsx}
12장
12장 연습문제 불러오기
#data_chap12에 연습문제 12장 모든 문제 저장data_chap12<-read_excel_allsheets("data_chap12.xls")#연습문제 각각 데이터 생성for(xin1:length(data_chap12)){assign(paste0('ex12_',c(1,4,5,6))[x],data_chap12[x])}#연습문제 데이터 형식을 리스트에서 데이터프레임으로 변환for(xin1:length(data_chap12)){assign(paste0('ex12_',c(1,4,5,6))[x],data.frame(data_chap12[x]))}
nf<-subset(ex12_1$exam12_1.plasma_calcium,ex12_1$exam12_1.Hormone=="No Hormone"&ex12_1$exam12_1.Sex=="Female")nm<-subset(ex12_1$exam12_1.plasma_calcium,ex12_1$exam12_1.Hormone=="No Hormone"&ex12_1$exam12_1.Sex=="Male")yf<-subset(ex12_1$exam12_1.plasma_calcium,ex12_1$exam12_1.Hormone=="Yes Hormone"&ex12_1$exam12_1.Sex=="Female")ym<-subset(ex12_1$exam12_1.plasma_calcium,ex12_1$exam12_1.Hormone=="Yes Hormone"&ex12_1$exam12_1.Sex=="Male")ex12_1a<-data.frame(no_hormone_female=nf,no_hormone_male=nm,yes_hormone_female=yf,yes_hormone_male=ym)ex12_1a%>%kbl(caption="Dataset of ex12_1")%>%kable_styling(bootstrap_options=c("striped","hover","condensed","responsive"))
Dataset of ex12_1
no_hormone_female
no_hormone_male
yes_hormone_female
yes_hormone_male
16.3
15.3
38.1
34.0
20.4
17.4
26.2
22.8
12.4
10.9
32.3
27.8
15.8
10.3
35.8
25.0
9.5
6.7
30.2
29.3
cell_totals<-c(sum(nf),sum(nm),sum(yf),sum(ym))cell_means<-c(mean(nf),mean(nm),mean(yf),mean(ym))cell<-data.frame(rbind(cell_totals,cell_means))names(cell)<-c("X_11","X_12","X_21","X_22")cell%>%kbl(caption="Cell totals and Cell means")%>%kable_styling(bootstrap_options=c("striped","hover","condensed","responsive"))
Cell totals and Cell means
X_11
X_12
X_21
X_22
cell_totals
74.40
60.60
162.60
138.90
cell_means
14.88
12.12
32.52
27.78
EXAMPLE 12.1a
예제에서 나온 식으로 제곱합 및 자유도를 구하는 함수를 직접 작성하여 ANOVA table을 작성하여 보았다.
f12_1a<-function(x){N<-(length(x)*nrow(x))grand_mean<-sum(x)/Nno_hormone_means<-sum(x[,1:2])/10hormone_means<-sum(x[,3:4])/10female_means<-sum(x[,c(1,3)])/10male_means<-sum(x[,c(2,4)])/10total_SS<-sum((x-grand_mean)^2)total_DF<-N-1cells<-vector()for(iin1:4){cells[i]<-(mean(ex12_1a[,i])-grand_mean)^2}cells_SS<-sum(5*cells)cells_DF<-3within_cells_SS<-total_SS-cells_SSwithin_cells_DF<-total_DF-cells_DFFactor_A_SS<-2*5*((no_hormone_means-grand_mean)^2+(hormone_means-grand_mean)^2)Factor_A_DF<-1Factor_B_SS<-2*5*((female_means-grand_mean)^2+(male_means-grand_mean)^2)Factor_B_DF<-1AXB_interaction_SS<-cells_SS-Factor_A_SS-Factor_B_SSAXB_interaction_DF<-cells_DF-Factor_A_DF-Factor_B_DFSS<-c(total_SS,cells_SS,Factor_A_SS,Factor_B_SS,round(AXB_interaction_SS,4),round(within_cells_SS,4))DF<-c(total_DF,cells_DF,Factor_A_DF,Factor_B_DF,AXB_interaction_DF,within_cells_DF)source<-c("Total","Cells","Factor A (hormone)","Facor B (sex)","A X B","Within-Cells (Error)")MS<-c("","",Factor_A_SS/Factor_A_DF,Factor_B_SS/Factor_B_DF,round(AXB_interaction_SS/AXB_interaction_DF,4),within_cells_SS/within_cells_DF)res<-data.frame(source,SS,DF,MS)return(res)}
f12_1a(ex12_1a)%>%kbl(caption="Analysis of Variance Summary table")%>%kable_styling(bootstrap_options=c("striped","hover","condensed","responsive"))
f12_2_1<-function(x){f<-f12_1a(x)$SS[3]/as.numeric(f12_1a(x)$MS[6])f_0.05<-qf(0.95,1,16)p_value<-1-pf(f,1,16)if(f<f_0.05){reject_H0<-"No"}else{reject_H0<-"Yes"}res<-data.frame(f,f_0.05,p_value,reject_H0)return(res)}f12_2_1(ex12_1a)%>%kbl(caption="Result of hypotheses for Hormone treatment")%>%kable_styling(bootstrap_options=c("striped","hover","condensed","responsive"))
Result of hypotheses for Hormone treatment
f
f_0.05
p_value
reject_H0
73.58457
4.493999
2e-07
Yes
F-값이 73.6으로 임계값 4.49보다 큰 값이 나왔고, p-value가 0.05보다 매우 작으므로 유의수준 5%하에 귀무가설을 기각한다.
f12_2_2<-function(x){f<-f12_1a(x)$SS[4]/as.numeric(f12_1a(x)$MS[6])f_0.05<-qf(0.95,1,16)p_value<-1-pf(f,1,16)if(f<f_0.05){reject_H0<-"No"}else{reject_H0<-"Yes"}res<-data.frame(f,f_0.05,p_value,reject_H0)return(res)}f12_2_2(ex12_1a)%>%kbl(caption="Result of hypotheses for sex")%>%kable_styling(bootstrap_options=c("striped","hover","condensed","responsive"))
Result of hypotheses for sex
f
f_0.05
p_value
reject_H0
3.73268
4.493999
0.0712638
No
F-값이 3.73으로 임계값 4.49보다 작은 값이 나왔고, p-value가 0.05보다 크므로 유의수준 5%하에 귀무가설을 기각할 수 없다.
f12_2_3<-function(x){f<-f12_1a(x)$SS[5]/as.numeric(f12_1a(x)$MS[6])f_0.05<-qf(0.95,1,16)p_value<-1-pf(f,1,16)if(f<f_0.05){reject_H0<-"No"}else{reject_H0<-"Yes"}res<-data.frame(f,f_0.05,p_value,reject_H0)return(res)}f12_2_3(ex12_1a)%>%kbl(caption="Result of hypotheses for interaction")%>%kable_styling(bootstrap_options=c("striped","hover","condensed","responsive"))
Result of hypotheses for interaction
f
f_0.05
p_value
reject_H0
0.2601529
4.493999
0.6169788
No
F-값이 0.26으로 임계값 4.49보다 작은 값이 나왔고, p-value가 0.05보다 크므로 유의수준 5%하에 귀무가설을 기각할 수 없다.
stripchart() 함수는 sample size가 작을 때 box plot을 대신하기 좋다.
interaction.plot(ex12_1$exam12_1.Hormone,ex12_1$exam12_1.Sex,ex12_1$exam12_1.plasma_calcium,col=c("orange","#8f7450"),bty="l",main="interaction plot",xlab="Hormone",ylab="mean of calcium",trace.label="Sex")
교호작용 그래프를 보면 서로 겹치는 부분이 없어 성별과 호르몬 간 교호작용이 존재하지 않음을 확인할 수 있으며, 성별이 여자이고 호르몬치료를 받았을 때 혈청 내 칼슘의 농도가 최대가 됨을 알 수 있다.
EXAMPLE 12.2a
“Machine Formulas”라는 사용하여 Example 12.1a에서 구한 결과와 같은 값을 구하여 보자.
f12_2a<-function(x){N<-(length(x)*nrow(x))Xi<-sum(x)Xi2<-sum(x^2)C<-sum(x)^2/Nno_hormone<-sum(x[,1:2])hormone<-sum(x[,3:4])females<-sum(x[,c(1,3)])males<-sum(x[,c(2,4)])total_SS<-Xi2-Caa<-vector()for(iin1:4){aa[i]<-sum(ex12_1a[i])^2}cells_SS7<-(sum(aa)/5)-Cwithin_cells_SS<-total_SS-cells_SS7Factor_A_SS<-((no_hormone^2+hormone^2)/10)-CFactor_B_SS<-((females^2+males^2)/10)-CAXB_interaction_SS<-cells_SS7-Factor_A_SS-Factor_B_SSres<-data.frame(total_SS,cells_SS7,within_cells_SS,Factor_A_SS,Factor_B_SS,AXB_interaction_SS)return(res)}f12_2a(ex12_1a)%>%kbl(caption="Result of Machine formula")%>%kable_styling(bootstrap_options=c("striped","hover","condensed","responsive"))
##
## Cochran's Q Test
##
## H0: There is no difference in the effectiveness of treatments.
## HA: There is a difference in the effectiveness of treatments.
##
## Q = 6.94736842105263
##
## Degrees of Freedom = 4
##
## Significance Level = 0.05
## The p-value is 0.138695841622589
##
##
모든 종류의 옷을 입었을 때 모기에 물린 사람의 데이터는 삭제한다.
Cochran’s Q Test를 수행한 결과, 검정통계량 값은 6.947, p-value는 0.14로 계산되었다.
p-value가 0.05보다 크므로, 유의수준 5%하에 모기의 공격을 받은 사람의 비율은 옷의 종류에 따라 차이가 있다고 결론 내릴 수 없다.
/*정규성 검정*/odsgraphicsoff;odsexcludeall;odsnoresults;PROCUNIVARIATEDATA=ex.ex12_1normalplot;CLASShormone;BYsex;VARplasma_calcium;ODSOUTPUTTestsForNormality=TestsForNormality;RUN;odsgraphicson;odsexcludenone;odsresults;PROCSORTDATA=TestsForNormality;BYdescendingTest;RUN;title" ex12_2 : 정규성 가정";PROCPRINTDATA=TestsForNormalitylabel;WHERETest="Shapiro-Wilk";RUN;title;/*등분산성 검정*/odsgraphicsoff;odsexcludeall;odsnoresults;PROCGLMDATA=ex.ex12_1;CLASShormone;MODELplasma_calcium=hormone/p;MEANShormone/HOVTEST=BARTLETT;ODSOUTPUTBartlett=Bartlett;RUN;PROCGLMDATA=ex.ex12_1;CLASSsex;MODELplasma_calcium=sex/p;MEANSsex/HOVTEST=BARTLETT;ODSOUTPUTBartlett=Bartlett2;RUN;odsgraphicson;odsexcludenone;odsresults;title"ex12_2 : 등분산 가정 (Bartlett's Test for Homogeneity of weight Variance)";PROCPRINTDATA=Bartlettlabel;RUN;PROCPRINTDATA=Bartlett2label;RUN;title;
SAS 출력
ex12_2 : 정규성 가정
OBS
Sex
VarName
Hormone
적합도 검정
적합도 통계량에 대한 레이블
적합도 통계량 값
p-값 레이블
p-값의 부호
p-값
1
Female
plasma_calcium
No Hormone
Shapiro-Wilk
W
0.979256
Pr < W
0.9306
2
Female
plasma_calcium
Yes Hormone
Shapiro-Wilk
W
0.982753
Pr < W
0.9488
3
Male
plasma_calcium
No Hormone
Shapiro-Wilk
W
0.958824
Pr < W
0.7998
4
Male
plasma_calcium
Yes Hormone
Shapiro-Wilk
W
0.978417
Pr < W
0.9260
ex12_2 : 등분산 가정 (Bartlett's Test for Homogeneity of weight Variance)
OBS
Effect
Dependent
Source
DF
Chi-Square
Pr > ChiSq
1
Hormone
plasma_calcium
Hormone
1
0.1999
0.6548
ex12_2 : 등분산 가정 (Bartlett's Test for Homogeneity of weight Variance)
/*정규성 검정*/odsgraphicsoff;odsexcludeall;odsnoresults;PROCUNIVARIATEDATA=ex.ex12_4normalplot;VARTime;ODSOUTPUTTestsForNormality=TestsForNormality;RUN;odsgraphicson;odsexcludenone;odsresults;PROCSORTDATA=TestsForNormality;BYdescendingTest;RUN;title" ex12_2 : 정규성 가정";PROCPRINTDATA=TestsForNormalitylabel;WHERETest="Shapiro-Wilk";RUN;title;/*등분산성 검정*/odsgraphicsoff;odsexcludeall;odsnoresults;PROCGLMDATA=ex.ex12_4;CLASSTreatment;MODELTime=Treatment/p;MEANSTreatment/HOVTEST=BARTLETT;ODSOUTPUTBartlett=Bartlett;RUN;odsgraphicson;odsexcludenone;odsresults;title"ex12_2 : 등분산 가정 (Bartlett's Test for Homogeneity of weight Variance)";PROCPRINTDATA=Bartlettlabel;RUN;title;
SAS 출력
ex12_2 : 정규성 가정
OBS
VarName
적합도 검정
적합도 통계량에 대한 레이블
적합도 통계량 값
p-값 레이블
p-값의 부호
p-값
1
Time
Shapiro-Wilk
W
0.974271
Pr < W
0.9155
ex12_2 : 등분산 가정 (Bartlett's Test for Homogeneity of weight Variance)
title"Example12.5 ";PROCSORTDATA=ex.ex12_4;BYblock;RUN;PROCRANKDATA=ex.ex12_4out=ex12_4;BYBlock;VARtime;RANKSRtime;RUN;PROCPRINT;title2'Original and Ranked Values of Yield';RUN;PROCANOVADATA=ex12_4;CLASSBlockTreatment;MODELRtime=BlockTreatment;title2'Friedman''s Two-way Nonparametric ANOVA';RUN;