############################# ####Prvá simulačná štúdia#### ############################# library(mvtnorm)#načítanie knižníc library(geepack) library(gee) set.seed(9) K<-1000 #K volíme ako 100, 500 a 1000 a<-1000 #počet simulácií beta<-c(0.5,0.3) alpha<-0.8 #inicializácia A_ex<-matrix(0,nrow = a, ncol=4) colnames(A_ex)<-c("estimate","S.E","Wald","P-value") A_ar<-matrix(0,nrow = a, ncol=4) colnames(A_ar)<-c("estimate","S.E","Wald","P-value") A_ind<-matrix(0,nrow = a, ncol=4) colnames(A_ind)<-c("estimate","S.E","Wald","P-value") A_glm<-matrix(0,nrow = a, ncol=4) colnames(A_glm)<-c("estimate","S.E","Wald","P-value") A_uns<-matrix(0,nrow = a, ncol=4) colnames(A_uns)<-c("estimate","S.E","Wald","P-value") alpha_odhad<-matrix(0,nrow=a,ncol=2) #generovanie datasetu for (m in 1:a) { id <- rep(1:K) pocet<-sample(2:5,K,replace=TRUE,prob=c(0.1,0.2,0.3,0.4)) #generovanie počtu pozorovaní pre klienta n<-sum(pocet) pomocna_X <- rep(rbinom(K,1,0.5)) #generuje binárnu premennú s p=0.5 data_matica<-matrix(0, nrow= n, ncol=3) colnames(data_matica)<-c("id","X1","epsilon") sigma <- matrix(c(1,alpha,alpha,alpha,alpha ,alpha,1,alpha,alpha,alpha ,alpha,alpha,1,alpha,alpha ,alpha,alpha,alpha,1,alpha ,alpha,alpha,alpha,alpha,1), ncol=5) #štruktúra kovariančnej matice mu<-c(0,0,0,0,0) k=1 for (i in 1:K) { #generovanie nebalancovanych dat k_od = k for (j in 0:(pocet[i]-1)) { data_matica[k,1]<-id[i] data_matica[k,2]<-pomocna_X[i] k<-k+1 } k_do = k - 1 epsilon2 <- rmvnorm(n=1, mean=mu[1:pocet[i]], sigma=sigma[1:pocet[i],1:pocet[i]]) data_matica[k_od:k_do,3]<-epsilon2 } X0<-matrix(1,nrow=n,ncol = 1) X1<-data_matica[,2] X<-matrix(c(X0,X1),ncol=2) #regresná matica Y<-X%*%beta+data_matica[,3] sim_data<-data.frame(Y,data_matica[,1],X1) colnames(sim_data)<-c("Y","id","X1") #odhad modelu m1<-summary(geeglm(Y~X1, family=gaussian, data=sim_data, id=id, corstr = "exchangeable",scale.fix=T)) m2<-summary(geeglm(Y~X1, family=gaussian, data=sim_data, id=id, corstr = "ar1",scale.fix=T)) m3<-summary(geeglm(Y~X1, family=gaussian, data=sim_data, id=id, corstr = "independence",scale.fix=T)) m4<-summary(glm(Y~X1, family=gaussian, data=sim_data)) m5<-summary(geeglm(Y~X1, family=gaussian, data=sim_data, id=id, corstr = "unstructured",scale.fix=T)) A_ex[m,]<-as.matrix(m1$coefficients[2,]) A_ar[m,]<-as.matrix(m2$coefficients[2,]) A_ind[m,]<-as.matrix(m3$coefficients[2,]) A_glm[m,]<-as.matrix(m4$coefficients[2,]) A_uns[m,]<-as.matrix(m5$coefficients[2,]) } ##vyhodnotenie## #medze intervalu dolna_mez_ar<-A_ar[,1]-qnorm(0.975, mean=0)*A_ar[,2] horna_mez_ar<-A_ar[,1]+qnorm(0.975, mean=0)*A_ar[,2] dolna_mez_ex<-A_ex[,1]-qnorm(0.975, mean=0)*A_ex[,2] horna_mez_ex<-A_ex[,1]+qnorm(0.975, mean=0)*A_ex[,2] dolna_mez_ind<-A_ind[,1]-qnorm(0.975, mean=0)*A_ind[,2] horna_mez_ind<-A_ind[,1]+qnorm(0.975, mean=0)*A_ind[,2] dolna_mez_glm<-A_glm[,1]-qnorm(0.975, mean=0)*A_glm[,2] horna_mez_glm<-A_glm[,1]+qnorm(0.975, mean=0)*A_glm[,2] dolna_mez_uns<-A_uns[,1]-qnorm(0.975, mean=0)*A_uns[,2] horna_mez_uns<-A_uns[,1]+qnorm(0.975, mean=0)*A_uns[,2] pokrytie_ar<-matrix(0,nrow = n, ncol=1) pokrytie_ex<-matrix(0,nrow = n, ncol=1) pokrytie_ind<-matrix(0,nrow = n, ncol=1) pokrytie_glm<-matrix(0,nrow = n, ncol=1) pokrytie_uns<-matrix(0,nrow = n, ncol=1) #indikátor pokrytia inervalu for(i in 1:n){ if(dolna_mez_ar[i]<0.3 & horna_mez_ar[i]>0.3){pokrytie_ar[i]<-1} if(dolna_mez_ex[i]<0.3 & horna_mez_ex[i]>0.3){pokrytie_ex[i]<-1} if(dolna_mez_ind[i]<0.3 & horna_mez_ind[i]>0.3){pokrytie_ind[i]<-1} if(dolna_mez_glm[i]<0.3 & horna_mez_glm[i]>0.3){pokrytie_glm[i]<-1} if(dolna_mez_uns[i]<0.3 & horna_mez_uns[i]>0.3){pokrytie_uns[i]<-1} } #výpočet MSE MSE_ar<-1/n*sum((A_ar[,1]-0.3)^2)*100 MSE_ex<-1/n*sum((A_ex[,1]-0.3)^2)*100 MSE_ind<-1/n*sum((A_ind[,1]-0.3)^2)*100 MSE_glm<-1/n*sum((A_glm[,1]-0.3)^2)*100 MSE_uns<-1/n*sum((A_uns[,1]-0.3)^2)*100 #výpočet priemernej dĺžky intervalu interval_ar<-sum(qnorm(0.975, mean=0)*A_ar[,2])/(n/2) interval_ex<-sum(qnorm(0.975, mean=0)*A_ex[,2])/(n/2) interval_ind<-sum(qnorm(0.975, mean=0)*A_ind[,2])/(n/2) interval_glm<-sum(qnorm(0.975, mean=0)*A_glm[,2])/(n/2) interval_uns<-sum(qnorm(0.975, mean=0)*A_uns[,2])/(n/2) #pokrytie intervalu sum(pokrytie_ar)/n sum(pokrytie_ex)/n sum(pokrytie_ind)/n sum(pokrytie_glm)/n sum(pokrytie_uns)/n ############################## ####Druhá simulačná štúdia#### ############################## library(geepack) library(tictoc) library(actuar) library(MuMIn) library(gee) set.seed(9) K<-3000 #počet skupín a<-1000 #počet simulácií beta<-c(0.8,0.5,-0.1,-0.3,0.4) A_ex<-matrix(0,nrow = a, ncol=5) colnames(A_ex)<-c("estimate","S.E","Wald","P-value","QIC") A_ar<-matrix(0,nrow = a, ncol=5) colnames(A_ar)<-c("estimate","S.E","Wald","P-value","QIC") A_ind<-matrix(0,nrow = a, ncol=5) colnames(A_ind)<-c("estimate","S.E","Wald","P-value","QIC") A_skut<-matrix(0,nrow = a, ncol=5) colnames(A_skut)<-c("estimate","S.E","Wald","P-value","QIC") A_glm<-matrix(0,nrow = a, ncol=5) colnames(A_glm)<-c("estimate","S.E","Wald","P-value","QIC") QIC_matrix<-matrix(0,nrow = a, ncol=4) colnames(QIC_matrix)<-c("AR","EX","IND","SKUT") #inicialiacia odhad_intercept_ex<-matrix(0,nrow = a, ncol=5) odhad_intercept_ar<-matrix(0,nrow = a, ncol=5) odhad_intercept_ind<-matrix(0,nrow = a, ncol=5) odhad_intercept_uns<-matrix(0,nrow = a, ncol=5) odhad_intercept_glm<-matrix(0,nrow = a, ncol=4) odhad_pohlavie_ex<-matrix(0,nrow = a, ncol=5) odhad_pohlavie_ar<-matrix(0,nrow = a, ncol=5) odhad_pohlavie_ind<-matrix(0,nrow = a, ncol=5) odhad_pohlavie_uns<-matrix(0,nrow = a, ncol=5) odhad_pohlavie_glm<-matrix(0,nrow = a, ncol=4) odhad_vek_ex<-matrix(0,nrow = a, ncol=5) odhad_vek_ar<-matrix(0,nrow = a, ncol=5) odhad_vek_ind<-matrix(0,nrow = a, ncol=5) odhad_vek_uns<-matrix(0,nrow = a, ncol=5) odhad_vek_glm<-matrix(0,nrow = a, ncol=4) odhad_dodavka_ex<-matrix(0,nrow = a, ncol=5) odhad_dodavka_ar<-matrix(0,nrow = a, ncol=5) odhad_dodavka_ind<-matrix(0,nrow = a, ncol=5) odhad_dodavka_uns<-matrix(0,nrow = a, ncol=5) odhad_dodavka_glm<-matrix(0,nrow = a, ncol=4) odhad_motorka_ex<-matrix(0,nrow = a, ncol=5) odhad_motorka_ar<-matrix(0,nrow = a, ncol=5) odhad_motorka_ind<-matrix(0,nrow = a, ncol=5) odhad_motorka_uns<-matrix(0,nrow = a, ncol=5) odhad_motorka_glm<-matrix(0,nrow = a, ncol=4) tic() for (m in 1:a) { X_pohlavie<- matrix(0,nrow=K,ncol=1) #inicializacia X_vek<- matrix(0,nrow=K,ncol=1) X_vozidlo<- matrix(0,nrow=K,ncol=2) X_rok<-matrix(0,nrow=K,ncol=1) colnames(X_vozidlo)<-c("D","M") vek<-c(18:70) id <- rep(1:K) pocet<-sample(2:4,K,replace=TRUE,prob=c(0.1,0.4,0.5)) #generovanie počtu pozorovaní pre klienta n<-sum(pocet) X_pohlavie <- cbind(rep(rbinom(K,1,0.5))) #generovanie pohlavia pomocna_2<-sample(c("M","A","D"),K, replace=TRUE, prob=c(0.3,0.6,0.1)) #generovanie vozidla pomocna<-sample(18:70,K,replace=TRUE) #generovanie veku intercept<-matrix(1, nrow=n, ncol=1) Z_pomocna<-rexp(K,rate=1) Z<-matrix(0,nrow=n,ncol=1) data_matica<-matrix(0, nrow= n, ncol=6) colnames(data_matica)<-c("id","vek","pohlavie","D","M","rok") k=1 for (i in 1:K) { #generovanie nebalancovanych dat for (j in 0:(pocet[i]-1)) { data_matica[k,1]<-id[i] data_matica[k,2]<-pomocna[i]+j data_matica[k,3]<-X_pohlavie[i] if(pomocna_2[i]=="D"){data_matica[k,4]<-1 data_matica[k,5]<-0} if(pomocna_2[i]=="M"){data_matica[k,4]<-0 data_matica[k,5]<-1} if(pomocna_2[i]=="A"){data_matica[k,4]<-0 data_matica[k,5]<-0} data_matica[k,6]<-j+1 Z[k]<-Z_pomocna[i] k<-k+1 } } X<-matrix(c(intercept,data_matica[,3],data_matica[,2]-18,data_matica[,4],data_matica[,5]),ncol=5) lambda<-exp(X%*%beta)*Z Y<-rpois(n,lambda) data_poisson<-data.frame(Y,data_matica) #odhad modelu m1<-summary(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "exchangeable", maxiter=10)) m2<-summary(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "AR-M", Mv=1, maxiter=10)) m3<-summary(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "independence", maxiter=10)) m4<-summary(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "unstructured", maxiter=10)) m5<-summary(glm(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson)) #výpočet QIC QIC_ex<-QIC(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "exchangeable", maxiter=10)) QIC_ar<-QIC(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "AR-M", Mv=1, maxiter=10)) QIC_ind<-QIC(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "independence", maxiter=10)) QIC_skut<-QIC(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "unstructured", maxiter=10)) #uloženie odhadov odhad_intercept_ex[m,]<-m1$coefficients[1,] odhad_intercept_ar[m,]<-m2$coefficients[1,] odhad_intercept_ind[m,]<-m3$coefficients[1,] odhad_intercept_uns[m,]<-m4$coefficients[1,] odhad_intercept_glm[m,]<-m5$coefficients[1,] odhad_pohlavie_ex[m,]<-m1$coefficients[2,] odhad_pohlavie_ar[m,]<-m2$coefficients[2,] odhad_pohlavie_ind[m,]<-m3$coefficients[2,] odhad_pohlavie_uns[m,]<-m4$coefficients[2,] odhad_pohlavie_glm[m,]<-m5$coefficients[2,] odhad_vek_ex[m,]<-m1$coefficients[3,] odhad_vek_ar[m,]<-m2$coefficients[3,] odhad_vek_ind[m,]<-m3$coefficients[3,] odhad_vek_uns[m,]<-m4$coefficients[3,] odhad_vek_glm[m,]<-m5$coefficients[3,] odhad_dodavka_ex[m,]<-m1$coefficients[4,] odhad_dodavka_ar[m,]<-m2$coefficients[4,] odhad_dodavka_ind[m,]<-m3$coefficients[4,] odhad_dodavka_uns[m,]<-m4$coefficients[4,] odhad_dodavka_glm[m,]<-m5$coefficients[4,] odhad_motorka_ex[m,]<-m1$coefficients[5,] odhad_motorka_ar[m,]<-m2$coefficients[5,] odhad_motorka_ind[m,]<-m3$coefficients[5,] odhad_motorka_uns[m,]<-m4$coefficients[5,] odhad_motorka_glm[m,]<-m5$coefficients[5,] #uloženie hodnoty QIC A_ex[m,5]<-QIC_ex A_ar[m,5]<-QIC_ar A_ind[m,5]<-QIC_ind A_skut[m,5]<-QIC_skut #výber najmenšej hodnoty QIC if (min(A_ex[m,5],A_ar[m,5],A_ind[m,5],A_skut[m,5])==A_ex[m,5]){QIC_matrix[m,2]<-1} if (min(A_ex[m,5],A_ar[m,5],A_ind[m,5],A_skut[m,5])==A_ar[m,5]){QIC_matrix[m,1]<-1} if (min(A_ex[m,5],A_ar[m,5],A_ind[m,5],A_skut[m,5])==A_ind[m,5]){QIC_matrix[m,3]<-1} if (min(A_ex[m,5],A_ar[m,5],A_ind[m,5],A_skut[m,5])==A_skut[m,5]){QIC_matrix[m,4]<-1} } ##vyhodnotenie## a<-(0.4) #tuto sa hodnota mení podľa toho, ktorý parameter sa vyhodnocuje A_ar<-odhad_motorka_ar A_ex<-odhad_motorka_ex A_ind<-odhad_motorka_ind A_uns<-odhad_motorka_uns A_glm<-odhad_motorka_glm A_QIC<-matrix(0,nrow=1000,ncol=5) for(i in 1:1000){ if(QIC_matrix[i,1]==1){A_QIC[i,]<-A_ar[i,]} if(QIC_matrix[i,2]==1){A_QIC[i,]<-A_ex[i,]} if(QIC_matrix[i,3]==1){A_QIC[i,]<-A_ind[i,]} if(QIC_matrix[i,4]==1){A_QIC[i,]<-A_uns[i,]} } #medze intervalov dolna_mez_ar<-A_ar[,1]-qnorm(0.975, mean=0)*A_ar[,4] horna_mez_ar<-A_ar[,1]+qnorm(0.975, mean=0)*A_ar[,4] dolna_mez_ex<-A_ex[,1]-qnorm(0.975, mean=0)*A_ex[,4] horna_mez_ex<-A_ex[,1]+qnorm(0.975, mean=0)*A_ex[,4] dolna_mez_ind<-A_ind[,1]-qnorm(0.975, mean=0)*A_ind[,4] horna_mez_ind<-A_ind[,1]+qnorm(0.975, mean=0)*A_ind[,4] dolna_mez_uns<-A_uns[,1]-qnorm(0.975, mean=0)*A_uns[,4] horna_mez_uns<-A_uns[,1]+qnorm(0.975, mean=0)*A_uns[,4] dolna_mez_glm<-A_glm[,1]-qnorm(0.975, mean=0)*A_glm[,2] horna_mez_glm<-A_glm[,1]+qnorm(0.975, mean=0)*A_glm[,2] dolna_mez_QIC<-A_QIC[,1]-qnorm(0.975, mean=0)*A_QIC[,4] horna_mez_QIC<-A_QIC[,1]+qnorm(0.975, mean=0)*A_QIC[,4] pokrytie_ar<-matrix(0,nrow = 1000, ncol=1) pokrytie_ex<-matrix(0,nrow = 1000, ncol=1) pokrytie_ind<-matrix(0,nrow = 1000, ncol=1) pokrytie_uns<-matrix(0,nrow = 1000, ncol=1) pokrytie_glm<-matrix(0,nrow = 1000, ncol=1) pokrytie_QIC<-matrix(0,nrow = 1000, ncol=1) for(i in 1:1000){ if(dolna_mez_ar[i]a){pokrytie_ar[i]<-1} if(dolna_mez_ex[i]a){pokrytie_ex[i]<-1} if(dolna_mez_ind[i]a){pokrytie_ind[i]<-1} if(dolna_mez_uns[i]a){pokrytie_uns[i]<-1} if(dolna_mez_glm[i]a){pokrytie_glm[i]<-1} if(dolna_mez_QIC[i]a){pokrytie_QIC[i]<-1} } MSE_ar<-1/1000*sum((A_ar[,1]-a)^2)*100 MSE_ex<-1/1000*sum((A_ex[,1]-a)^2)*100 MSE_ind<-1/1000*sum((A_ind[,1]-a)^2)*100 MSE_glm<-1/1000*sum((A_glm[,1]-a)^2)*100 MSE_uns<-1/1000*sum((A_uns[,1]-a)^2)*100 MSE_QIC<-1/1000*sum((A_QIC[,1]-a)^2)*100 interval_ar<-sum(qnorm(0.975, mean=0)*A_ar[,4])/500 interval_ex<-sum(qnorm(0.975, mean=0)*A_ex[,4])/500 interval_ind<-sum(qnorm(0.975, mean=0)*A_ind[,4])/500 interval_uns<-sum(qnorm(0.975, mean=0)*A_uns[,4])/500 interval_glm<-sum(qnorm(0.975, mean=0)*A_glm[,2])/500 interval_QIC<-sum(qnorm(0.975, mean=0)*A_QIC[,4])/500 sum(pokrytie_ar)/1000 sum(pokrytie_ex)/1000 sum(pokrytie_ind)/1000 sum(pokrytie_uns)/1000 sum(pokrytie_glm)/1000 sum(pokrytie_QIC)/1000