#############################
####Prvá simulačná štúdia####
#############################
library(mvtnorm)#načítanie knižníc
library(geepack)
library(gee)
set.seed(9)
K<-1000 #K volíme ako 100, 500 a 1000
a<-1000 #počet simulácií
beta<-c(0.5,0.3)
alpha<-0.8
#inicializácia
A_ex<-matrix(0,nrow = a, ncol=4)
colnames(A_ex)<-c("estimate","S.E","Wald","P-value")
A_ar<-matrix(0,nrow = a, ncol=4)
colnames(A_ar)<-c("estimate","S.E","Wald","P-value")
A_ind<-matrix(0,nrow = a, ncol=4)
colnames(A_ind)<-c("estimate","S.E","Wald","P-value")
A_glm<-matrix(0,nrow = a, ncol=4)
colnames(A_glm)<-c("estimate","S.E","Wald","P-value")
A_uns<-matrix(0,nrow = a, ncol=4)
colnames(A_uns)<-c("estimate","S.E","Wald","P-value")
alpha_odhad<-matrix(0,nrow=a,ncol=2)
#generovanie datasetu
for (m in 1:a) {
id <- rep(1:K)
pocet<-sample(2:5,K,replace=TRUE,prob=c(0.1,0.2,0.3,0.4)) #generovanie počtu pozorovaní pre klienta
n<-sum(pocet)
pomocna_X <- rep(rbinom(K,1,0.5)) #generuje binárnu premennú s p=0.5
data_matica<-matrix(0, nrow= n, ncol=3)
colnames(data_matica)<-c("id","X1","epsilon")
sigma <- matrix(c(1,alpha,alpha,alpha,alpha
,alpha,1,alpha,alpha,alpha
,alpha,alpha,1,alpha,alpha
,alpha,alpha,alpha,1,alpha
,alpha,alpha,alpha,alpha,1), ncol=5) #štruktúra kovariančnej matice
mu<-c(0,0,0,0,0)
k=1
for (i in 1:K) { #generovanie nebalancovanych dat
k_od = k
for (j in 0:(pocet[i]-1)) {
data_matica[k,1]<-id[i]
data_matica[k,2]<-pomocna_X[i]
k<-k+1
}
k_do = k - 1
epsilon2 <- rmvnorm(n=1, mean=mu[1:pocet[i]], sigma=sigma[1:pocet[i],1:pocet[i]])
data_matica[k_od:k_do,3]<-epsilon2
}
X0<-matrix(1,nrow=n,ncol = 1)
X1<-data_matica[,2]
X<-matrix(c(X0,X1),ncol=2) #regresná matica
Y<-X%*%beta+data_matica[,3]
sim_data<-data.frame(Y,data_matica[,1],X1)
colnames(sim_data)<-c("Y","id","X1")
#odhad modelu
m1<-summary(geeglm(Y~X1, family=gaussian, data=sim_data, id=id, corstr = "exchangeable",scale.fix=T))
m2<-summary(geeglm(Y~X1, family=gaussian, data=sim_data, id=id, corstr = "ar1",scale.fix=T))
m3<-summary(geeglm(Y~X1, family=gaussian, data=sim_data, id=id, corstr = "independence",scale.fix=T))
m4<-summary(glm(Y~X1, family=gaussian, data=sim_data))
m5<-summary(geeglm(Y~X1, family=gaussian, data=sim_data, id=id, corstr = "unstructured",scale.fix=T))
A_ex[m,]<-as.matrix(m1$coefficients[2,])
A_ar[m,]<-as.matrix(m2$coefficients[2,])
A_ind[m,]<-as.matrix(m3$coefficients[2,])
A_glm[m,]<-as.matrix(m4$coefficients[2,])
A_uns[m,]<-as.matrix(m5$coefficients[2,])
}
##vyhodnotenie##
#medze intervalu
dolna_mez_ar<-A_ar[,1]-qnorm(0.975, mean=0)*A_ar[,2]
horna_mez_ar<-A_ar[,1]+qnorm(0.975, mean=0)*A_ar[,2]
dolna_mez_ex<-A_ex[,1]-qnorm(0.975, mean=0)*A_ex[,2]
horna_mez_ex<-A_ex[,1]+qnorm(0.975, mean=0)*A_ex[,2]
dolna_mez_ind<-A_ind[,1]-qnorm(0.975, mean=0)*A_ind[,2]
horna_mez_ind<-A_ind[,1]+qnorm(0.975, mean=0)*A_ind[,2]
dolna_mez_glm<-A_glm[,1]-qnorm(0.975, mean=0)*A_glm[,2]
horna_mez_glm<-A_glm[,1]+qnorm(0.975, mean=0)*A_glm[,2]
dolna_mez_uns<-A_uns[,1]-qnorm(0.975, mean=0)*A_uns[,2]
horna_mez_uns<-A_uns[,1]+qnorm(0.975, mean=0)*A_uns[,2]
pokrytie_ar<-matrix(0,nrow = n, ncol=1)
pokrytie_ex<-matrix(0,nrow = n, ncol=1)
pokrytie_ind<-matrix(0,nrow = n, ncol=1)
pokrytie_glm<-matrix(0,nrow = n, ncol=1)
pokrytie_uns<-matrix(0,nrow = n, ncol=1)
#indikátor pokrytia inervalu
for(i in 1:n){
if(dolna_mez_ar[i]<0.3 & horna_mez_ar[i]>0.3){pokrytie_ar[i]<-1}
if(dolna_mez_ex[i]<0.3 & horna_mez_ex[i]>0.3){pokrytie_ex[i]<-1}
if(dolna_mez_ind[i]<0.3 & horna_mez_ind[i]>0.3){pokrytie_ind[i]<-1}
if(dolna_mez_glm[i]<0.3 & horna_mez_glm[i]>0.3){pokrytie_glm[i]<-1}
if(dolna_mez_uns[i]<0.3 & horna_mez_uns[i]>0.3){pokrytie_uns[i]<-1}
}
#výpočet MSE
MSE_ar<-1/n*sum((A_ar[,1]-0.3)^2)*100
MSE_ex<-1/n*sum((A_ex[,1]-0.3)^2)*100
MSE_ind<-1/n*sum((A_ind[,1]-0.3)^2)*100
MSE_glm<-1/n*sum((A_glm[,1]-0.3)^2)*100
MSE_uns<-1/n*sum((A_uns[,1]-0.3)^2)*100
#výpočet priemernej dĺžky intervalu
interval_ar<-sum(qnorm(0.975, mean=0)*A_ar[,2])/(n/2)
interval_ex<-sum(qnorm(0.975, mean=0)*A_ex[,2])/(n/2)
interval_ind<-sum(qnorm(0.975, mean=0)*A_ind[,2])/(n/2)
interval_glm<-sum(qnorm(0.975, mean=0)*A_glm[,2])/(n/2)
interval_uns<-sum(qnorm(0.975, mean=0)*A_uns[,2])/(n/2)
#pokrytie intervalu
sum(pokrytie_ar)/n
sum(pokrytie_ex)/n
sum(pokrytie_ind)/n
sum(pokrytie_glm)/n
sum(pokrytie_uns)/n
##############################
####Druhá simulačná štúdia####
##############################
library(geepack)
library(tictoc)
library(actuar)
library(MuMIn)
library(gee)
set.seed(9)
K<-3000 #počet skupín
a<-1000 #počet simulácií
beta<-c(0.8,0.5,-0.1,-0.3,0.4)
A_ex<-matrix(0,nrow = a, ncol=5)
colnames(A_ex)<-c("estimate","S.E","Wald","P-value","QIC")
A_ar<-matrix(0,nrow = a, ncol=5)
colnames(A_ar)<-c("estimate","S.E","Wald","P-value","QIC")
A_ind<-matrix(0,nrow = a, ncol=5)
colnames(A_ind)<-c("estimate","S.E","Wald","P-value","QIC")
A_skut<-matrix(0,nrow = a, ncol=5)
colnames(A_skut)<-c("estimate","S.E","Wald","P-value","QIC")
A_glm<-matrix(0,nrow = a, ncol=5)
colnames(A_glm)<-c("estimate","S.E","Wald","P-value","QIC")
QIC_matrix<-matrix(0,nrow = a, ncol=4)
colnames(QIC_matrix)<-c("AR","EX","IND","SKUT")
#inicialiacia
odhad_intercept_ex<-matrix(0,nrow = a, ncol=5)
odhad_intercept_ar<-matrix(0,nrow = a, ncol=5)
odhad_intercept_ind<-matrix(0,nrow = a, ncol=5)
odhad_intercept_uns<-matrix(0,nrow = a, ncol=5)
odhad_intercept_glm<-matrix(0,nrow = a, ncol=4)
odhad_pohlavie_ex<-matrix(0,nrow = a, ncol=5)
odhad_pohlavie_ar<-matrix(0,nrow = a, ncol=5)
odhad_pohlavie_ind<-matrix(0,nrow = a, ncol=5)
odhad_pohlavie_uns<-matrix(0,nrow = a, ncol=5)
odhad_pohlavie_glm<-matrix(0,nrow = a, ncol=4)
odhad_vek_ex<-matrix(0,nrow = a, ncol=5)
odhad_vek_ar<-matrix(0,nrow = a, ncol=5)
odhad_vek_ind<-matrix(0,nrow = a, ncol=5)
odhad_vek_uns<-matrix(0,nrow = a, ncol=5)
odhad_vek_glm<-matrix(0,nrow = a, ncol=4)
odhad_dodavka_ex<-matrix(0,nrow = a, ncol=5)
odhad_dodavka_ar<-matrix(0,nrow = a, ncol=5)
odhad_dodavka_ind<-matrix(0,nrow = a, ncol=5)
odhad_dodavka_uns<-matrix(0,nrow = a, ncol=5)
odhad_dodavka_glm<-matrix(0,nrow = a, ncol=4)
odhad_motorka_ex<-matrix(0,nrow = a, ncol=5)
odhad_motorka_ar<-matrix(0,nrow = a, ncol=5)
odhad_motorka_ind<-matrix(0,nrow = a, ncol=5)
odhad_motorka_uns<-matrix(0,nrow = a, ncol=5)
odhad_motorka_glm<-matrix(0,nrow = a, ncol=4)
tic()
for (m in 1:a) {
X_pohlavie<- matrix(0,nrow=K,ncol=1) #inicializacia
X_vek<- matrix(0,nrow=K,ncol=1)
X_vozidlo<- matrix(0,nrow=K,ncol=2)
X_rok<-matrix(0,nrow=K,ncol=1)
colnames(X_vozidlo)<-c("D","M")
vek<-c(18:70)
id <- rep(1:K)
pocet<-sample(2:4,K,replace=TRUE,prob=c(0.1,0.4,0.5)) #generovanie počtu pozorovaní pre klienta
n<-sum(pocet)
X_pohlavie <- cbind(rep(rbinom(K,1,0.5))) #generovanie pohlavia
pomocna_2<-sample(c("M","A","D"),K, replace=TRUE, prob=c(0.3,0.6,0.1)) #generovanie vozidla
pomocna<-sample(18:70,K,replace=TRUE) #generovanie veku
intercept<-matrix(1, nrow=n, ncol=1)
Z_pomocna<-rexp(K,rate=1)
Z<-matrix(0,nrow=n,ncol=1)
data_matica<-matrix(0, nrow= n, ncol=6)
colnames(data_matica)<-c("id","vek","pohlavie","D","M","rok")
k=1
for (i in 1:K) { #generovanie nebalancovanych dat
for (j in 0:(pocet[i]-1)) {
data_matica[k,1]<-id[i]
data_matica[k,2]<-pomocna[i]+j
data_matica[k,3]<-X_pohlavie[i]
if(pomocna_2[i]=="D"){data_matica[k,4]<-1
data_matica[k,5]<-0}
if(pomocna_2[i]=="M"){data_matica[k,4]<-0
data_matica[k,5]<-1}
if(pomocna_2[i]=="A"){data_matica[k,4]<-0
data_matica[k,5]<-0}
data_matica[k,6]<-j+1
Z[k]<-Z_pomocna[i]
k<-k+1
}
}
X<-matrix(c(intercept,data_matica[,3],data_matica[,2]-18,data_matica[,4],data_matica[,5]),ncol=5)
lambda<-exp(X%*%beta)*Z
Y<-rpois(n,lambda)
data_poisson<-data.frame(Y,data_matica)
#odhad modelu
m1<-summary(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "exchangeable", maxiter=10))
m2<-summary(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "AR-M", Mv=1, maxiter=10))
m3<-summary(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "independence", maxiter=10))
m4<-summary(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "unstructured", maxiter=10))
m5<-summary(glm(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson))
#výpočet QIC
QIC_ex<-QIC(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "exchangeable", maxiter=10))
QIC_ar<-QIC(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "AR-M", Mv=1, maxiter=10))
QIC_ind<-QIC(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "independence", maxiter=10))
QIC_skut<-QIC(gee(Y~pohlavie+I(vek-18)+D+M, family=poisson, data=data_poisson, id=data_poisson$id, corstr = "unstructured", maxiter=10))
#uloženie odhadov
odhad_intercept_ex[m,]<-m1$coefficients[1,]
odhad_intercept_ar[m,]<-m2$coefficients[1,]
odhad_intercept_ind[m,]<-m3$coefficients[1,]
odhad_intercept_uns[m,]<-m4$coefficients[1,]
odhad_intercept_glm[m,]<-m5$coefficients[1,]
odhad_pohlavie_ex[m,]<-m1$coefficients[2,]
odhad_pohlavie_ar[m,]<-m2$coefficients[2,]
odhad_pohlavie_ind[m,]<-m3$coefficients[2,]
odhad_pohlavie_uns[m,]<-m4$coefficients[2,]
odhad_pohlavie_glm[m,]<-m5$coefficients[2,]
odhad_vek_ex[m,]<-m1$coefficients[3,]
odhad_vek_ar[m,]<-m2$coefficients[3,]
odhad_vek_ind[m,]<-m3$coefficients[3,]
odhad_vek_uns[m,]<-m4$coefficients[3,]
odhad_vek_glm[m,]<-m5$coefficients[3,]
odhad_dodavka_ex[m,]<-m1$coefficients[4,]
odhad_dodavka_ar[m,]<-m2$coefficients[4,]
odhad_dodavka_ind[m,]<-m3$coefficients[4,]
odhad_dodavka_uns[m,]<-m4$coefficients[4,]
odhad_dodavka_glm[m,]<-m5$coefficients[4,]
odhad_motorka_ex[m,]<-m1$coefficients[5,]
odhad_motorka_ar[m,]<-m2$coefficients[5,]
odhad_motorka_ind[m,]<-m3$coefficients[5,]
odhad_motorka_uns[m,]<-m4$coefficients[5,]
odhad_motorka_glm[m,]<-m5$coefficients[5,]
#uloženie hodnoty QIC
A_ex[m,5]<-QIC_ex
A_ar[m,5]<-QIC_ar
A_ind[m,5]<-QIC_ind
A_skut[m,5]<-QIC_skut
#výber najmenšej hodnoty QIC
if (min(A_ex[m,5],A_ar[m,5],A_ind[m,5],A_skut[m,5])==A_ex[m,5]){QIC_matrix[m,2]<-1}
if (min(A_ex[m,5],A_ar[m,5],A_ind[m,5],A_skut[m,5])==A_ar[m,5]){QIC_matrix[m,1]<-1}
if (min(A_ex[m,5],A_ar[m,5],A_ind[m,5],A_skut[m,5])==A_ind[m,5]){QIC_matrix[m,3]<-1}
if (min(A_ex[m,5],A_ar[m,5],A_ind[m,5],A_skut[m,5])==A_skut[m,5]){QIC_matrix[m,4]<-1}
}
##vyhodnotenie##
a<-(0.4) #tuto sa hodnota mení podľa toho, ktorý parameter sa vyhodnocuje
A_ar<-odhad_motorka_ar
A_ex<-odhad_motorka_ex
A_ind<-odhad_motorka_ind
A_uns<-odhad_motorka_uns
A_glm<-odhad_motorka_glm
A_QIC<-matrix(0,nrow=1000,ncol=5)
for(i in 1:1000){
if(QIC_matrix[i,1]==1){A_QIC[i,]<-A_ar[i,]}
if(QIC_matrix[i,2]==1){A_QIC[i,]<-A_ex[i,]}
if(QIC_matrix[i,3]==1){A_QIC[i,]<-A_ind[i,]}
if(QIC_matrix[i,4]==1){A_QIC[i,]<-A_uns[i,]}
}
#medze intervalov
dolna_mez_ar<-A_ar[,1]-qnorm(0.975, mean=0)*A_ar[,4]
horna_mez_ar<-A_ar[,1]+qnorm(0.975, mean=0)*A_ar[,4]
dolna_mez_ex<-A_ex[,1]-qnorm(0.975, mean=0)*A_ex[,4]
horna_mez_ex<-A_ex[,1]+qnorm(0.975, mean=0)*A_ex[,4]
dolna_mez_ind<-A_ind[,1]-qnorm(0.975, mean=0)*A_ind[,4]
horna_mez_ind<-A_ind[,1]+qnorm(0.975, mean=0)*A_ind[,4]
dolna_mez_uns<-A_uns[,1]-qnorm(0.975, mean=0)*A_uns[,4]
horna_mez_uns<-A_uns[,1]+qnorm(0.975, mean=0)*A_uns[,4]
dolna_mez_glm<-A_glm[,1]-qnorm(0.975, mean=0)*A_glm[,2]
horna_mez_glm<-A_glm[,1]+qnorm(0.975, mean=0)*A_glm[,2]
dolna_mez_QIC<-A_QIC[,1]-qnorm(0.975, mean=0)*A_QIC[,4]
horna_mez_QIC<-A_QIC[,1]+qnorm(0.975, mean=0)*A_QIC[,4]
pokrytie_ar<-matrix(0,nrow = 1000, ncol=1)
pokrytie_ex<-matrix(0,nrow = 1000, ncol=1)
pokrytie_ind<-matrix(0,nrow = 1000, ncol=1)
pokrytie_uns<-matrix(0,nrow = 1000, ncol=1)
pokrytie_glm<-matrix(0,nrow = 1000, ncol=1)
pokrytie_QIC<-matrix(0,nrow = 1000, ncol=1)
for(i in 1:1000){
if(dolna_mez_ar[i]a){pokrytie_ar[i]<-1}
if(dolna_mez_ex[i]a){pokrytie_ex[i]<-1}
if(dolna_mez_ind[i]a){pokrytie_ind[i]<-1}
if(dolna_mez_uns[i]a){pokrytie_uns[i]<-1}
if(dolna_mez_glm[i]a){pokrytie_glm[i]<-1}
if(dolna_mez_QIC[i]a){pokrytie_QIC[i]<-1}
}
MSE_ar<-1/1000*sum((A_ar[,1]-a)^2)*100
MSE_ex<-1/1000*sum((A_ex[,1]-a)^2)*100
MSE_ind<-1/1000*sum((A_ind[,1]-a)^2)*100
MSE_glm<-1/1000*sum((A_glm[,1]-a)^2)*100
MSE_uns<-1/1000*sum((A_uns[,1]-a)^2)*100
MSE_QIC<-1/1000*sum((A_QIC[,1]-a)^2)*100
interval_ar<-sum(qnorm(0.975, mean=0)*A_ar[,4])/500
interval_ex<-sum(qnorm(0.975, mean=0)*A_ex[,4])/500
interval_ind<-sum(qnorm(0.975, mean=0)*A_ind[,4])/500
interval_uns<-sum(qnorm(0.975, mean=0)*A_uns[,4])/500
interval_glm<-sum(qnorm(0.975, mean=0)*A_glm[,2])/500
interval_QIC<-sum(qnorm(0.975, mean=0)*A_QIC[,4])/500
sum(pokrytie_ar)/1000
sum(pokrytie_ex)/1000
sum(pokrytie_ind)/1000
sum(pokrytie_uns)/1000
sum(pokrytie_glm)/1000
sum(pokrytie_QIC)/1000