# Volně přístupný kód G-testu jsme doplnili o možnost simulované p-hodnoty metodou Monte Carlo


G.test <- function(x, y = NULL, correct=c("none", "williams", "yates"), 
                  p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = FALSE, B = 2000) {
  
# Log-likelihood tests of independence & goodness of fit
# Does Williams' and Yates' correction
# Does Monte Carlo simulation of p-values, via Patefield's algorithm
# G & q calculation from Sokal & Rohlf (1995) Biometry 3rd ed.
# TOI Yates' correction taken from Mike Camann's 2x2 G-test fn.
# GOF Yates' correction as described in Zar (2000)
# Part of code taken from stats' chisq.test()
  
  DNAME <- deparse(substitute(x))
  if (is.data.frame(x)) x <- as.matrix(x)
  if (is.matrix(x)) {
    if (min(dim(x)) == 1)
      x <- as.vector(x)
  }
  if (!is.matrix(x) && !is.null(y)) {
    if (length(x) != length(y))
      stop("x and y must have the same length")
    DNAME <- paste(DNAME, "and", deparse(substitute(y)))
    OK <- complete.cases(x, y)
    x <- as.factor(x[OK])
    y <- as.factor(y[OK])
    if ((nlevels(x) < 2) || (nlevels(y) < 2))
      stop("x and y must have at least 2 levels")
    x <- table(x, y)
  }
  if (any(x < 0) || any(is.na(x)))
    stop("all entries of x must be nonnegative and finite")
  if ((n <- sum(x)) == 0)
    stop("at least one entry of x must be positive")
  
  if (simulate.p.value) {
      setMETH <- function() METHOD <<- paste(METHOD,
                       "with simulated p-value\n\t (based on", B,
                       "replicates)")
    almost.1 <- 1 - 64 * .Machine$double.eps
  }
  
  correct <- match.arg(correct)
  
  if (is.matrix(x)) { # If x is matrix, do test of independence
    METHOD <- "Log likelihood ratio test (G-test) of independence"
    nrows<-nrow(x)
    ncols<-ncol(x)
    if (correct=="yates"){ # Do Yates' correction?
      if(dim(x)[1]!=2L || dim(x)[2]!=2L) # Check for 2x2 matrix
        stop("Yates' correction requires a 2 x 2 matrix")
      if((x[1,1]*x[2,2])-(x[1,2]*x[2,1]) > 0)
      {
        x <- x + 0.5
        diag(x) <- diag(x) - 1
        
      } else {
        x <- x - 0.5
        diag(x) <- diag(x) + 1
      }
    }
    
    sr <- apply(x,1,sum)
    sc <- apply(x,2,sum)
    E <- outer(sr,sc,"*")/n
    
    dimnames(E) <- dimnames(x)
    if (simulate.p.value) { # Monte Carlo p-value
      setMETH()
      tbls <- r2dtable(B, sr, sc)
      tmp <- NULL
      for (t in tbls) {
        g <- 0
        for (i in 1:nrows){
          for (j in 1:ncols){
            if (t[i,j] != 0) g <- g + t[i,j] * log(t[i,j]/E[i,j])
          }
        }
      tmp <- c(tmp, 2*g)
      }
      
      g <- 0
      for (i in 1:nrows){
         for (j in 1:ncols){
                 if (x[i,j] != 0) g <- g + x[i,j] * log(x[i,j]/E[i,j])
         }
      }
      STATISTIC <- G <- 2 * g
      PARAMETER <- NA
      PVAL <- (1 + sum(tmp >= almost.1 * STATISTIC)) / (B + 1)
    }
    else { # No Monte Carlo p-value
      g <- 0
      for (i in 1:nrows){
        for (j in 1:ncols){
          if (x[i,j] != 0) g <- g + x[i,j] * log(x[i,j]/E[i,j])
        }
      }
        q <- 1
        if (correct=="williams"){ # Do Williams' correction
          row.tot <- col.tot <- 0
          for (i in 1:nrows){ row.tot <- row.tot + 1/(sum(x[i,])) }
          for (j in 1:ncols){ col.tot <- col.tot + 1/(sum(x[,j])) }
          q <- 1 + ((n*row.tot-1)*(n*col.tot-1))/(6*n*(ncols-1)*(nrows-1))
        }
        STATISTIC <- G <- 2 * g / q
        PARAMETER <- (nrow(x)-1)*(ncol(x)-1)
        PVAL <- 1-pchisq(STATISTIC,df=PARAMETER)
        if(correct=="none")
          METHOD <- "Log likelihood ratio test (G-test) of independence without correction"
        if(correct=="williams")
          METHOD <- "Log likelihood ratio test (G-test) of independence with Williams' correction"
        if(correct=="yates")
          METHOD <- "Log likelihood ratio test (G-test) of independence with Yates' continuity correction"
      }
  }
  else { # x is not a matrix, so we do Goodness of Fit
    METHOD <- "Log likelihood ratio goodness of fit test (G-test)"
    
    if (length(dim(x)) > 2L) 
      stop("invalid 'x'")
    if (length(x) == 1L)
      stop("x must at least have 2 elements")
    if (length(x) != length(p))
      stop("'x' and 'p' must have the same number of elements")
    if (any(p < 0)) 
      stop("probabilities must be non-negative.")
    if (abs(sum(p) - 1) > sqrt(.Machine$double.eps)) {
      if (rescale.p) 
        p <- p/sum(p)
      else stop("probabilities must sum to 1.")
    }
    
    E <- n * p
    names(E) <- names(x)
    g <- 0
    for (i in 1:length(x)){
      if (x[i] != 0) g <- g + x[i] * log(x[i]/E[i])
    }
    q <- 1
    if (correct=="williams"){ # Do Williams' correction
      q <- 1+(length(x)+1)/(6*n)
    }
    STATISTIC <- G <- 2*g/q
    names(E) <- names(x)
    if(simulate.p.value) {
      setMETH()
      nx <- length(x)
      sm <- matrix(sample.int(nx, B*n, TRUE, prob = p),nrow = n)
      ss <- apply(sm, 2L, function(x) {
        g <- 0
        for (i in 1L:nx) {
          if (x[i] != 0) g <- g + x[i] * log(x[i]/E[i])
        }
      })
      PARAMETER <- NA
      PVAL <- (1 + sum(ss >= almost.1 * STATISTIC))/(B + 1)
    }
    else {
      PARAMETER <- length(x) - 1
      PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
    }
    if (correct=="yates"){ # Do Yates' correction
      if(length(x)!=2)
        stop("Yates' correction requires 2 data values")
      if ( (x[1]-E[1]) > 0.25) {
        x[1] <- x[1]-0.5
        x[2] <- x[2]+0.5
      }
      else if ( (E[1]-x[1]) > 0.25) {
        x[1] <- x[1]+0.5
        x[2] <- x[2]-0.5
      }
    }
    
    PARAMETER <- length(x) - 1
    PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
    if(correct=="williams")
      METHOD <- "Log likelihood ratio goodness of fit test (G-test) with Williams' correction"
    if(correct=="yates")
      METHOD <- "Log likelihood ratio goodness of fit test (G-test) with Yates' continuity correction"
  }
  names(STATISTIC) <- "G"
  names(PARAMETER) <- "X-squared df"
  names(PVAL) <- "p.value"
  structure(list(statistic=STATISTIC,parameter=PARAMETER,p.value=PVAL,
                 method=METHOD,data.name=DNAME, observed=x, expected=E),
            class="htest")
}