# Script file for omnibus test for multivariate normality # # For test statistic E_p of Doornik and Hansen 1994 # Recipe in their section 3 # # Citation: Doornik, J.A., and H. Hansen. 1994. An omnibus test # for univariate and multivariate normality. Working paper, # Nuffield College, Oxford University, Oxford, UK # URL: http://www.nuff.ox.ac.uk/Uwers/Doornik/papers/normal2.pdf # R Version (1.3) # 27 November 2012 # A. M. Ellison # # Version 1.2 # 9 February 2007 # A. M. Ellison # # Previous versions: # Version 1.1 # 10 December 2004 # # # Takes a p x n matrix of n observations on a p-dimensional vector # # Example: fishers.iris.setosa is an n row x p column matrix with # n = 50 observations and p = 4 measured variable dimensions. # # require(moments) #R library moments has functions for skewness and kurtosis not in base or stats use.data <- iris[iris$Species=="setosa",1:4] nobs <- as.double(nrow(use.data)) nvars <- as.double(ncol(use.data)) # In version 1.1, the previous 2 lines did not have the function "as.double" in them. # As a result, both nobs and nvars were stored as integers, not double-precision reals. # If nobs was large (order 10E3), then the computation of beta in step 9 returned NA, resulting # in NA's throughout the remainder of the output. # Changing nobs and nvars to double-precision reals eliminates the integer overflow. # Thanks to Matt Michel, Notre Dame University, for finding this bug when he tried to test for # MV-normality on a dataset with 8 variables and 615 observations. # Step 1: generate sample mean vector and covariance matrix mean.vector <- t(as.matrix(cov.wt(use.data)$center)) covariance.matrix <- cov.wt(use.data)$cov # Step 2: create a matrix with the reciprocals of the standard # deviation on the diagonal v.matrix <- diag(1/sqrt(diag(covariance.matrix))) # Step 3: generate the correlation matrix correlation.matrix <- cor(use.data) # Step 4: generate the lambda matrix as the diagonal matrix of # the eigenvalues of correlation.matrix lambda.matrix <- diag(eigen(correlation.matrix)$values) # Step 5: generate the H matrix, which is the matrix of the # corresponding eigenvectors h.matrix <- eigen(correlation.matrix)$vectors # Step 6: generate the Xhatprime matrix, which equals the matrix # of observations minus the means. We need the transpose (hence # prime) of this matrix: p x n, # which has p variables and n observations. xhat.matrix <- use.data-t(matrix(rep(mean.vector, nobs),nvars)) xhatprime.matrix <- t(xhat.matrix) # Step 7: Generate the p x n matrix of transformed variables, rprime.matrix # The following line was incorrect in version 1.0. # rprime.matrix <- h.matrix %*% sqrt(lambda.matrix) %*% t(h.matrix) %*% v.matrix %*% xhatprime.matrix # The error (sqrt(lambda.matrix) should be solve(lambda.matrix)^(1/2) was identified by # Ricardo Accioly, Brazil # here is the correct step 7 rprime.matrix <- h.matrix %*% solve(lambda.matrix)^(1/2) %*% t(h.matrix) %*% v.matrix %*% xhatprime.matrix # Step 8: computer skewness and kurtosis, row-wise, on rprime.matrix. # Doornik & Hansen define skewness as the third central moment divided by the # second central moment (population variance) to the 3/2 power, which is the # moment method for skewness in S-Plus. They define kurtosis as the fourth central moment # divided by the squared second central moment, in contrast to the normal # (momdent) method in which 3 is subracted from this quotient. b1prime <- apply(rprime.matrix, 1, skewness) b2prime <- apply(rprime.matrix, 1, kurtosis)+3 # Step 9: transform skewness into z1, following Appendix of Doornik and Hansen beta <- (3*(nobs^2+27*nobs-70)*(nobs+1)*(nobs+3))/((nobs-2)*(nobs+5)*(nobs+7)*(nobs+9)) w2 <- (-1)+sqrt(2*(beta-1)) delta <- 1/sqrt(log(sqrt(w2))) y1 <- b1prime*sqrt(((w2-1)/2)*(((nobs+1)*(nobs+3))/(6*(nobs-2)))) z1prime <- delta*log(y1 + sqrt(y1^2+1)) z1prime <- matrix(z1prime,nrow=1) # Step 10: transform kurtosis into z2, following Appendix of Doornik and Hansen del <- (nobs-3)*(nobs+1)*(nobs^2+15*nobs-4) aye <- (nobs-2)*(nobs+5)*(nobs+7)*(nobs^2+27*nobs-70)/(6*del) cee <- (nobs-7)*(nobs+5)*(nobs+7)*(nobs^2+2*nobs-5)/(6*del) kap <- (nobs+5)*(nobs+7)*(nobs^3+37*nobs^2+11*nobs-313)/(12*del) alp <- aye + (b1prime^2)*cee chi <- (b2prime-1-b1prime^2)*2*kap chi <- abs(chi) z2prime <- (((chi/(2*alp))^(1/3))-1+(1/(9*alp)))*sqrt(9*alp) z2prime <- matrix(z2prime,nrow=1) # Step 11: Calculate the test statistic Ep Ep <- z1prime %*% t(z1prime) + z2prime %*% t(z2prime) # Step 12: Calculate the p-value of Ep as chi-square with 2*p degrees of freedom dof <- 2*nvars sig.Ep <- 1-pchisq(Ep,2*nvars) Ep dof sig.Ep