Code for generation of correlation matrices with average constraints

  • This R-code was made by Jan Tuitman and makes it possible to generate correlation matrices that satisfy average constraints. See TUITMAN, J., VANDUFFEL, S., YAO, J. (2020). Correlation matrices with Average Constraints. Published version in Statistics and Probability letters. Please consult the paper first. More information and background available from Steven.vanduffel@vub.be

    blocks.r
    17-Aug-2016 14:44 954

    random_On <- function(n){

    A=matrix(,n,n)
    for(i in 1:n){
    for(j in 1:n){
    A[i,j]=rnorm(1)
    }
    }

    qrdata=qr(A)
    Q=qr.Q(qrdata)
    R=qr.R(qrdata)

    for(i in 1:n){
    if(R[i,i] < 1){
    for(j in 1:n){
    Q[j,i]=-Q[j,i]
    }
    }
    }

    return(Q)

    }

    gen_C_from_Ti <- function(Ti){

    k=length(Ti)
    n <- 0
    for(i in 1:k){
    n = n + nrow(Ti[[i]])
    }

    T<-matrix(,n,n)
    cnt<-0

    for(i in 1:k){
    Mi <- random_On(n)
    Tii <- Ti[[i]]
    ni <- nrow(Tii)
    for(j in 1:ni){
    cnt=cnt+1
    tij=Tii[j,]
    for(l in (ni+1):n){
    tij[l]=0
    }
    for(l in 1:n){
    T[cnt,l]=(Mi%*%tij)[l]
    }

    }
    }

    C <- T%*%t(T)

    return(C)

    }

    gen_C_from_Ci <- function(Ci){

    k=length(Ci)
    Ti=Ci

    for(i in 1:k){
    Ti[[i]] = t(chol(Ci[[i]]))
    }

    C = gen_C_from_Ti(Ti)

    return(C)
    }

    blocksvar.r
    17-Aug-2016 14:44 4.2K

    library(far)

    my_sum <- function(v,i,j){

    s <- 0

    if(i<=j){
    for (k in i:j){
    s <- s + v[k]
    }
    }

    return(s)
    }

    gen_li_forward_gaussian <- function(n,sigmai,S){

    li <- sigmai[1]
    if(2 <= n-1){
    for (i in 2:(n-1)){
    a <- max(S-my_sum(sigmai,i+1,n),sigmai[n]-my_sum(sigmai,i+1,n-1)-S,abs(li[i-1]-sigmai[i]))
    b <- min(S+my_sum(sigmai,i+1,n),li[i-1]+sigmai[i])
    ex <- li[i-1]+sigmai[i]*(S-li[i-1])/(my_sum(sigmai,i,n))
    if(ex >= a & ex <= b){
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,ex,min(ex-a,b-ex)/2)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    }
    else{
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,(a+b)/2,(b-a)/4)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    }
    li[i] <- r
    }
    }
    li[n] <- S
    return(li)
    }

    gen_T_from_li <- function(n,sigmai,li){

    rows <- vector(mode="list", length=n)
    s <- vector(mode="list", length=n)

    x <- rnorm(n)
    x <- x/sqrt(sum(x^2)) # random vector of length 1
    rows[[1]] <- x
    s[[1]] <- sigmai[1]*x

    for (i in 2:n){
    p <- (li[i]^2-li[i-1]^2-sigmai[i]^2)/(2*sigmai[i])
    x <- rnorm(n)
    y <- x - s[[i-1]]*sum(x*s[[i-1]])/sum(s[[i-1]]^2) # y is a random vector orthogonal to s[[i-1]]
    z <- s[[i-1]]*p/sum(s[[i-1]]^2) # is already what it should be,
    # but z not of length 1
    if(sum(z^2)<=1){
    ti <- z+y*sqrt((1-sum(z^2))/sum(y^2)) # add the right multiple of y to z, so that
    # ti is of length 1
    }
    else{
    ti <- z
    }
    rows[[i]]<-ti
    s[[i]]<-s[[i-1]]+sigmai[i]*ti
    }

    T <- matrix(,n,n)
    for (i in 1:n){
    for (j in 1:n){
    T[i,j] <- rows[[i]][j] # the matrix T with rows given by 'rows'
    }
    }

    return(T)
    }

    random_On <- function(n){

    A=matrix(,n,n)
    for(i in 1:n){
    for(j in 1:n){
    A[i,j]=rnorm(1)
    }
    }

    qrdata=qr(A)
    Q=qr.Q(qrdata)
    R=qr.R(qrdata)

    for(i in 1:n){
    if(R[i,i] < 1){
    for(j in 1:n){
    Q[j,i]=-Q[j,i]
    }
    }
    }

    return(Q)

    }

    random_On_v_to_w <- function(n,v,w){

    V = orthonormalization(v) # Gramm-Schmidt orthogonalization from package "far"
    W = orthonormalization(w)

    g = random_On(n-1)

    M = matrix(0,nrow=n,ncol=n) # initialisation
    M[1,1] = 1
    for(i in 2:n){
    for(j in 2:n){
    M[i,j] = g[i-1,j-1]
    }
    }

    return(W %*% M %*% solve(V)) # solve is matrix inverse, confusing R syntax!

    }

    gen_w_from_Si_S <-function(k,Si,S){

    li = gen_li_forward_gaussian(k,Si,S)

    W = gen_T_from_li(k,Si,li)

    for(i in 1:k){
    for(j in 1:k){
    W[i,j]=Si[i]*W[i,j]
    }
    }

    return(W)
    }

    gen_C_from_Ti_S<-function(Ti,sigmaij,S){

    k=length(Ti)
    n <- 0
    for(i in 1:k){
    n = n + nrow(Ti[[i]])
    }

    V=matrix(0,nrow=k,ncol=n)
    Si=0
    for(i in 1:k){
    vec=0
    for(j in 1:nrow(Ti[[i]])){
    vec=vec+sigmaij[[i]][j]*Ti[[i]][j,]
    }
    for(j in 1:nrow(Ti[[i]])){
    V[i,j]=vec[j]
    }
    Si[[i]]=sqrt(sum(vec^2))
    }

    W = gen_w_from_Si_S(k,Si,S)

    Wnew=matrix(0,nrow=k,ncol=n)
    for(i in 1:k){
    for(j in 1:k){
    Wnew[i,j] = W[i,j]
    }
    }

    M = random_On(n)
    W = Wnew %*% M

    T = matrix(0,nrow=n,ncol=n)
    ct=0
    for(i in 1:k){
    Mi=random_On_v_to_w(n,V[i,],W[i,])
    for(j in 1:nrow(Ti[[i]])){
    ct = ct+1
    tij=matrix(0,nrow=n,ncol=1)
    for(l in 1:nrow(Ti[[i]])){
    tij[l,1]=Ti[[i]][j,l]
    }
    T[ct,]=Mi%*%tij
    }
    }

    C = T%*%t(T)

    return(C)

    }

    gen_C_from_Ci_S<-function(Ci,sigmaij,S){

    k = length(Ci)
    Ti = Ci

    for(i in 1:k){
    Ti[[i]] = t(chol(Ci[[i]]))
    }

    C = gen_C_from_Ti_S(Ti,sigmaij,S)

    return(C)

    }

    test_cor <- function(n,sigmai,C,S){

    s <- 0
    for (i in 1:n){
    for (j in 1:n){
    s <- s+C[i,j]*sigmai[i]*sigmai[j]
    }
    }
    return(S-abs(s)^(1/2)) # should be close to zero
    }

    cor.r
    17-Aug-2016 14:44 4.0K

    gen_li_forward_uniform <- function(n,rho){

    L <- sqrt(n+rho*(n^2-n))

    li <- 1
    if(2 <= n-2){
    for (i in 2:(n-2)){
    a <- max(L-(n-i),abs(li[i-1]-1))
    b <- min(L+(n-i),li[i-1]+1)
    li[i] <- runif(1,min=a,max=b) # uniform distribution
    }
    }
    a <- max(abs(li[n-2]-1),abs(L-1))
    b <- min(li[n-2]+1,L+1)
    li[n-1] <- runif(1,min=a,max=b)
    li[n] <- L
    return(li)
    }

    gen_li_backward_uniform <- function(n,rho){

    li <- 0
    for (i in 2:n){
    li[i] <- 0 # initialisation
    }

    L <- sqrt(n+rho*(n^2-n))

    li[n] <- L
    if(2 <= n-1){
    for (i in (n-1):2){
    a <- abs(li[i+1]-1)
    b <- min(li[i+1]+1,i)
    li[i] <- runif(1,min=a,max=b)
    }
    }
    li[1] <- 1
    return(li)
    }

    gen_li_forward_gaussian <- function(n,rho){

    L <- sqrt(n+rho*(n^2-n))

    li <- 1
    if(2 <= n-2){
    for (i in 2:(n-2)){
    a <- max(L-(n-i),abs(li[i-1]-1))
    b <- min(L+(n-i),li[i-1]+1)
    ex <- li[i-1]+(L-li[i-1])/(n-i+1)
    if(ex >= a & ex <= b){
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,ex,min(ex-a,b-ex)/2)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    } else{
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,(a+b)/2,(b-a)/4)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    }
    li[i] <- r
    }
    }
    a <- max(abs(li[n-2]-1),abs(L-1))
    b <- min(li[n-2]+1,L+1)
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,(a+b)/2,(b-a)/4)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    li[n-1] <- r
    li[n] <- L
    return(li)
    }

    gen_li_backward_gaussian <- function(n,rho){

    li <- 0
    for(i in (1:n)){
    li[i] <- 0 # initialisation
    }

    L <- sqrt(n+rho*(n^2-n))

    li[n] <- L

    if(2 <= n-1){
    for(i in ((n-1):2)){
    a <- abs(li[i+1]-1)
    b <- min(li[i+1]+1,i)
    ex <- li[i+1]-(li[i+1]-1)/i
    if(ex >=a & ex <= b){
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,ex,min(ex-a,b-ex)/2)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    }
    else{
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,(a+b)/2,(b-a)/4)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    }
    li[i] <- r
    }
    }

    li[1] <- 1

    return(li)

    }

    gen_T_from_li <- function(n,li){

    rows <- vector(mode="list", length=n)
    s <- vector(mode="list", length=n)

    x <- rnorm(n)
    x <- x/sqrt(sum(x^2)) # random vector of length 1
    rows[[1]] <- x
    s[[1]] <- x

    for (i in 2:n){
    p <- (li[i]^2-li[i-1]^2-1)/2
    x <- rnorm(n)
    y <- x - s[[i-1]]*sum(x*s[[i-1]])/sum(s[[i-1]]^2) # y is a random vector orthogonal to s[[i-1]]
    z <- s[[i-1]]*p/sum(s[[i-1]]^2) # is already what it should be,
    # but z not of length 1
    if(sum(z^2)<=1){
    ti <- z+y*sqrt((1-sum(z^2))/sum(y^2)) # add the right multiple of y to z, so that
    # ti is of length 1
    }
    else{
    ti <- z
    }
    rows[[i]]<-ti
    s[[i]]<-s[[i-1]]+ti
    }

    T <- matrix(,n,n)
    for (i in 1:n){
    for (j in 1:n){
    T[i,j] <- rows[[i]][j] # the matrix T with rows given by 'rows'
    }
    }

    return(T)
    }

    gen_C_from_T <- function(T){

    C <- T %*% t(T) # correlation matrix C=T*transpose(T)

    return(C)

    }

    gen_C_from_li <- function(n,li){

    T <- gen_T_from_li(n,li)
    C <- T %*% t(T) # correlation matrix C=T*transpose(T)

    return(C)

    }

    gen_C_from_rho <- function(n,rho){

    li <- gen_li_forward_gaussian(n,rho)
    C <- gen_C_from_li(n,li)

    return(C)

    }

    test_cor <- function(n,rho,C){

    S <- 0
    for (i in 1:n){
    for (j in 1:n){
    if(i != j){S <- S + C[i,j]}
    }
    }
    return(S-rho*(n^2-n)) # should be close to zero
    }

    ######################## EXAMPLES blocks.r #########################

    C1 = matrix(c(1.0000000, 0.3581445, 0.6069251, 0.3581445, 1.0000000, 0.5349304, 0.6069251, 0.5349304, 1.0000000),nrow=3,ncol=3)
    C2 = matrix(c(1.0000000, 0.8115993, 0.1405289, 0.8115993, 1.0000000, 0.0478718, 0.1405289, 0.0478718, 1.0000000),nrow=3,ncol=3)
    C3 = matrix(c(1.00000000, -0.01722136, 0.05192323, -0.01722136, 1.00000000, 0.71529813, 0.05192323, 0.71529813, 1.00000000),nrow=3,ncol=3)

    # C1,C2 and C3 are 3x3 correlation matrices with average correlation 1/2,1/3,1/4, respectively.

    print(C1)
    print(C2)
    print(C3)

    Ci = list(C1,C2,C3)

    C = gen_C_from_Ci(Ci) # 9x9 correlation matrix with blocks C1,C2,C3 along the diagonal

    print(C)

    ######################## EXAMPLES blocksvar.r #########################

    sigma1=c(1,2,3)
    sigma2=c(4,5,6)
    sigma3=c(7,8,9)
    sigmai=c(1,2,3,4,5,6,7,8,9)
    sigmaij=list(sigma1,sigma2,sigma3)

    C1 = matrix(c(1.0000000, 0.3581445, 0.6069251, 0.3581445, 1.0000000, 0.5349304, 0.6069251, 0.5349304, 1.0000000),nrow=3,ncol=3)
    C2 = matrix(c(1.0000000, 0.8115993, 0.1405289, 0.8115993, 1.0000000, 0.0478718, 0.1405289, 0.0478718, 1.0000000),nrow=3,ncol=3)
    C3 = matrix(c(1.00000000, -0.01722136, 0.05192323, -0.01722136, 1.00000000, 0.71529813, 0.05192323, 0.71529813, 1.00000000),nrow=3,ncol=3)
    Ci=list(C1,C2,C3)

    print(C1)
    print(C2)
    print(C3)

    S=5

    C=gen_C_from_Ci_S(Ci,sigmaij,S)

    print(C)

    test_cor(9,sigmai,C,S)

    ######################## EXAMPLES cor.r #########################

    n <- 10 # number of variables
    rho <- 0.4 # average correlation,

    # Example 1: forward uniform example

    l1 <- gen_li_forward_uniform(n,rho)
    C1 <- gen_C_from_li(n,l1)
    #print(C1)
    det(C1)^(1/n)
    test_cor(n,rho,C1)

    # Example 2: backward uniform example

    l2 <- gen_li_backward_uniform(n,rho)
    C2 <- gen_C_from_li(n,l2)
    #print(C2)
    det(C2)^(1/n)
    test_cor(n,rho,C2)

    # Example 3: forward gaussian example

    l3 <- gen_li_forward_gaussian(n,rho)
    C3 <- gen_C_from_li(n,l3)
    #print (C3)
    det(C3)^(1/n)
    test_cor(n,rho,C3)

    # Example 4: backward gaussian example

    l4 <- gen_li_backward_gaussian(n,rho)
    C4 <- gen_C_from_li(n,l4)
    #print (C4)
    det(C4)^(1/n)
    test_cor(n,rho,C4)

    # Example 1: forward uniform example

    sigmai <- 1
    for(i in 2:9){
    sigmai[i] = 1
    }
    sigmai[10] = 3
    n=10
    S=2

    li <- gen_li_forward_uniform(n,sigmai,S)
    C <- gen_C_from_li(n,sigmai,li)
    test_cor(n,sigmai,C,S)

    # Example 2: backward uniform example

    sigmai <- 3
    for(i in 2:10){
    sigmai[i] = 1
    }
    n=10
    S=0

    li <- gen_li_backward_uniform(n,sigmai,S)
    C <- gen_C_from_li(n,sigmai,li)
    test_cor(n,sigmai,C,S)

    # Example 3: forward gaussian example

    sigmai <- 1
    for(i in 2:9){
    sigmai[i] = 1
    }
    sigmai[10] = 5
    n=10
    S=3

    li <- gen_li_forward_gaussian(n,sigmai,S)
    C <- gen_C_from_li(n,sigmai,li)
    test_cor(n,sigmai,C,S)

    # Example 4: backward gaussian example

    sigmai <- 3
    for(i in 2:10){
    sigmai[i] = 1
    }
    n=10
    S=0

    li <- gen_li_backward_gaussian(n,sigmai,S)
    C <- gen_C_from_li(n,sigmai,li)
    test_cor(n,sigmai,C,S)

    example_var.r
    17-Aug-2016 14:44 860

    # Example 1: forward uniform example

    sigmai <- 1
    for(i in 2:9){
    sigmai[i] = 1
    }
    sigmai[10] = 3
    n=10
    S=2

    li <- gen_li_forward_uniform(n,sigmai,S)
    C <- gen_C_from_li(n,sigmai,li)
    test_cor(n,sigmai,C,S)

    # Example 2: backward uniform example

    sigmai <- 3
    for(i in 2:10){
    sigmai[i] = 1
    }
    n=10
    S=0

    li <- gen_li_backward_uniform(n,sigmai,S)
    C <- gen_C_from_li(n,sigmai,li)
    test_cor(n,sigmai,C,S)

    # Example 3: forward gaussian example

    sigmai <- 1
    for(i in 2:9){
    sigmai[i] = 1
    }
    sigmai[10] = 5
    n=10
    S=3

    li <- gen_li_forward_gaussian(n,sigmai,S)
    C <- gen_C_from_li(n,sigmai,li)
    test_cor(n,sigmai,C,S)

    # Example 4: backward gaussian example

    sigmai <- 3
    for(i in 2:10){
    sigmai[i] = 1
    }
    n=10
    S=0

    li <- gen_li_backward_gaussian(n,sigmai,S)
    C <- gen_C_from_li(n,sigmai,li)
    test_cor(n,sigmai,C,S)

    var.r
    17-Aug-2016 14:44 4.2K

    my_sum <- function(v,i,j){

    s <- 0

    if(i<=j){
    for (k in i:j){
    s <- s + v[k]
    }
    }

    return(s)
    }

    gen_li_forward_uniform <- function(n,sigmai,S){

    li <- sigmai[1]
    if(2 <= n-1){
    for (i in 2:(n-1)){
    a <- max(S-my_sum(sigmai,i+1,n),sigmai[n]-my_sum(sigmai,i+1,n-1)-S,abs(li[i-1]-sigmai[i]))
    b <- min(S+my_sum(sigmai,i+1,n),li[i-1]+sigmai[i])
    li[i] <- runif(1,min=a,max=b)
    }
    }
    li[n] <- S
    return(li)
    }

    gen_li_backward_uniform <- function(n,sigmai,S){

    li <- 0
    for (i in 2:n){
    li[i] <- 0 # initialisation
    }

    li[n] <- S
    if(2 <= n-1){
    for (i in (n-1):2){
    a <- max(sigmai[1]-my_sum(sigmai,2,i),abs(li[i+1]-sigmai[i+1]))
    b <- min(li[i+1]+sigmai[i+1],my_sum(sigmai,1,i))
    li[i] <- runif(1,min=a,max=b)
    }
    }
    li[1] <- sigmai[1]
    return(li)
    }

    gen_li_forward_gaussian <- function(n,sigmai,S){

    li <- sigmai[1]

    if(2 <= n-1){
    for (i in 2:(n-1)){
    a <- max(S-my_sum(sigmai,i+1,n),sigmai[n]-my_sum(sigmai,i+1,n-1)-S,abs(li[i-1]-sigmai[i]))
    b <- min(S+my_sum(sigmai,i+1,n),li[i-1]+sigmai[i])
    ex <- li[i-1]+sigmai[i]*(S-li[i-1])/(my_sum(sigmai,i,n))
    if(ex >= a & ex <= b){i
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,ex,min(ex-a,b-ex)/2)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    }
    else{
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,(a+b)/2,(b-a)/4)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    }
    li[i] <- r
    }
    }
    li[n] <- S
    return(li)
    }

    gen_li_backward_gaussian <- function(n,sigmai,S){

    li <- 0
    for(i in (1:n)){
    li[i] <- 0 # initialisation
    }

    li[n] <- S

    if(2 <= n-1){
    for(i in ((n-1):2)){
    a <- max(abs(li[i+1]-sigmai[i+1]),sigmai[1]-my_sum(sigmai,2,i))
    b <- min(li[i+1]+sigmai[i+1],my_sum(sigmai,1,i))
    ex <- li[i+1]-(li[i+1]-sigmai[1])/my_sum(sigmai,1,i)
    if(ex >=a & ex <= b){
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,ex,min(ex-a,b-ex)/2)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    } else{
    done <- FALSE
    while(done == FALSE){
    r=rnorm(1,(a+b)/2,(b-a)/4)
    if(r >= a & r <= b){
    done <- TRUE
    }
    }
    }
    li[i] <- r
    }
    }
    li[1] <- sigmai[1]

    return(li)

    }

    gen_T_from_li <- function(n,sigmai,li){

    rows <- vector(mode="list", length=n)
    s <- vector(mode="list", length=n)

    x <- rnorm(n)
    x <- x/sqrt(sum(x^2)) # random vector of length 1
    rows[[1]] <- x
    s[[1]] <- sigmai[1]*x

    for (i in 2:n){
    p <- (li[i]^2-li[i-1]^2-sigmai[i]^2)/(2*sigmai[i])
    x <- rnorm(n)
    y <- x - s[[i-1]]*sum(x*s[[i-1]])/sum(s[[i-1]]^2) # y is a random vector orthogonal to s[[i-1]]
    z <- s[[i-1]]*p/sum(s[[i-1]]^2) # is already what it should be,
    # but z not of length 1
    if(sum(z^2)<=1){
    ti <- z+y*sqrt((1-sum(z^2))/sum(y^2)) # add the right multiple of y to z, so that
    # ti is of length 1
    } else{
    ti <- z
    }
    rows[[i]]<-ti
    s[[i]]<-s[[i-1]]+sigmai[i]*ti
    }

    T <- matrix(,n,n)
    for (i in 1:n){
    for (j in 1:n){
    T[i,j] <- rows[[i]][j] # the matrix T with rows given by 'rows'
    }
    }

    return(T)
    }

    gen_C_from_T <- function(T){

    C <- T %*% t(T) # correlation matrix C=T*transpose(T)

    return(C)
    }

    gen_C_from_li <- function(n,sigmai,li){

    T <- gen_T_from_li(n,sigmai,li)
    C <- T %*% t(T) # correlation matrix C=T*transpose(T)

    return(C)

    }

    gen_C_from_S <- function(n,sigmai,S){

    li <- gen_li_forward_gaussian(n,sigmai,S)
    C <- gen_C_from_li(n,sigmai,li)

    return(C)

    }

    test_cor <- function(n,sigmai,C,S){

    s <- 0
    for (i in 1:n){
    for (j in 1:n){
    s <- s+C[i,j]*sigmai[i]*sigmai[j]
    }
    }
    return(S-abs(s)^(1/2)) # should be close to zero
    }