## ----------------------------------------------------------------------------- library(sommer) data(DT_example) DT <- DT_example A <- A_example ans1 <- mmes(Yield~1, random= ~ Name + Env + Env:Name + Env:Block, rcov= ~ units, nIters=10, data=DT, verbose = FALSE) summary(ans1)$varcomp (n.env <- length(levels(DT$Env))) vpredict(ans1, h2 ~ V1 / ( V1 + (V3/n.env) + (V5/(2*n.env)) ) ) ## ----------------------------------------------------------------------------- data(DT_cpdata) DT <- DT_cpdata GT <- GT_cpdata MP <- MP_cpdata DT$idd <-DT$id; DT$ide <-DT$id ### look at the data A <- A.mat(GT) # additive relationship matrix D <- D.mat(GT) # dominance relationship matrix E <- E.mat(GT) # epistatic relationship matrix ans.ADE <- mmes(color~1, random=~vsm(ism(id),Gu=A) + vsm(ism(idd),Gu=D), rcov=~units, nIters=10, data=DT,verbose = FALSE) (summary(ans.ADE)$varcomp) vpredict(ans.ADE, h2 ~ (V1) / ( V1+V3) ) # narrow sense vpredict(ans.ADE, h2 ~ (V1+V2) / ( V1+V2+V3) ) # broad-sense ## ----fig.show='hold'---------------------------------------------------------- # data(DT_cornhybrids) # DT <- DT_cornhybrids # DTi <- DTi_cornhybrids # GT <- GT_cornhybrids # ### fit the model # modFD <- mmes(Yield~1, # random=~ vsm(atr(Location,c("3","4")),ism(GCA2)), # rcov= ~ vsm(dsm(Location),ism(units)), nIters=10, # returnParam = F, # data=DT, verbose = FALSE) # summary(modFD) ## ----------------------------------------------------------------------------- # data(DT_cornhybrids) # DT <- DT_cornhybrids # DTi <- DTi_cornhybrids # GT <- as(as(as( GT_cornhybrids, "dMatrix"), "generalMatrix"), "CsparseMatrix") # GT[1:4,1:4] # DT=DT[with(DT, order(Location)), ] # ### fit the model # modFD <- mmes(Yield~1, # random=~ vsm(atr(Location,c("3","4")),ism(GCA2),Gu=GT), # rcov= ~ vsm(dsm(Location),ism(units)), nIters=10, # data=DT, verbose = FALSE) # summary(modFD) ## ----------------------------------------------------------------------------- data(DT_cpdata) DT <- DT_cpdata GT <- GT_cpdata MP <- MP_cpdata ### look at the data A <- A.mat(GT) # additive relationship matrix ans <- mmes(color~1, random=~vsm(ism(id),Gu=A), rcov=~units, nIters=10, data=DT, verbose = FALSE) (summary(ans.ADE)$varcomp) vpredict(ans, h2 ~ (V1) / ( V1+V2) ) ## ----------------------------------------------------------------------------- # data(DT_cornhybrids) # DT <- DT_cornhybrids # DTi <- DTi_cornhybrids # GT <- GT_cornhybrids # # modFD <- mmes(Yield~Location, # random=~GCA1+GCA2+SCA, # rcov=~units, nIters=10, # data=DT, verbose = FALSE) # (suma <- summary(modFD)$varcomp) # Vgca <- sum(suma[1:2,1]) # Vsca <- suma[3,1] # Ve <- suma[4,1] # Va = 4*Vgca # Vd = 4*Vsca # Vg <- Va + Vd # (H2 <- Vg / (Vg + (Ve)) ) # (h2 <- Va / (Vg + (Ve)) ) ## ----------------------------------------------------------------------------- data("DT_halfdiallel") DT <- DT_halfdiallel head(DT) DT$femalef <- as.factor(DT$female) DT$malef <- as.factor(DT$male) DT$genof <- as.factor(DT$geno) #### model using overlay modh <- mmes(sugar~1, random=~vsm(ism(overlay(femalef,malef)) ) + genof, nIters=10, data=DT, verbose = FALSE) summary(modh)$varcomp ## ----------------------------------------------------------------------------- data(DT_wheat) DT <- DT_wheat GT <- GT_wheat[,1:200] colnames(DT) <- paste0("X",1:ncol(DT)) DT <- as.data.frame(DT);DT$id <- as.factor(rownames(DT)) # select environment 1 rownames(GT) <- rownames(DT) K <- A.mat(GT) # additive relationship matrix colnames(K) <- rownames(K) <- rownames(DT) # GBLUP pedigree-based approach set.seed(12345) y.trn <- DT vv <- sample(rownames(DT),round(nrow(DT)/5)) y.trn[vv,"X1"] <- NA head(y.trn) ## GBLUP ans <- mmes(X1~1, random=~vsm(ism(id),Gu=K), rcov=~units,nIters=10, data=y.trn, verbose = FALSE) # kinship based cor(ans$u[vv,] ,DT[vv,"X1"], use="complete") ## rrBLUP ans2 <- mmes(X1~1, random=~vsm(ism(GT), buildGu = FALSE), rcov=~units, getPEV = TRUE, nIters=10, # you do more iterations data=y.trn, verbose = FALSE) # kinship based u <- GT %*% as.matrix(ans2$uList$`vsm(ism(GT), buildGu = FALSE`) # BLUPs for individuals rownames(u) <- rownames(GT) cor(u[vv,],DT[vv,"X1"]) # same correlation # the same can be applied in multi-response models in GBLUP or rrBLUP ## ----------------------------------------------------------------------------- # data(DT_ige) # DT <- DT_ige # Af <- A_ige # An <- A_ige ## Direct genetic effects model # modDGE <- mmes(trait ~ block, # random = ~ focal, # rcov = ~ units, nIters=30, # data = DT, verbose=FALSE) # summary(modDGE)$varcomp ## ----------------------------------------------------------------------------- # data(DT_ige) # DT <- DT_ige # A <- A_ige # # ## Indirect genetic effects model # modIGE <- mmes(trait ~ block, dateWarning = FALSE, # random = ~ focal + neighbour, verbose = FALSE, # rcov = ~ units, nIters=100, # data = DT) # summary(modIGE)$varcomp ## ----------------------------------------------------------------------------- # ### Indirect genetic effects model # modIGE <- mmes(trait ~ block, dateWarning = FALSE, # random = ~ covm( vsm(ism(focal)), vsm(ism(neighbour)) ), # rcov = ~ units, nIters=100, verbose = FALSE, # data = DT) # summary(modIGE)$varcomp ## ----------------------------------------------------------------------------- ### Indirect genetic effects model # Ai <- solve(A_ige + diag(1e-5, nrow(A_ige),nrow(A_ige) )) # Ai <- as(as(as( Ai, "dMatrix"), "generalMatrix"), "CsparseMatrix") # # Indirect genetic effects model with covariance between DGE and IGE using relationship matrices # modIGE <- mmes(trait ~ block, dateWarning = FALSE, # random = ~ covm( vsm(ism(focal), Gu=Ai), vsm(ism(neighbour), Gu=Ai) ), # rcov = ~ units, nIters=100, verbose = FALSE, # data = DT) # summary(modIGE)$varcomp ## ----------------------------------------------------------------------------- data(DT_technow) DT <- DT_technow Md <- (Md_technow*2) - 1 Mf <- (Mf_technow*2) - 1 Ad <- A.mat(Md) Af <- A.mat(Mf) Adi <- solve(Ad + diag(1e-4,ncol(Ad),ncol(Ad))) Adi <- as(as(as( Adi, "dMatrix"), "generalMatrix"), "CsparseMatrix") attr(Adi, 'inverse')=TRUE Afi <- solve(Af + diag(1e-4,ncol(Af),ncol(Af))) Afi <- as(as(as( Afi, "dMatrix"), "generalMatrix"), "CsparseMatrix") attr(Afi, 'inverse')=TRUE # RUN THE PREDICTION MODEL y.trn <- DT vv1 <- which(!is.na(DT$GY)) vv2 <- sample(vv1, 100) y.trn[vv2,"GY"] <- NA anss2 <- mmes(GY~1, henderson=TRUE, random=~vsm(ism(dent),Gu=Adi) + vsm(ism(flint),Gu=Afi), rcov=~units, nIters=15, data=y.trn, verbose = FALSE) summary(anss2)$varcomp # zu1 <- model.matrix(~dent-1,y.trn) %*% anss2$uList$`vsm(ism(dent), Gu = Adi)` # zu2 <- model.matrix(~flint-1,y.trn) %*% anss2$uList$`vsm(ism(flint), Gu = Afi)` # u <- zu1+zu2+as.vector(anss2$b) # cor(u[vv2,], DT$GY[vv2]) ## ----------------------------------------------------------------------------- # data(DT_cpdata) # DT <- DT_cpdata # GT <- GT_cpdata # MP <- MP_cpdata # A <- A.mat(GT) # DT2 <- stackTrait(DT, traits = c("color","Yield")) # head(DT2$long) # A <- A.mat(GT) # additive relationship matrix # # if using henderson=TRUE you need to provide the inverse # Ai <- solve(A + diag(1e-4,ncol(A),ncol(A))) # Ai <- as(as(as( Ai, "dMatrix"), "generalMatrix"), "CsparseMatrix") # #### be patient take some time # ansm <- mmes( valueS ~ trait, # henderson=TRUE, # random=~ vsm(usm(trait), ism(id), Gu=A), # rcov=~ vsm(dsm(trait), ism(units)), # data=DT2$long) # cov2cor(ansm$theta[[1]]) ## ----------------------------------------------------------------------------- library(sommer) data("DT_cpdata") DT <- DT_cpdata M <- GT_cpdata ################ # MARKER MODEL ################ mix.marker <- mmes(color~1, random=~Rowf+vsm(ism(M)), rcov=~units,data=DT, verbose = FALSE) me.marker <- mix.marker$uList$`vsm(ism(M` ################ # PARTITIONED GBLUP MODEL ################ MMT <-tcrossprod(M) ## MM' = additive relationship matrix MMTinv<-solve(MMT) ## inverse MTMMTinv<-t(M)%*%MMTinv # M' %*% (M'M)- mix.part <- mmes(color~1, random=~Rowf+vsm(ism(id), Gu=MMT), rcov=~units,data=DT, verbose = FALSE) #convert BLUPs to marker effects me=M'(M'M)- u me.part<-MTMMTinv%*%matrix(mix.part$uList$`vsm(ism(id), Gu = MMT`,ncol=1) # compare marker effects between both models plot(me.marker,me.part) ## ----------------------------------------------------------------------------- # data("DT_wheat") # rownames(GT_wheat) <- rownames(DT_wheat) # G <- A.mat(GT_wheat) # Y <- data.frame(DT_wheat) # # # make the decomposition # UD<-eigen(G) # get the decomposition: G = UDU' # U<-UD$vectors # D<-diag(UD$values)# This will be our new 'relationship-matrix' # rownames(D) <- colnames(D) <- rownames(G) # X<-model.matrix(~1, data=Y) # here: only one fixed effect (intercept) # UX<-t(U)%*%X # premultiply X and y by U' # UY <- t(U) %*% as.matrix(Y) # multivariate # # # dataset for decomposed model # DTd<-data.frame(id = rownames(G) ,UY, UX =UX[,1]) # DTd$id<-as.character(DTd$id) # # modeld <- mmes(cbind(X1,X2) ~ UX - 1, # random = ~vsm(id,Gu=D), # rcov = ~vsm(units), # data=DTd, verbose = FALSE) # # # dataset for normal model # DTn<-data.frame(id = rownames(G) , DT_wheat) # DTn$id<-as.character(DTn$id) # # modeln <- mmes(cbind(X1,X2) ~ 1, # random = ~vsm(id,Gu=G), # rcov = ~vsm(units), # data=DTn, verbose = FALSE) # # ## compare regular and transformed blups # plot(x=(solve(t(U)))%*%modeld$U$`u:id`$X2[colnames(D)], # y=modeln$U$`u:id`$X2[colnames(D)], xlab="UDU blup", # ylab="blup") ## ----------------------------------------------------------------------------- data(DT_expdesigns) DT <- DT_expdesigns$car1 DT <- aggregate(yield~set+male+female+rep, data=DT, FUN = mean) DT$setf <- as.factor(DT$set) DT$repf <- as.factor(DT$rep) DT$malef <- as.factor(DT$male) DT$femalef <- as.factor(DT$female) #levelplot(yield~male*female|set, data=DT, main="NC design I") ############################## ## Expected Mean Square method ############################## mix1 <- lm(yield~ setf + setf:repf + femalef:malef:setf + malef:setf, data=DT) MS <- anova(mix1); MS ms1 <- MS["setf:malef","Mean Sq"] ms2 <- MS["setf:femalef:malef","Mean Sq"] mse <- MS["Residuals","Mean Sq"] nrep=2 nfem=2 Vfm <- (ms2-mse)/nrep Vm <- (ms1-ms2)/(nrep*nfem) ## Calculate Va and Vd Va=4*Vm # assuming no inbreeding (4/(1+F)) Vd=4*(Vfm-Vm) # assuming no inbreeding(4/(1+F)^2) Vg=c(Va,Vd); names(Vg) <- c("Va","Vd"); Vg ############################## ## REML method ############################## mix2 <- mmes(yield~ setf + setf:repf, random=~femalef:malef:setf + malef:setf, nIters=10, data=DT, verbose = FALSE) vc <- summary(mix2)$varcomp; vc Vfm <- vc[1,"VarComp"] Vm <- vc[2,"VarComp"] ## Calculate Va and Vd Va=4*Vm # assuming no inbreeding (4/(1+F)) Vd=4*(Vfm-Vm) # assuming no inbreeding(4/(1+F)^2) Vg=c(Va,Vd); names(Vg) <- c("Va","Vd"); Vg ## ----------------------------------------------------------------------------- DT <- DT_expdesigns$car2 DT <- aggregate(yield~set+male+female+rep, data=DT, FUN = mean) DT$setf <- as.factor(DT$set) DT$repf <- as.factor(DT$rep) DT$malef <- as.factor(DT$male) DT$femalef <- as.factor(DT$female) #levelplot(yield~male*female|set, data=DT, main="NC desing II") head(DT) N=with(DT,table(female, male, set)) nmale=length(which(N[1,,1] > 0)) nfemale=length(which(N[,1,1] > 0)) nrep=table(N[,,1]) nrep=as.numeric(names(nrep[which(names(nrep) !=0)])) ############################## ## Expected Mean Square method ############################## mix1 <- lm(yield~ setf + setf:repf + femalef:malef:setf + malef:setf + femalef:setf, data=DT) MS <- anova(mix1); MS ms1 <- MS["setf:malef","Mean Sq"] ms2 <- MS["setf:femalef","Mean Sq"] ms3 <- MS["setf:femalef:malef","Mean Sq"] mse <- MS["Residuals","Mean Sq"] nrep=length(unique(DT$rep)) nfem=length(unique(DT$female)) nmal=length(unique(DT$male)) Vfm <- (ms3-mse)/nrep; Vf <- (ms2-ms3)/(nrep*nmale); Vm <- (ms1-ms3)/(nrep*nfemale); Va=4*Vm; # assuming no inbreeding (4/(1+F)) Va=4*Vf; # assuming no inbreeding (4/(1+F)) Vd=4*(Vfm); # assuming no inbreeding(4/(1+F)^2) Vg=c(Va,Vd); names(Vg) <- c("Va","Vd"); Vg ############################## ## REML method ############################## mix2 <- mmes(yield~ setf + setf:repf , random=~femalef:malef:setf + malef:setf + femalef:setf, nIters=10, data=DT, verbose = FALSE) vc <- summary(mix2)$varcomp; vc Vfm <- vc[1,"VarComp"] Vm <- vc[2,"VarComp"] Vf <- vc[3,"VarComp"] Va=4*Vm; # assuming no inbreeding (4/(1+F)) Va=4*Vf; # assuming no inbreeding (4/(1+F)) Vd=4*(Vfm); # assuming no inbreeding(4/(1+F)^2) Vg=c(Va,Vd); names(Vg) <- c("Va","Vd"); Vg ## ----------------------------------------------------------------------------- data(DT_cpdata) DT <- DT_cpdata GT <- GT_cpdata[,1:200] MP <- MP_cpdata #### create the variance-covariance matrix A <- A.mat(GT) # additive relationship matrix n <- nrow(DT) # to be used for degrees of freedom k <- 1 # to be used for degrees of freedom (number of levels in fixed effects) ## ----------------------------------------------------------------------------- ########################### #### Regular GWAS/EMMAX approach ########################### # mix2 <- GWAS(color~1, # random=~vsm(ism(id), Gu=A) + Rowf + Colf, # rcov=~units, M=GT, gTerm = "u:id", # verbose = FALSE, nIters=10, # data=DT) ## ----------------------------------------------------------------------------- ########################### #### GWAS by RRBLUP approach ########################### # Z <- GT[as.character(DT$id),] # mixRRBLUP <- mmes(color~1, # random=~vsm(ism(Z)) + Rowf + Colf, # rcov=~units, nIters=10, # verbose = FALSE, # data=DT) # # a <- mixRRBLUP$uList$`vsm(ism(Z`# marker effects # start=mixRRBLUP$partitions[[1]][1] # end=mixRRBLUP$partitions[[1]][2] # se.a <- sqrt( diag(kronecker(diag(ncol(Z)),mixRRBLUP$theta[[1]]) - mixRRBLUP$Ci[start:end,start:end] ) ) # SE of marker effects # t.stat <- a/se.a # t-statistic # pvalRRBLUP <- dt(t.stat[,1],df=n-k-1) # -log10(pval) ## ----------------------------------------------------------------------------- # ########################### # #### GWAS by GBLUP approach # ########################### # M<- GT # MMT <-tcrossprod(M) ## MM' = additive relationship matrix # MMTinv<-solve(MMT + diag(1e-6, ncol(MMT), ncol(MMT))) ## inverse of MM' # MTMMTinv<-t(M)%*%MMTinv # M' %*% (M'M)- # mixGBLUP <- mmes(color~1, # random=~vsm(ism(id), Gu=MMT) + Rowf + Colf, # rcov=~units, nIters=10, # verbose = FALSE, # data=DT) # a.from.g <-MTMMTinv%*%matrix(mixGBLUP$uList$`vsm(ism(id), Gu = MMT`,ncol=1) # start=mixGBLUP$partitions[[1]][1] # end=mixGBLUP$partitions[[1]][2] # var.g <- kronecker(MMT,mixGBLUP$theta[[1]]) - mixGBLUP$Ci[start:end,start:end] # var.a.from.g <- t(M)%*%MMTinv%*% (var.g) %*% t(MMTinv)%*%M # se.a.from.g <- sqrt(diag(var.a.from.g)) # t.stat.from.g <- a.from.g/se.a.from.g # t-statistic # pvalGBLUP <- dt(t.stat.from.g,df=n-k-1) # -log10(pval) ## ----------------------------------------------------------------------------- ########################### #### Compare results ########################### # plot(mix2$scores[,1], main="GWAS") # plot(-log(pvalRRBLUP), main="GWAS by RRBLUP/SNP-BLUP") # plot(-log(pvalGBLUP), main="GWAS by GBLUP")