#This file contains functions to assess the behavior of
#logistic regression models for case-control GWAS when a covariate is available.
#The file 'log_regression_covariate_examples.R' explains how to use these functions.

#For theoretical details, see
#"Including known covariates can reduce power to detect genetic effects in case-control studies."
#By Matti Pirinen, Peter Donnelly and Chris Spencer
#Nat Genet (2012)

#Written by
#Matti Pirinen, February 2012
#matti.pirinen@iki.fi

#
##
###
######## BINARY COVARIATE #########
###
##
#

model.1.loglkhood<-function(par,p,q,phi){
#negative log-likelihood function for logistic regression with
#base-line parameter (par[1]) and
#additive effect for genotype (par[2])
#the log-likelihood is evaluated at the expected counts
#in cases defined by 'p' and in controls defined by 'q'
#and with the case proportion 'phi'

#INPUT
#par, vector of length 2 giving the argument of the log-likelihood
#p, genotype frequencies in cases (vector of length 3)
#q, genotype frequencies in controls (vector of length 3)
#phi, proportion of cases in the sample

#OUTPUT
#negative log-likelihood at the given argument

  N=100000 #assuming sample size N=100000 as only the shape matters here, KEEP SAME AS IN gradient function below
  stopifnot(abs(1-sum(p))<1e-9 & abs(1-sum(q))<1e-9)
  stopifnot(phi>0 & phi<1)
  stopifnot(length(par)==2)

  A=par[1]+c(0,1,2)*par[2] #log-odds for the three genotypes
  -N*(phi*sum(p*log(1/(1+exp(-A))))+(1-phi)*sum(q*log(1/(1+exp(A)))))
}

model.1.gradient<-function(par,p,q,phi){
#gradient of the negative log-likelihood function
#INPUT: see model.1.loglkhood
#OUTPUT: two-dimensional gradient vector

  N=100000 #KEEP SAME AS IN the loglkhood function above
  stopifnot(abs(1-sum(p))<1e-9 & abs(1-sum(q))<1e-9)
  stopifnot(phi>0 & phi<1)
  stopifnot(length(par)==2)

  A=par[1]+c(0,1,2)*par[2]
  -N*c(sum((phi*p-(1-phi)*q*exp(A))/(1+exp(A))),
       sum(c(0,1,2)*(phi*p-(1-phi)*q*exp(A))/(1+exp(A))))
}

model.2.loglkhood<-function(par,p,q,phi){
#negative log-likelihood function for logistic regression with
#base-line parameter (par[1]) and
#additive effect for genotype (par[2]) and
#additive effect for binary covariate (par[3])
#the log-likelihood is evaluated at the expected counts
#in cases defined by the 2x3 matrix 'p' and in controls defined by the 2x3 matrix 'q'
#and with the case proportion 'phi'

#INPUT
#par, vector of length 3 giving the argument of the log-likelihood
#p, genotype x covariate frequencies in cases (matrix of dimension 2x3)
#q, genotype x covariate frequencies in controls (matrix of dimension 2x3)
#phi, proportion of cases in the sample

#OUTPUT
#negative log-likelihood at the given argument

  N=100000 #assuming sample size N=100000 as only the shape matters here, KEEP SAME AS IN gradient function below
  stopifnot(sum(dim(p)==c(2,3))==2)
  stopifnot(sum(dim(q)==c(2,3))==2)
  stopifnot(abs(1-sum(p))<1e-9 & abs(1-sum(q))<1e-9)
  stopifnot(phi>0 & phi<1)

  A=par[1]+matrix(c(0,par[2],2*par[2],par[3],par[2]+par[3],2*par[2]+par[3]),byrow=TRUE,nrow=2)
  -N*(phi*sum(p*log(1/(1+exp(-A))))+(1-phi)*sum(q*log(1/(1+exp(A)))))
}

model.2.gradient<-function(par,p,q,phi){
#gradient of the negative log-likelihood function
#INPUT: see model.2.loglkhood
#OUTPUT: three-dimensional gradient vector

  N=100000 #keep same as in the loglkhood function above
  stopifnot(sum(dim(p)==c(2,3))==2)
  stopifnot(sum(dim(q)==c(2,3))==2)
  stopifnot(abs(1-sum(p))<1e-9 & abs(1-sum(q))<1e-9)
  stopifnot(phi>0 & phi<1)

  A=par[1]+matrix(c(0,par[2],2*par[2],par[3],par[2]+par[3],2*par[2]+par[3]),byrow=TRUE,nrow=2)
  G=matrix(c(0,1,2,0,1,2),byrow=TRUE,nrow=2)
  X=matrix(c(0,0,0,1,1,1),byrow=TRUE,nrow=2)
  -N*c(sum((phi*p-(1-phi)*q*exp(A))/(1+exp(A))),
       sum(G*(phi*p-(1-phi)*q*exp(A))/(1+exp(A))),
       sum(X*(phi*p-(1-phi)*q*exp(A))/(1+exp(A))))
}
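#Illustrative sketch (not one of the authors' examples; see 'log_regression_covariate_examples.R' for those):
#the two functions above are meant to be passed to optim() as the objective and its gradient.
#Assuming some hypothetical 2x3 case and control frequency matrices 'p' and 'q' (element i,j is X=i-1, G=j-1)
#and a case proportion 'phi', a joint fit of the genotype and binary covariate effects could look like this:
#
# p<-matrix(c(0.45,0.25,0.04,0.17,0.08,0.01),byrow=TRUE,nrow=2) #hypothetical case frequencies
# p<-p/sum(p)
# q<-matrix(c(0.50,0.23,0.03,0.17,0.06,0.01),byrow=TRUE,nrow=2) #hypothetical control frequencies
# q<-q/sum(q)
# fit<-optim(c(0,0,0),fn=model.2.loglkhood,gr=model.2.gradient,p=p,q=q,phi=0.5,method='BFGS')
# exp(fit$par[2:3]) #odds-ratio estimates for the genotype and for the binary covariate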
binary.covariate<-function(K,freq.G,or.G,freq.X,or.X,ncases,ncontrols,population.controls=FALSE){
#Matti Pirinen, February 2012
#Finds population parameters for a given set of disease parameters.

#INPUT
#K, the (target) prevalence of the disease
#freq.G, frequency of the risk allele in the general population
#or.G, odds-ratio for each copy of the risk allele
#freq.X, frequency of the risk variant of the binary exposure
#or.X, odds-ratio of X
#ncases, number of cases in the case-control sample (needed for variance computation)
#ncontrols, number of controls in the case-control sample (needed for variance computation)
#population.controls, boolean, if TRUE then controls have general population frequencies, otherwise
#   controls have proper control frequencies

#OUTPUT
#K, the prevalence of the disease (will be close to the target prevalence given as input)
#ncases, number of cases for which the standard error was computed
#ncontrols, number of controls for which the standard error was computed
#pop.a, logistic regression base-line parameter in the population (NOT in the case-control sample!)
#case.freq, 2x3 matrix of case frequencies, element i,j is for X=i-1, G=j-1
#control.freq, 2x3 matrix of control frequencies, element i,j is for X=i-1, G=j-1
#population.controls, if TRUE, then control.freq are population frequencies
#marg.or.G, marginal odds-ratio for each copy of the risk allele (i.e. effect of G omitting X from the model)
#joint.or.G, odds-ratio for each copy of the risk allele when adjusted for covariate X
#marg.or.X, marginal odds-ratio for a copy of the risk exposure X=1 (i.e. effect of X omitting G from the model)
#joint.or.X, odds-ratio for the risk exposure X=1 when adjusted for genotype G
#marg.se.G, standard error of marg.or.G
#joint.se.G, standard error of joint.or.G
#NCP.GX.G, ratio of non-centrality parameters for X-adjusted G to marginal G (called SSM in the paper)

  stopifnot(K>0 & K<1)
  stopifnot(freq.G>0 & freq.G<1)
  stopifnot(freq.X>0 & freq.X<1)
  stopifnot(or.G>0 & or.X>0)
  stopifnot(ncases>0 & ncontrols>0)
  stopifnot(is.logical(population.controls))

  fx=freq.X
  fg=freq.G

  #there are six classes of individuals (X=0,1 x G=0,1,2)
  #the frequencies of the classes (0,0),(0,1),(0,2),(1,0),(1,1),(1,2) in the general population are
  pop.freq=c((1-fx)*(1-fg)^2,(1-fx)*2*fg*(1-fg),(1-fx)*fg^2,fx*(1-fg)^2,fx*2*fg*(1-fg),fx*fg^2)
  #the additive contributions to the log-odds of having the disease for these classes are
  a=c(0,log(or.G),2*log(or.G),log(or.X),log(or.X)+log(or.G),log(or.X)+2*log(or.G))

  #choose the base-line log-odds alpha in such a way that the population prevalence of the disease is approx. K
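  #The grid search below evaluates, on a grid of alpha values, the prevalence implied by the model,
  #  K(alpha) = sum_j pop.freq[j] * expit(alpha + a[j]),  where expit(z) = 1/(1+exp(-z)),
  #and keeps the alpha whose K(alpha) is within a 1% relative error of the target K,
  #shifting or refining the grid if no such alpha is found.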
  alpha.found=0;iter=1;
  alpha=seq(-30,10,by=0.001)
  while(!alpha.found & iter<100){
    K.table=rep(NA,length(alpha))
    for(i in 1:length(alpha)){K.table[i]=sum(1/(1+exp(-(alpha[i]+a)))*pop.freq)} #this is the prevalence for alpha[i]
    i=which.min(abs(K-K.table))
    if(abs(K-K.table[i])<0.01*K){alpha.found=1;K=K.table[i];alpha=alpha[i];} #you can choose the relative error tolerance, here 0.01
    if(!alpha.found){
      if(i==1) alpha=seq(-30-iter*40,10-iter*40,by=0.001)
      if(i==length(alpha)) alpha=seq(-30+iter*40,10+iter*40,by=0.001)
      if(i>1 && i<length(alpha)) alpha=seq(alpha[i-1],alpha[i+1],length.out=10000)
    }
    iter=iter+1;
  }
  stopifnot(alpha.found==1)
  #Note that K is no longer exactly the K given as a parameter to this function, but it is close to it
  #print(paste("Found a=",alpha," corresponding to K=",K,sep=""))

  p=matrix(1/(1+exp(-(alpha+a)))*pop.freq/K,ncol=3,byrow=TRUE) #case frequencies as a 2x3 matrix
  q=matrix(1/(1+exp(alpha+a))*pop.freq/(1-K),ncol=3,byrow=TRUE) #control frequencies as a 2x3 matrix
  pop.freq=matrix(pop.freq,ncol=3,byrow=TRUE) #change pop.freq to a 2x3 matrix

  #check the frequencies
  stopifnot(sum(abs(pop.freq-K*p-(1-K)*q))<1e-9) #general population is a mixture of cases and controls with prevalence K
  stopifnot(abs(sum(p)-1)<1e-9 & abs(sum(q)-1)<1e-9) #p and q sum to 1
  #each copy of the risk allele increases the odds by or.G:
  stopifnot(sum(abs(or.G-c(p[1,2]/p[1,1]*q[1,1]/q[1,2],p[1,3]/p[1,2]*q[1,2]/q[1,3],p[2,2]/p[2,1]*q[2,1]/q[2,2],p[2,3]/p[2,2]*q[2,2]/q[2,3])))<1e-9)
  #covariate X increases the odds by or.X:
  stopifnot(sum(abs(or.X-c(p[2,1]/p[1,1]*q[1,1]/q[2,1],p[2,2]/p[1,2]*q[1,2]/q[2,2],p[2,3]/p[1,3]*q[1,3]/q[2,3])))<1e-9)

  N=ncases+ncontrols
  #phi = proportion of cases
  phi=ncases/N

  if(population.controls){q=pop.freq;} #use population freqs for controls instead of proper control freqs

  #joint odds-ratios for G and X as maximum likelihood estimates from the corresponding logistic regression model
  #these will be the same as given as parameters IF proper controls are used, i.e. population.controls=FALSE
  optim(c(alpha,log(or.G),log(or.X)),fn=model.2.loglkhood,gr=model.2.gradient,p,q,phi,method='BFGS')->optim.res
  stopifnot(optim.res$convergence==0)
  joint.or.G=exp(optim.res$par[2])
  joint.or.X=exp(optim.res$par[3])

  #marginal odds-ratio for X
  marg.or.X=sum(p[2,])/sum(q[2,])*sum(q[1,])/sum(p[1,])

  #marginal odds-ratio for G as the maximum likelihood estimate from the corresponding logistic regression model:
  optim(c(alpha,log(or.G)),fn=model.1.loglkhood,gr=model.1.gradient,colSums(p),colSums(q),phi,method='BFGS')->optim.res
  stopifnot(optim.res$convergence==0)
  marg.or.G=exp(optim.res$par[2])

  #phi.0 = proportion of cases given X=0
  phi.0=sum(p[1,])*phi/(sum(q[1,])*(1-phi)+sum(p[1,])*phi)
  #phi.1 = proportion of cases given X=1
  phi.1=sum(p[2,])*phi/(sum(q[2,])*(1-phi)+sum(p[2,])*phi)

  #assuming small genetic effects (see Genet Epidemiol 35:278-290)
  #the asymptotic variance is 1/(N*var(genotype)*phi*(1-phi)) when omitting X
  t=phi*p+(1-phi)*q #frequencies in the case-control sample
  var.geno=sum(t[,2])+4*sum(t[,3])-(sum(t[,2])+2*sum(t[,3]))^2
  var.G=1/(N*var.geno*phi*(1-phi))
  #and 0.5*harmonic mean of the subgroup variances when stratifying by X
  N.0=ncases*sum(p[1,])+ncontrols*sum(q[1,])
  N.1=N-N.0
  var.geno.0=(t[1,2]+4*t[1,3])/sum(t[1,])-((t[1,2]+2*t[1,3])/sum(t[1,]))^2
  var.geno.1=(t[2,2]+4*t[2,3])/sum(t[2,])-((t[2,2]+2*t[2,3])/sum(t[2,]))^2
  var.GX=1/(N.0*phi.0*(1-phi.0)*var.geno.0+N.1*phi.1*(1-phi.1)*var.geno.1)

  NCP.ratio=log(joint.or.G)^2/var.GX/log(marg.or.G)^2*var.G

  return(list(K=K,ncases=ncases,ncontrols=ncontrols,pop.a=alpha,case.freq=p,control.freq=q,population.controls=population.controls,
              marg.or.G=marg.or.G,joint.or.G=joint.or.G,marg.or.X=marg.or.X,joint.or.X=joint.or.X,
              marg.se.G=sqrt(var.G),joint.se.G=sqrt(var.GX),NCP.GX.G=NCP.ratio))
}
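#Illustrative sketch of a call to binary.covariate() (the parameter values below are hypothetical;
#the authors' own examples are in 'log_regression_covariate_examples.R'):
#
# res<-binary.covariate(K=0.01,freq.G=0.3,or.G=1.2,freq.X=0.2,or.X=2.0,ncases=2000,ncontrols=2000)
# res$NCP.GX.G #ratio of non-centrality parameters (SSM): X-adjusted test of G vs. marginal test of G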
#
##
###
######### CONTINUOUS COVARIATE
###
##
#

K.integrand<-function(x,a,b,c,fg){
#evaluates the following function at point x
#sum_{g=0,1,2} {exp(a+b*g+c*x)/(1+exp(a+b*g+c*x))}*pi(g)*dnorm(x),
#where pi=((1-fg)^2,2*fg*(1-fg),fg^2) is the vector of genotype frequencies in H-W equilibrium

#INPUT
#x, point at which the function is evaluated
#a,b,c, parameters of the logistic regression model
#fg, frequency of allele 1

  stopifnot(fg>0 & fg<1)
  return(((1-fg)^2/(1+exp(-a-c*x))+2*fg*(1-fg)/(1+exp(-a-b-c*x))+fg^2/(1+exp(-a-2*b-c*x)))*dnorm(x))
}

G.integrand<-function(x,a,b,c,g,case){
#evaluates
#dnorm(x)*exp(a+b*g+c*x)/(1+exp(a+b*g+c*x)), if case==1
#dnorm(x)/(1+exp(a+b*g+c*x)), if case==0
  stopifnot(case==0 || case==1)
  return( ( 1-case+(-1)^(1-case)/(1+exp(-a-b*g-c*x)) )*dnorm(x) )
}

Gx.integrand<-function(x,a,b,c,g,case){
#as G.integrand but multiplied by x
  return( ( 1-case+(-1)^(1-case)/(1+exp(-a-b*g-c*x)) )*dnorm(x)*x )
}

Gx2.integrand<-function(x,a,b,c,g,case){
#as G.integrand but multiplied by x^2
  return( ( 1-case+(-1)^(1-case)/(1+exp(-a-b*g-c*x)) )*dnorm(x)*x^2 )
}

gaussian.covariate<-function(K,freq.G,or.G,or.X,ncases,ncontrols,nsimulations=1000,population.controls=FALSE,x.lower=-6,x.upper=6,print.plots=FALSE){
#Matti Pirinen, February 2012

#INPUT
#K, the (target) prevalence of the disease
#freq.G, frequency of the risk allele in the general population
#or.G, odds-ratio for each copy of the risk allele
#or.X, odds-ratio per each standard deviation of the continuous covariate X
#ncases, number of cases in the case-control sample
#ncontrols, number of controls in the case-control sample
#nsimulations, number of data sets simulated to get the parameter estimates
#population.controls, boolean, if TRUE then controls have general population frequencies, otherwise
#   controls have proper control frequencies
#x.lower, the lower limit of the integration interval for X, on log-odds scale
#x.upper, the upper limit of the integration interval for X, on log-odds scale
#print.plots, boolean, if TRUE, prints the densities of X in each of the six phenotype x genotype groups,
#   and compares the cdfs of those distributions to Gaussians with the same mean and std

#OUTPUT
#K, the prevalence of the disease (will be close to the target prevalence given as input)
#ncases, number of cases for which the standard error was computed
#ncontrols, number of controls for which the standard error was computed
#sample.a, the value of the base-line parameter 'a' in the case-control sample
#case.G.freq, the genotype frequencies in cases (0,1,2)
#case.X.means, the means of X in cases for each genotype group (0,1,2)
#case.X.ses, the standard deviations of X in cases for each genotype group (0,1,2)
#control.G.freq, the genotype frequencies in controls (0,1,2)
#control.X.means, the means of X in controls for each genotype group (0,1,2)
#control.X.ses, the standard deviations of X in controls for each genotype group (0,1,2)
#population.controls, if TRUE, then controls are from the general population
#marg.or.X, marginal odds-ratio of X per each stdev (i.e. the effect of X omitting G from the model)
#joint.or.X, odds-ratio of X per each stdev when adjusted for genotype G
#marg.or.G, marginal odds-ratio for each copy of the risk allele (i.e. effect of G omitting X from the model)
#joint.or.G, odds-ratio for each copy of the risk allele when adjusted for covariate X
#marg.se.G, standard error of marg.or.G
#joint.se.G, standard error of joint.or.G
#NCP.GX.G, ratio of the non-centrality parameters for X-adjusted G to marginal G (called SSM in the paper)

  stopifnot(K>0 & K<1)
  stopifnot(freq.G>0 & freq.G<1)
  stopifnot(or.G>0 & or.X>0)
  stopifnot(ncases>0 & ncontrols>0)
  stopifnot(nsimulations>0)
  stopifnot(x.lower<x.upper)
  stopifnot(is.logical(population.controls))
  stopifnot(is.logical(print.plots))

  fg=freq.G

  #choose the base-line log-odds alpha in such a way that the population prevalence of the disease is approx. K
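  #Here the prevalence implied by a candidate alpha is
  #  K(alpha) = integral over x of sum_g pi(g) * expit(alpha + log(or.G)*g + log(or.X)*x) * dnorm(x) dx,
  #evaluated with integrate() on K.integrand over [x.lower, x.upper]; the same grid search as in
  #binary.covariate() is then used to match K(alpha) to the target K within a 1% relative error.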
  alpha.found=0;iter=1;
  alpha=seq(-30,10,by=0.001)
  while(!alpha.found & iter<10){
    K.table=rep(NA,length(alpha))
    for(i in 1:length(alpha)){
      K.table[i]=integrate(K.integrand,lower=x.lower,upper=x.upper,a=alpha[i],b=log(or.G),c=log(or.X),fg=fg)$value} #this is the prevalence for alpha[i]
    i=which.min(abs(K-K.table))
    if(abs(K-K.table[i])<0.01*K){alpha.found=1;K=K.table[i];alpha=alpha[i];} #you can choose the relative error tolerance, here 0.01
    if(!alpha.found){
      if(i==1) alpha=seq(-30-iter*40,10-iter*40,by=0.001)
      if(i==length(alpha)) alpha=seq(-30+iter*40,10+iter*40,by=0.001)
      if(i>1 && i<length(alpha)) alpha=seq(alpha[i-1],alpha[i+1],length.out=10000)
    }
    iter=iter+1;
  }
  stopifnot(alpha.found==1)
  print(paste("Found a=",alpha," corresponding to K=",K,sep=""))

  g.freqs=c((1-fg)^2,2*fg*(1-fg),fg^2)
  p=rep(0,3) #case genotype freqs
  q=rep(0,3) #control genotype freqs
  for(g in 0:2){
    p[g+1]=g.freqs[g+1]/K*integrate(G.integrand,lower=x.lower,upper=x.upper,a=alpha,b=log(or.G),c=log(or.X),g=g,case=1)$value;
    q[g+1]=g.freqs[g+1]/(1-K)*integrate(G.integrand,lower=x.lower,upper=x.upper,a=alpha,b=log(or.G),c=log(or.X),g=g,case=0)$value;
  }
  #print(paste(sum(p),sum(q)))
  stopifnot(abs(sum(p)-1)<1e-4 & abs(sum(q)-1)<1e-4) #p and q sum to 1
  stopifnot(sum(abs(g.freqs-K*p-(1-K)*q))<1e-4) #general population is a mixture of cases and controls with prevalence K

  x=seq(x.lower,x.upper,length.out=10000)
  means=matrix(0,ncol=3,nrow=2)
  ses=means
  if(print.plots) par(mfcol=c(2,3))

  for(g in 0:2){ #calculates the means and sds of the X distributions in all six classes (case/control(0,1) x genotype(0,1,2))
    #distribution of X in cases
    mu=integrate(Gx.integrand,lower=x.lower,upper=x.upper,a=alpha,b=log(or.G),c=log(or.X),g=g,case=1)$value*g.freqs[g+1]/K/p[g+1]
    s=sqrt(integrate(Gx2.integrand,lower=x.lower,upper=x.upper,a=alpha,b=log(or.G),c=log(or.X),g=g,case=1)$value*g.freqs[g+1]/K/p[g+1]-mu^2)
    means[1,g+1]=mu
    ses[1,g+1]=s
    if(print.plots) plot(x,G.integrand(x,alpha,log(or.G),log(or.X),g,1)*g.freqs[g+1]/K/p[g+1],t="l",ylab="density",main=paste("cases G=",g,sep=""),xlab="",sub=paste("mean=",signif(mu,4)," SE=",signif(s,4),sep=""))

    #distribution of X in controls
    mu=integrate(Gx.integrand,lower=x.lower,upper=x.upper,a=alpha,b=log(or.G),c=log(or.X),g=g,case=0)$value*g.freqs[g+1]/(1-K)/q[g+1]
    s=sqrt(integrate(Gx2.integrand,lower=x.lower,upper=x.upper,a=alpha,b=log(or.G),c=log(or.X),g=g,case=0)$value*g.freqs[g+1]/(1-K)/q[g+1]-mu^2)
    means[2,g+1]=mu
    ses[2,g+1]=s
    if(print.plots) plot(x,G.integrand(x,alpha,log(or.G),log(or.X),g,0)*g.freqs[g+1]/(1-K)/q[g+1],t="l",ylab="density",main=paste("controls G=",g,sep=""),sub=paste("mean=",signif(mu,4)," SE=",signif(s,4),sep=""))
  }

  #plots the cumulative distribution functions of X in each genotype class
  if(print.plots){
    cdf.1=rep(0,length(x))
    cdf.0=cdf.1
    case.distr=list()
    control.distr=list()
    for(g in 0:2){
      for(i in 1:length(x)){
        cdf.1[i]=g.freqs[g+1]/K/p[g+1]*integrate(G.integrand,lower=x.lower,upper=x[i],a=alpha,b=log(or.G),c=log(or.X),g=g,case=1)$value
        cdf.0[i]=g.freqs[g+1]/(1-K)/q[g+1]*integrate(G.integrand,lower=x.lower,upper=x[i],a=alpha,b=log(or.G),c=log(or.X),g=g,case=0)$value
      }
      case.distr[[g+1]]=cdf.1
      control.distr[[g+1]]=cdf.0
    }
    x11()
    par(mfcol=c(2,3))
    for(g in 0:2){
      plot(pnorm(x,mean=means[1,g+1],sd=ses[1,g+1]),case.distr[[g+1]],pch="+",xlab="p norm",ylab="p empirical",main=paste("cases G=",g,sep=""))
      abline(0,1)
      plot(pnorm(x,mean=means[2,g+1],sd=ses[2,g+1]),control.distr[[g+1]],pch="+",xlab="p norm",ylab="p empirical",main=paste("controls G=",g,sep=""))
      abline(0,1)
    }
  }
  if(population.controls){q=g.freqs;means[2,]=0.0;ses[2,]=1.0;}

  #simulate data and analyse by glm
  y=c(rep(1,ncases),rep(0,ncontrols))
  g=rep(NA,ncases+ncontrols)
  x=g
  res=matrix(0,ncol=7,nrow=nsimulations)
  for(iter in 1:nsimulations){
    g[1:ncases]=sample(c(0,1,2),prob=p,replace=TRUE,size=ncases)
    for(i in 0:2){ x[y==1 & g==i]=rnorm(sum(y==1 & g==i),mean=means[1,i+1],sd=ses[1,i+1])}
    g[(ncases+1):(ncases+ncontrols)]=sample(c(0,1,2),prob=q,replace=TRUE,size=ncontrols)
    for(i in 0:2){ x[y==0 & g==i]=rnorm(sum(y==0 & g==i),mean=means[2,i+1],sd=ses[2,i+1])}

    summary(glm(y~g,family="binomial"))$coefficients[2,c(1,2)]->res[iter,1:2]
    glm(y~g+x,family="binomial")->glm.fit
    summary(glm.fit)$coefficients[3,1]->res[iter,3]
    summary(glm.fit)$coefficients[2,c(1,2)]->res[iter,4:5]
    glm(y~x,family="binomial")->glm.fit
    summary(glm.fit)$coefficients[2,1]->res[iter,6]
    summary(glm.fit)$coefficients[1,1]->res[iter,7] #'a' parameter in the case-control sample
  }

  return(list(K=K,ncases=ncases,ncontrols=ncontrols,sample.a=median(res[,7]),
              case.G.freq=p,case.X.means=means[1,],case.X.ses=ses[1,],
              control.G.freq=q,control.X.means=means[2,],control.X.ses=ses[2,],
              population.controls=population.controls,
              marg.or.X=exp(mean(res[,6])),joint.or.X=exp(mean(res[,3])),
              marg.or.G=exp(mean(res[,1])),joint.or.G=exp(mean(res[,4])),
              marg.se.G=median(res[,2]),joint.se.G=median(res[,5]),
              NCP.GX.G=(mean(res[,4])/median(res[,5])*median(res[,2])/mean(res[,1]))^2))
}

phi.1.minus.phi.integrand<-function(x,a,c,mean,sd){
#evaluates phi(x)*(1-phi(x))*theta(x), where theta is dnorm, see subsection "General case" in the SoM
  return( exp(a+c*x)/(1+exp(a+c*x))^2 *dnorm(x,mean=mean,sd=sd) )
}

analytic.var.ratio.normal<-function(a,c,phi,case.G.freq,case.X.means,case.X.ses,control.G.freq,control.X.means,control.X.ses,x.lower=-6,x.upper=6){
#evaluates formula (10) in the Online Methods for a single continuous covariate which has a normal distribution
#in all six groups of individuals

  stopifnot(length(case.G.freq)==3)
  stopifnot(length(case.X.means)==3)
  stopifnot(length(case.X.ses)==3)
  stopifnot(abs(sum(case.G.freq)-1)<1e-4)
  stopifnot(length(control.G.freq)==3)
  stopifnot(length(control.X.means)==3)
  stopifnot(length(control.X.ses)==3)
  stopifnot(abs(sum(control.G.freq)-1)<1e-4)
  stopifnot(x.lower<x.upper)
  stopifnot(phi>0 & phi<1)

  v=0.0
  for(g in 1:3){
    v=v+(1-phi)*control.G.freq[g]*integrate(phi.1.minus.phi.integrand,lower=x.lower,upper=x.upper,a=a,c=c,mean=control.X.means[g],sd=control.X.ses[g])$value
  }
  for(g in 1:3){
    v=v+phi*case.G.freq[g]*integrate(phi.1.minus.phi.integrand,lower=x.lower,upper=x.upper,a=a,c=c,mean=case.X.means[g],sd=case.X.ses[g])$value
  }
  return(v/phi/(1-phi))
}
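#Illustrative sketch of calls to gaussian.covariate() and analytic.var.ratio.normal() (the parameter values
#below are hypothetical, and feeding sample.a and log(joint.or.X) into the analytic formula is an assumption
#of this sketch; the authors' own examples are in 'log_regression_covariate_examples.R'):
#
# res<-gaussian.covariate(K=0.01,freq.G=0.3,or.G=1.2,or.X=2.0,ncases=2000,ncontrols=2000,nsimulations=100)
# res$NCP.GX.G #simulation-based ratio of non-centrality parameters (SSM)
# analytic.var.ratio.normal(a=res$sample.a,c=log(res$joint.or.X),phi=0.5,
#                           case.G.freq=res$case.G.freq,case.X.means=res$case.X.means,case.X.ses=res$case.X.ses,
#                           control.G.freq=res$control.G.freq,control.X.means=res$control.X.means,control.X.ses=res$control.X.ses)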