...
Code Block | ||
---|---|---|
| ||
#### PRSice2 #### ##Some quality control steps before using summary statistics file in PRSice2 ## *** make sure to review any documents that come with the summary statistics to understand how the summary statistics were derived and what the column names stand for *** # In R: ss=read.table("BMI_1kgph3_chr16_snps_summarystat_2024.txt",header=T) nrow(ss) #[1] 68385 head(ss) summary(ss$INFO) #imputation quality summary(ss$N) #N for each SNP in source GWAS summary(ss$Freq1.Hapmap) #minor allele frequency for each SNP nrow(ss[ss$INFO<0.8,]) #[1] 199 nrow(ss[ss$N<0.5*max(ss$N),]) #[1] 2629 nrow(ss[ss$Freq1.Hapmap<=0.01 | is.na(ss$Freq1.Hapmap),]) #[1] 1590 #Some quality control for imputation quality, N, and minor allele frequency: ss1=ss[ss$INFO>0.8 & ss$N>0.5*max(ss$N) & ss$Freq1.Hapmap>0.01 & !is.na(ss$Freq1.Hapmap),] nrow(ss1) #[1] 64933 colnames(ss1)<-c("SNP","A1","A2","Freq1.Hapmap","BETA","SE","P","N","INFO") head(ss1) write.table(ss1,"BMI_1kgph3_chr16_snps_summarystat_2024_short.txt", col.names=T, row.names=F, quote=F, sep='\t') ##Now we are ready to try PRSice2! ##Need to set up the following modules in R for PRSice2 to run properly: module load lang/R/3.5.1-Python-3.8.5-Anaconda3-2020.11 R --vanilla library(ggplot2) library(optparse) #may need to install this package manually library(methods) library(tools) library(data.table) library(grDevices) library(RColorBrewer) q() ## Linux -- Using our tutorial files: Rscript PRSice.R --prsice ./PRSice_linux --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_nocov2024 ## Windows (in R) -- Using our tutorial files: system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_nocov2024") ## Linux -- Adding a covariate: Rscript PRSice.R --prsice ./PRSice_linux --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --cov 1kgph3_dummybmi20200804.txt --cov-col sex --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_sexcov2024 ## Windows (in R) -- Adding a covariate: system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --cov 1kgph3_dummybmi20200804.txt --cov-col sex --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_sexcov2024") ## Check the log files to see what went on behind the scene. ## Now we check the output file with PRS (in R): prs=read.table("dummybmi_sexcov2024.best",header=T) pheno=read.table("1kgph3_dummybmi20200804.txt",header=T) nrow(prs) #[1] 476 nrow(pheno) #[1] 476 head(prs) head(pheno) data=merge(pheno,prs,by.x=c("V1","V2"),by.y=c("FID","IID")) nrow(data) #[1] 476 head(data) lmprs<-lm(data=data, dummybmi~PRS + sex) summary(lmprs) #notice the magnitude of the effect estimate data$stdprs<-scale(data$PRS) #standardizing the PRS to a mean of 0 and standard deviation of 1 lmprs<-lm(data=data, dummybmi~stdprs + sex) summary(lmprs) #notice how the magnitude of the effect estimate has changed and everything else stayed the same ## Scatterplot of BMI vs PRS (shown for p-value threshold of 0.5 from Example (a)) pdf("Scatterplot_dummybmi_vs_PRSice2_best_stdprs.pdf") plot(data$stdprs,data$dummybmi) abline(lm(data$dummybmi ~ data$stdprs), col = "red", lty = 1, lwd = 1) dev.off() ####################################################################### ##Additional Notes/Examples: ##This is sample script (***Need to change the actual file names***): #Rscript workdir/PRSice.R --dir workdir --prsice ./workdir/PRSice_linux --base workdir/source_gwas_sumstat_info_9_prsicein.txt --target indir/target_gwas_cleaned_plink --bar-levels 5e-8,0.00001,0.00005,0.0001,0.0005,0.001,0.005,0.01,0.05,0.1,0.2,0.3,0.4,0.5,1 --seed 1234 --perm 10000 --fastscore --all-score --no-regress T --out outdir/target_gwas_cleaned_source_gwas_prs ## PRSice2 within R on Windows (from tutorial): system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base TOY_BASE_GWAS.assoc --target TOY_TARGET_DATA --thread 1 --stat OR --binary-target T") |
...