Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

range_list.txt

PRS_KCNI_2021_new.pptx

PRS_supplementary.pptx

#Additional test files for running PRSice (Section 8):

BMI_1kgph3_chr16_snps_summarystat_2024.txt

1kgph3_dummybmi20200804.txt

NCBI37.3.gene.loc.gtf


3.1- Base Data

We will use as the base data part of GWAS Anthropometric 2015 BMI summary statistics ( https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4382211/), made available by the GIANT consortium and were extracted from their online portal

...

Code Block
prs1phen=read.csv("test_clump_1_prs_5.csv",header=T)
head(prs1phen)
 
lmprs<-lm(data=prs1phen, dummybmi~SCORE + sex) #adding sex as a covariate here; consider adding population structure PCs as covariates
summary(lmprs)


6.2- PLOTTING

...

Code Block
titlePRSice
#### PRSice2 (adapted from https://choishingwan.github.io/PRSice/) ####
##Some quality control steps before using summary statistics file in PRSice2
## *** make sure to review any documents that come with the summary statistics to understand how the summary statistics were derived and what the column names stand for ***
# In R:
ss=read.table("BMI_1kgph3_chr16_snps_summarystat_2024.txt",header=T)
nrow(ss) #[1] 68385

head(ss)

summary(ss$INFO) #imputation quality

summary(ss$N) #N for each SNP in source GWAS

summary(ss$Freq1.Hapmap) #minor allele frequency for each SNP

nrow(ss[ss$INFO<0.8,]) #[1] 199

nrow(ss[ss$N<0.5*max(ss$N),]) #[1] 2629

nrow(ss[ss$Freq1.Hapmap<=0.01 | is.na(ss$Freq1.Hapmap),]) #[1] 1590

#Some quality control for imputation quality, N, and minor allele frequency:
ss1=ss[ss$INFO>0.8 & ss$N>0.5*max(ss$N) & ss$Freq1.Hapmap>0.01 & !is.na(ss$Freq1.Hapmap),]

nrow(ss1) #[1] 64933

colnames(ss1)<-c("SNP","A1","A2","Freq1.Hapmap","BETA","SE","P","N","INFO")

head(ss1)

write.table(ss1,"BMI_1kgph3_chr16_snps_summarystat_2024_short.txt", col.names=T, row.names=F, quote=F, sep='\t')

##Now we are ready to try PRSice2!
##Need to set up the following modules in R for PRSice2 to run properly:
module load lang/R/3.5.1-Python-3.8.5-Anaconda3-2020.11

R --vanilla
library(ggplot2)
library(optparse#if R packages not already installed:
install.packages("ggplot2")
install.packages("optparse") #may need to install this package manually
library(methods)
library(tools)
library(data.table)
library(grDevices)
library(RColorBrewer)
q()

## Linux -- Using our tutorial files: 
Rscript PRSice.R --prsice ./PRSice_linux --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_nocov2024

## Windows (in R) -- Using our tutorial files:
system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_nocov2024")

## Linux -- Adding a covariate:
Rscript PRSice.R --prsice ./PRSice_linux --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --cov 1kgph3_dummybmi20200804.txt --cov-col sex --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_sexcov2024

## Windows (in R) -- Adding a covariate:
system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --cov 1kgph3_dummybmi20200804.txt --cov-col sex --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_sexcov2024")
 


##Additional Notes/Examples:
##This is sample script (***Need to change the actual file names***):
#Rscript workdir/PRSice.R --dir workdir --prsice ./workdir/PRSice_linux --base workdir/source_gwas_sumstat_info_9_prsicein.txt --target indir/target_gwas_cleaned_plink --bar-levels 5e-8,0.00001,0.00005,0.0001,0.0005,0. by downloading from https://cran.r-project.org/web/packages/optparse/index.html; install.packages(c("optparse"))
install.packages("methods")
install.packages("tools")
install.packages("data.table")
install.packages("grDevices")
install.packages("RColorBrewer")

#once R packages are installed:
library(ggplot2)
library(optparse)
library(methods)
library(tools)
library(data.table)
library(grDevices)
library(RColorBrewer)
#if using Linux, exit R (if using Windows: stay in R):
q() #to exit R

## Linux -- Using our tutorial files: 
#may need to make PRSice_linux executable
chmod u+x ./PRSice_linux
#
Rscript PRSice.R --prsice ./PRSice_linux --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_nocov2024

## Windows (in R) -- Using our tutorial files:
system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_nocov2024")

## Linux -- Adding a covariate:
Rscript PRSice.R --prsice ./PRSice_linux --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --cov 1kgph3_dummybmi20200804.txt --cov-col sex --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_sexcov2024

## Windows (in R) -- Adding a covariate:
system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --cov 1kgph3_dummybmi20200804.txt --cov-col sex --thread 1 --stat BETA --pvalue P --binary-target F --out dummybmi_sexcov2024")

## Check the log files to see what went on behind the scene.
# How many SNPs went into the PRS calculations? #Hint: check the *.prsice file

## Now we check the output file with PRS (in R):
prs=read.table("dummybmi_sexcov2024.best",header=T)
pheno=read.table("1kgph3_dummybmi20200804.txt",header=T)

nrow(prs) #[1] 476

nrow(pheno) #[1] 476

head(prs)

head(pheno)

data=merge(pheno,prs,by.x=c("V1","V2"),by.y=c("FID","IID"))

nrow(data) #[1] 476 

head(data)

lmprs<-lm(data=data, dummybmi~PRS + sex)
summary(lmprs) #notice the magnitude of the effect estimate

mean(data$PRS)

sd(data$PRS)

data$stdprs<-scale(data$PRS) #standardizing the PRS to a mean of 0 and standard deviation of 1

mean(data$stdprs)

sd(data$stdprs)

lmprs<-lm(data=data, dummybmi~stdprs + sex)
summary(lmprs) #notice how the magnitude of the effect estimate has changed and everything else stayed the same

## Scatterplot of BMI vs best PRS
pdf("Scatterplot_dummybmi_vs_PRSice2_best_stdprs.pdf")
plot(data$stdprs,data$dummybmi)
abline(lm(data$dummybmi ~ data$stdprs), col = "red",
lty = 1, lwd = 1)
dev.off()
 

#######################################################################
## Additional Notes/Examples:
# add "--perm 10000": *.summary file with additional column showing empirical p value
# "--all-score": outputs PRS for all thresholds tested -- beware of large output file
# "--quantile 10": generates plot and file for PRS quantiles (deciles in this case)
# add "--gtf gene.gtf --msigdb set.txt --multi-plot 10" for PRSet --> outputs plot for top 10 gene sets (*.gtf file with genome boundary of each gene from MAGMA; set.txt with gene IDs in each gene set from MSigDB -- refer to https://www.gsea-msigdb.org/gsea/msigdb/index.jsp for gene set definitions
#
##This is sample script (***Need to change the actual file names***):
#Rscript workdir/PRSice.R --dir workdir --prsice ./workdir/PRSice_linux --base workdir/source_gwas_sumstat_info_9_prsicein.txt --target indir/target_gwas_cleaned_plink --bar-levels 5e-8,0.00001,0.00005,0.0001,0.0005,0.001,0.005,0.01,0.05,0.1,0.2,0.3,0.4,0.5,1 --seed 1234 --perm 10000 --fastscore --all-score --no-regress T --out outdir/target_gwas_cleaned_source_gwas_prs
#
#system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --cov 1kgph3_dummybmi20200804.txt --cov-col sex --bar-levels 0.001,0.005,0.01,0.05,0.1,0.2,0.3,0.4,0.5,1 --seedfastscore 1234 --permthread 100001 --fastscorestat BETA --all-scorepvalue P --nobinary-regresstarget TF --out outdir/target_gwas_cleaned_source_gwas_prs
## PRSice2 within R on Windows (from tutorial):
system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base TOY_BASE_GWAS.assoc --target TOY_TARGET_DATAdummybmi_sexcov2024setpvalthreshs")
#system("Rscript.exe PRSice.R --prsice ./PRSice_win64.exe --base BMI_1kgph3_chr16_snps_summarystat_2024_short.txt --target 1kgph3_chr16 --pheno 1kgph3_dummybmi20200804.txt --pheno-col dummybmi --cov 1kgph3_dummybmi20200804.txt --cov-col sex --thread 1 --stat ORBETA --pvalue P --binary-target T")
 F --gtf NCBI37.3.gene.loc.gtf --msigdb c2.all.v2024.1.Hs.entrez.gmt --multi-plot 10 --out dummybmi_sexcov2024")


Running PRS-CS

Code Block
titlePRS-CS-auto
## PRS-CS
#need effective sample size of source GWAS (neff =2*N_cases*N_controls/(N_cases+N_controls))
python workdir/PRScs/PRScs.py --ref_dir=workdir/ldblk_1kg_eur --bim_prefix=indir/target_gwas_cleaned_plink --sst_file=workdir/source_gwas_sumstat_prscsin.txt --n_gwas=[neff] --seed=1234 --out_dir=outdir/target_gwas_source_gwas_sumstat_prscsout
# Merge sumstat outputs from PRS-CS chr1-22 in R:
R
mci<-c()
mcall<-c()
for (i in 1:22) {
	mci<-read.table(paste("outdir//target_gwas_source_gwas_sumstat_prscsout_pst_eff_a1_b0.5_phiauto_chr",i,".txt",sep=""),header=F)
	mcall<-rbind(mcall,mci)
}
colnames(mcall)<-c("CHR","SNP","BP","A1","A2","B")
mcalla=subset(mcall,select=c("SNP","A1","B"))
colnames(mcalla)<-c("SNP","A1","Score")
write.table(mcalla,"outdir//target_gwas_source_gwas_sumstat_prscsout_pst_eff_a1_b0.5_phiauto_chr1-22.raw", row.names=F, col.names=T, quote=F, sep='\t')
# Run polygenic scoring in PLINK:
plink --bfile indir/target_gwas_cleaned_plink --score outdir/target_gwas_source_gwas_sumstat_prscsout_pst_eff_a1_b0.5_phiauto_chr1-22.raw --out outdir/target_gwas_source_gwas_sumstat_prscsout

...