-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmqtl.r
executable file
·119 lines (107 loc) · 3.14 KB
/
mqtl.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Matrix eQTL by Andrey A. Shabalin
# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
#
# Be sure to use an up to date version of R and Matrix eQTL.
# source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
library(MatrixEQTL)
## Location of the package with the data files.
# base.dir = find.package('MatrixEQTL');
base.dir <- "/dir/"
## Settings
# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel <- modelLINEAR
# modelANOVA, modelLINEAR, or modelLINEAR_CROSS
# Genotype file name
SNP_file_name <- paste(base.dir, "/data/meta234/set4/SNP.txt", sep = "")
# Gene expression file name
expression_file_name <- paste(base.dir, "/data/meta234/set4/GE.txt", sep = "")
# Covariates file name
# Set to character() for no covariates
covariates_file_name <- paste(base.dir, "/data/meta234/set4/Covariates.txt", sep = "")
# Output file name
output_file_name <- tempfile()
# Only associations significant at this level will be saved
# pvOutputThreshold = 1e-2;
pvOutputThreshold <- 1
# Error covariance matrix
# Set to numeric() for identity.
errorCovariance <- numeric()
# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
## Load genotype data
snps <- SlicedData$new()
snps$fileDelimiter <- ","
# the period character
snps$fileOmitCharacters <- "NA"
# denote missing values;
snps$fileSkipRows <- 1
# one row of column labels
snps$fileSkipColumns <- 1
# one column of row labels
snps$fileSliceSize <- 2000
# read file in slices of 2,000 rows
snps$LoadFile(SNP_file_name)
## Load gene expression data
gene <- SlicedData$new()
gene$fileDelimiter <- ","
# the TAB character
gene$fileOmitCharacters <- "NA"
# denote missing values;
gene$fileSkipRows <- 1
# one row of column labels
gene$fileSkipColumns <- 1
# one column of row labels
gene$fileSliceSize <- 2000
# read file in slices of 2,000 rows
gene$LoadFile(expression_file_name)
## Load covariates
cvrt <- SlicedData$new()
cvrt$fileDelimiter <- ","
# the TAB character
cvrt$fileOmitCharacters <- "NA"
# denote missing values;
cvrt$fileSkipRows <- 1
# one row of column labels
cvrt$fileSkipColumns <- 1
# one column of row labels
if (length(covariates_file_name) > 0) {
cvrt$LoadFile(covariates_file_name)
}
## Run the analysis
me <- Matrix_eQTL_engine(
snps = snps,
gene = gene,
cvrt = cvrt,
output_file_name = output_file_name,
pvOutputThreshold = pvOutputThreshold,
useModel = useModel,
errorCovariance = errorCovariance,
verbose = TRUE,
pvalue.hist = T,
min.pv.by.genesnp = FALSE,
noFDRsaveMemory = FALSE
)
me2 <- Matrix_eQTL_engine(
snps = snps,
gene = gene,
cvrt = cvrt,
output_file_name = output_file_name,
pvOutputThreshold = pvOutputThreshold,
useModel = useModel,
errorCovariance = errorCovariance,
verbose = TRUE,
pvalue.hist = "qqplot",
min.pv.by.genesnp = FALSE,
noFDRsaveMemory = FALSE
)
unlink(output_file_name)
## Results:
cat("Analysis done in: ", me$time.in.sec, " seconds", "\n")
cat("Detected eQTLs:", "\n")
show(me$all$eqtls)
library(tidyverse)
me$all$eqtls %>%
mutate(se = beta / statistic) %>%
data.table::fwrite("mqtl_set4_dmp234.txt")
## Plot the histogram of all p-values
plot(me)
plot(me2)