-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
64 lines (42 loc) · 10.5 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Build rules for the CUDA convolution and classifier programs.
#
# Every binary is produced by a single nvcc invocation on one .cu file; the
# targets differ only in the -D macros passed on the command line.
#   * Plain targets (conv1, class1, ...) use fixed problem sizes.
#   * opt-* targets take their sizes from NX_PARAM, NY_PARAM, NI_PARAM,
#     NN_PARAM and NUM_BATCHES (environment or `make VAR=value`). When one of
#     them is unset or empty, the size used by the matching fixed target is
#     substituted instead of passing an empty -D value.
#   * Targets whose name ends in "c" (conv1c, class1c, ...) pass -DCONCURRENT=1.
#
# make only compares timestamps: changing a *_PARAM value does not by itself
# rebuild an existing opt-* binary. Use `make -B <target>` or `make clean`.

# Host C++ compiler handed to nvcc through -ccbin.
CPP=g++
# Flags added to every compile; OPT is an optional, user-supplied prefix.
CFLAGS=$(OPT) --std=c++11 -O3 -w
# Binaries built by `make all`.
MODULE := conv1 conv1c opt-conv1 opt-conv1c class1 class1c opt-class1 opt-class1c
# Binaries that have a rule below but are not part of `all`.
EXTRA := test1 class-batched opt-class-batched conv-batched opt-conv-batched \
         conv2 conv2c opt-conv2 opt-conv2c
# Not referenced by any rule in this file.
# NOTE(review): if the .cu sources include this header it should probably be
# tracked as a dependency - confirm before wiring it in.
HEADERS=dnn.hpp

# CUDA toolkit location; override with `make CUDA_DIR=/path/to/cuda`.
CUDA_DIR := /usr/local/cuda-10.1
NVCC := $(CUDA_DIR)/bin/nvcc

# One -gencode entry per listed architecture, followed by the
# arch=compute_75,code=compute_75 entry.
SM_ARCHS := 30 35 37 50 52 60 61 70 75
GENCODE := $(foreach sm,$(SM_ARCHS),-gencode arch=compute_$(sm),code=sm_$(sm)) \
           -gencode arch=compute_75,code=compute_75

# Shared front of every compile line. Recursive (=) on purpose so that $^ and
# $@ are expanded inside the recipe that uses it.
NVCC_BUILD = $(NVCC) -ccbin $(CPP) -I $(CUDA_DIR)/samples/common/inc -m64 \
             $(GENCODE) $^ $(CFLAGS) -o $@

# Macro sets for the convolution programs.
CONV_TILES := -DTii=4 -DTi=16 -DTnn=4 -DTn=16 -DTx=4 -DTy=4
CONV_FIXED := -DNx=256 -DNy=256 -DKx=3 -DKy=3 -DNi=128 -DNn=128 $(CONV_TILES)
CONV_PARAM = -DNx=$(or $(NX_PARAM),256) -DNy=$(or $(NY_PARAM),256) -DKx=3 -DKy=3 \
             -DNi=$(or $(NI_PARAM),128) -DNn=$(or $(NN_PARAM),128) $(CONV_TILES)

# Macro sets for the classifier programs. The parameterised variant passes no
# tile-size macros, exactly as before.
CLASS_FIXED := -DNi=1024 -DNn=512 -DTii=8 -DTi=32 -DTnn=16 -DTn=8
CLASS_PARAM = -DNi=$(or $(NI_PARAM),1024) -DNn=$(or $(NN_PARAM),512)

.PHONY: all clean

all: $(MODULE)

# ---- convolution.cu --------------------------------------------------------
# test1 and conv1 are built with identical options.
test1 conv1: convolution.cu
	$(NVCC_BUILD) $(CONV_FIXED) -DCONCURRENT=0 -DNb=10

conv1c: convolution.cu
	$(NVCC_BUILD) $(CONV_FIXED) -DCONCURRENT=1 -DNb=10

opt-conv1: convolution.cu
	$(NVCC_BUILD) $(CONV_PARAM) -DCONCURRENT=0 -DNb=$(or $(NUM_BATCHES),10)

opt-conv1c: convolution.cu
	$(NVCC_BUILD) $(CONV_PARAM) -DCONCURRENT=1 -DNb=$(or $(NUM_BATCHES),10)

# ---- classifier.cu ---------------------------------------------------------
class1: classifier.cu
	$(NVCC_BUILD) $(CLASS_FIXED) -DCONCURRENT=0 -DNb=1

class1c: classifier.cu
	$(NVCC_BUILD) $(CLASS_FIXED) -DCONCURRENT=1 -DNb=1

opt-class1: classifier.cu
	$(NVCC_BUILD) $(CLASS_PARAM) -DCONCURRENT=0 -DNb=$(or $(NUM_BATCHES),1)

opt-class1c: classifier.cu
	$(NVCC_BUILD) $(CLASS_PARAM) -DCONCURRENT=1 -DNb=$(or $(NUM_BATCHES),1)

# ---- batched-classifier.cu -------------------------------------------------
# class-batched passes no CONCURRENT macro; opt-class-batched passes 0.
class-batched: batched-classifier.cu
	$(NVCC_BUILD) $(CLASS_FIXED) -DNb=10

opt-class-batched: batched-classifier.cu
	$(NVCC_BUILD) $(CLASS_PARAM) -DCONCURRENT=0 -DNb=$(or $(NUM_BATCHES),10)

# ---- batched-convolution.cu ------------------------------------------------
# conv-batched uses CONCURRENT=1 while opt-conv-batched uses CONCURRENT=0.
conv-batched: batched-convolution.cu
	$(NVCC_BUILD) $(CONV_FIXED) -DCONCURRENT=1 -DNb=20

opt-conv-batched: batched-convolution.cu
	$(NVCC_BUILD) $(CONV_PARAM) -DCONCURRENT=0 -DNb=$(or $(NUM_BATCHES),20)

# ---- convolution2.cu -------------------------------------------------------
conv2: convolution2.cu
	$(NVCC_BUILD) $(CONV_FIXED) -DCONCURRENT=0 -DNb=10

conv2c: convolution2.cu
	$(NVCC_BUILD) $(CONV_FIXED) -DCONCURRENT=1 -DNb=10

opt-conv2: convolution2.cu
	$(NVCC_BUILD) $(CONV_PARAM) -DCONCURRENT=0 -DNb=$(or $(NUM_BATCHES),10)

opt-conv2c: convolution2.cu
	$(NVCC_BUILD) $(CONV_PARAM) -DCONCURRENT=1 -DNb=$(or $(NUM_BATCHES),10)

# Remove every binary this Makefile can build, not just the `all` set.
clean:
	@$(RM) $(MODULE) $(EXTRA)