Skip to content

Commit 4a281ff

Browse files
committed
Merge branch 'master' into 0.4.x
2 parents 55e056f + e4cc818 commit 4a281ff

File tree

507 files changed

+42731
-6093
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

507 files changed

+42731
-6093
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,6 @@ cscope.*
147147
*.swo
148148
*.un~
149149
*~
150+
151+
# parameters
152+
*.params

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@
1010
[submodule "third_party/minigun"]
1111
path = third_party/minigun
1212
url = https://github.com/jermainewang/minigun.git
13+
[submodule "third_party/METIS"]
14+
path = third_party/METIS
15+
url = https://github.com/KarypisLab/METIS.git

CMakeLists.txt

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,10 @@ endif()
2525
dgl_option(USE_CUDA "Build with CUDA" OFF)
2626
dgl_option(USE_OPENMP "Build with OpenMP" ON)
2727
dgl_option(BUILD_CPP_TEST "Build cpp unittest executables" OFF)
28+
dgl_option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." OFF)
2829
# Set debug compile option for gdb, only happens when -DCMAKE_BUILD_TYPE=DEBUG
2930
if (NOT MSVC)
30-
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb")
31+
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb")
3132
endif(NOT MSVC)
3233

3334
if(USE_CUDA)
@@ -39,6 +40,7 @@ endif(USE_CUDA)
3940
# include directories
4041
include_directories("include")
4142
include_directories("third_party/dlpack/include")
43+
include_directories("third_party/METIS/include/")
4244
include_directories("third_party/dmlc-core/include")
4345
include_directories("third_party/minigun/minigun")
4446
include_directories("third_party/minigun/third_party/moderngpu/src")
@@ -85,13 +87,19 @@ if(USE_OPENMP)
8587
endif(OPENMP_FOUND)
8688
endif(USE_OPENMP)
8789

90+
# To compile METIS correct for DGL.
91+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32")
92+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32")
93+
8894
# configure minigun
8995
add_definitions(-DENABLE_PARTIAL_FRONTIER=0) # disable minigun partial frontier compile
9096
# Source file lists
9197
file(GLOB DGL_SRC
9298
src/*.cc
9399
src/array/*.cc
94100
src/array/cpu/*.cc
101+
src/random/*.cc
102+
src/random/cpu/*.cc
95103
src/kernel/*.cc
96104
src/kernel/cpu/*.cc
97105
src/runtime/*.cc
@@ -122,6 +130,20 @@ add_subdirectory("third_party/dmlc-core")
122130
list(APPEND DGL_LINKER_LIBS dmlc)
123131
set(GOOGLE_TEST 0) # Turn off dmlc-core test
124132

133+
if(NOT MSVC)
134+
# Compile METIS
135+
set(GKLIB_PATH "third_party/METIS/GKlib")
136+
include(${GKLIB_PATH}/GKlibSystem.cmake)
137+
include_directories(${GKLIB_PATH})
138+
add_subdirectory("third_party/METIS/libmetis/")
139+
list(APPEND DGL_LINKER_LIBS metis)
140+
141+
# support PARALLEL_ALGORITHMS
142+
if (LIBCXX_ENABLE_PARALLEL_ALGORITHMS)
143+
add_definitions(-DPARALLEL_ALGORITHMS)
144+
endif(LIBCXX_ENABLE_PARALLEL_ALGORITHMS)
145+
endif(NOT MSVC)
146+
125147
target_link_libraries(dgl ${DGL_LINKER_LIBS} ${DGL_RUNTIME_LINKER_LIBS})
126148

127149
# Installation rules

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ pipeline {
149149
}
150150
}
151151
steps {
152-
sh "nvidia-smi"
152+
// sh "nvidia-smi"
153153
build_dgl_linux("gpu")
154154
}
155155
post {

README.md

Lines changed: 143 additions & 121 deletions
Large diffs are not rendered by default.

apps/kg/README.md

Lines changed: 126 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -42,77 +42,136 @@ We will support multi-GPU training and distributed training in the near future.
4242
The package can run with both Pytorch and MXNet. For Pytorch, it works with Pytorch v1.2 or newer.
4343
For MXNet, it works with MXNet 1.5 or newer.
4444

45-
## Datasets
45+
## Built-in Datasets
4646

47-
DGL-KE provides five knowledge graphs:
47+
DGL-KE provides five built-in knowledge graphs:
4848

4949
| Dataset | #nodes | #edges | #relations |
5050
|---------|--------|--------|------------|
51-
| [FB15k](https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/FB15k.zip) | 14951 | 592213 | 1345 |
52-
| [FB15k-237](https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/FB15k-237.zip) | 14541 | 310116 | 237 |
53-
| [wn18](https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/wn18.zip) | 40943 | 151442 | 18 |
54-
| [wn18rr](https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/wn18rr.zip) | 40943 | 93003 | 11 |
55-
| [Freebase](https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/Freebase.zip) | 86054151 | 338586276 | 14824 |
51+
| [FB15k](https://data.dgl.ai/dataset/FB15k.zip) | 14951 | 592213 | 1345 |
52+
| [FB15k-237](https://data.dgl.ai/dataset/FB15k-237.zip) | 14541 | 310116 | 237 |
53+
| [wn18](https://data.dgl.ai/dataset/wn18.zip) | 40943 | 151442 | 18 |
54+
| [wn18rr](https://data.dgl.ai/dataset/wn18rr.zip) | 40943 | 93003 | 11 |
55+
| [Freebase](https://data.dgl.ai/dataset/Freebase.zip) | 86054151 | 338586276 | 14824 |
5656

5757
Users can specify one of the datasets with `--dataset` in `train.py` and `eval.py`.
5858

5959
## Performance
60+
The 1 GPU speed is measured with 8 CPU cores and one Nvidia V100 GPU. (AWS P3.2xlarge)
61+
The 8 GPU speed is measured with 64 CPU cores and eight Nvidia V100 GPUs. (AWS P3.16xlarge)
6062

61-
The speed is measured with 16 CPU cores and one Nvidia V100 GPU.
63+
The speed on FB15k 1GPU
64+
65+
| Models | TransE_l1 | TransE_l2 | DistMult | ComplEx | RESCAL | TransR | RotatE |
66+
|---------|-----------|-----------|----------|---------|--------|--------|--------|
67+
|MAX_STEPS| 48000 | 32000 | 40000 | 100000 | 32000 | 32000 | 20000 |
68+
|TIME | 370s | 270s | 312s | 282s | 2095s | 1556s | 1861s |
69+
70+
The accuracy on FB15k
71+
72+
| Models | MR | MRR | HITS@1 | HITS@3 | HITS@10 |
73+
|-----------|-------|-------|--------|--------|---------|
74+
| TransE_l1 | 44.18 | 0.675 | 0.551 | 0.774 | 0.861 |
75+
| TransE_l2 | 46.71 | 0.665 | 0.551 | 0.804 | 0.846 |
76+
| DistMult | 61.04 | 0.725 | 0.625 | 0.837 | 0.883 |
77+
| ComplEx | 64.59 | 0.785 | 0.718 | 0.835 | 0.889 |
78+
| RESCAL | 122.3 | 0.669 | 0.598 | 0.711 | 0.793 |
79+
| TransR | 59.86 | 0.676 | 0.591 | 0.735 | 0.814 |
80+
| RotatE | 43.66 | 0.728 | 0.632 | 0.801 | 0.874 |
6281

63-
The speed on FB15k
82+
83+
The speed on FB15k 8GPU
6484

6585
| Models | TransE_l1 | TransE_l2 | DistMult | ComplEx | RESCAL | TransR | RotatE |
6686
|---------|-----------|-----------|----------|---------|--------|--------|--------|
67-
|MAX_STEPS| 20000 | 30000 |100000 | 100000 | 30000 | 100000 | 100000 |
68-
|TIME | 411s | 329s |690s | 806s | 1800s | 7627s | 4327s |
87+
|MAX_STEPS| 6000 | 4000 | 5000 | 4000 | 4000 | 4000 | 2500 |
88+
|TIME | 88.93s | 62.99s | 72.74s | 68.37s | 245.9s | 203.9s | 126.7s |
6989

7090
The accuracy on FB15k
7191

7292
| Models | MR | MRR | HITS@1 | HITS@3 | HITS@10 |
7393
|-----------|-------|-------|--------|--------|---------|
74-
| TransE_l1 | 69.12 | 0.656 | 0.567 | 0.718 | 0.802 |
75-
| TransE_l2 | 35.86 | 0.570 | 0.400 | 0.708 | 0.834 |
76-
| DistMult | 43.35 | 0.783 | 0.713 | 0.837 | 0.897 |
77-
| ComplEx | 51.99 | 0.785 | 0.720 | 0.832 | 0.889 |
78-
| RESCAL | 130.89| 0.668 | 0.597 | 0.720 | 0.800 |
79-
| TransR | 138.7 | 0.501 | 0.274 | 0.704 | 0.801 |
80-
| RotatE | 39.6 | 0.725 | 0.628 | 0.802 | 0.875 |
94+
| TransE_l1 | 44.25 | 0.672 | 0.547 | 0.774 | 0.860 |
95+
| TransE_l2 | 46.13 | 0.658 | 0.539 | 0.748 | 0.845 |
96+
| DistMult | 61.72 | 0.723 | 0.626 | 0.798 | 0.881 |
97+
| ComplEx | 65.84 | 0.754 | 0.676 | 0.813 | 0.880 |
98+
| RESCAL | 135.6 | 0.652 | 0.580 | 0.693 | 0.779 |
99+
| TransR | 65.27 | 0.676 | 0.591 | 0.736 | 0.811 |
100+
| RotatE | 49.59 | 0.683 | 0.581 | 0.759 | 0.848 |
81101

82-
In comparison, GraphVite uses 4 GPUs and takes 14 minutes. Thus, DGL-KE trains TransE on FB15k twice as fast as GraphVite while using much few resources. More performance information on GraphVite can be found [here](https://github.com/DeepGraphLearning/graphvite).
102+
In comparison, GraphVite uses 4 GPUs and takes 14 minutes. Thus, DGL-KE trains TransE on FB15k 9.5X as fast as GraphVite with 8 GPUs. More performance information on GraphVite can be found [here](https://github.com/DeepGraphLearning/graphvite).
83103

84-
The speed on wn18
104+
The speed on wn18 1GPU
85105

86106
| Models | TransE_l1 | TransE_l2 | DistMult | ComplEx | RESCAL | TransR | RotatE |
87107
|---------|-----------|-----------|----------|---------|--------|--------|--------|
88-
|MAX_STEPS| 40000 | 20000 | 10000 | 20000 | 20000 | 20000 | 20000 |
89-
|TIME | 719s | 254s | 126s | 266s | 333s | 1547s | 786s |
108+
|MAX_STEPS| 32000 | 32000 | 20000 | 20000 | 20000 | 30000 | 24000 |
109+
|TIME | 531.5s | 406.6s | 284.1s | 282.3s | 443.6s | 766.2s | 829.4s |
90110

91111
The accuracy on wn18
92112

113+
| Models | MR | MRR | HITS@1 | HITS@3 | HITS@10 |
114+
|-----------|-------|-------|--------|--------|---------|
115+
| TransE_l1 | 318.4 | 0.764 | 0.602 | 0.929 | 0.949 |
116+
| TransE_l2 | 206.2 | 0.561 | 0.306 | 0.800 | 0.944 |
117+
| DistMult | 486.0 | 0.818 | 0.711 | 0.921 | 0.948 |
118+
| ComplEx | 268.6 | 0.933 | 0.916 | 0.949 | 0.961 |
119+
| RESCAL | 536.6 | 0.848 | 0.790 | 0.900 | 0.927 |
120+
| TransR | 452.4 | 0.620 | 0.461 | 0.758 | 0.856 |
121+
| RotatE | 487.9 | 0.944 | 0.940 | 0.947 | 0.952 |
122+
123+
The speed on wn18 8GPU
124+
125+
| Models | TransE_l1 | TransE_l2 | DistMult | ComplEx | RESCAL | TransR | RotatE |
126+
|---------|-----------|-----------|----------|---------|--------|--------|--------|
127+
|MAX_STEPS| 4000 | 4000 | 2500 | 2500 | 2500 | 2500 | 3000 |
128+
|TIME | 119.3s | 81.1s | 76.0s | 58.0s | 594.1s | 1168s | 139.8s |
129+
130+
The accuracy on wn18
131+
132+
| Models | MR | MRR | HITS@1 | HITS@3 | HITS@10 |
133+
|-----------|-------|-------|--------|--------|---------|
134+
| TransE_l1 | 360.3 | 0.745 | 0.562 | 0.930 | 0.951 |
135+
| TransE_l2 | 193.8 | 0.557 | 0.301 | 0.799 | 0.942 |
136+
| DistMult | 499.9 | 0.807 | 0.692 | 0.917 | 0.945 |
137+
| ComplEx | 476.7 | 0.935 | 0.926 | 0.943 | 0.949 |
138+
| RESCAL | 618.8 | 0.848 | 0.791 | 0.897 | 0.927 |
139+
| TransR | 513.1 | 0.659 | 0.491 | 0.821 | 0.871 |
140+
| RotatE | 466.2 | 0.944 | 0.940 | 0.945 | 0.951 |
141+
142+
143+
The speed on Freebase (8 GPU)
144+
145+
| Models | TransE_l2 | DistMult | ComplEx | TransR | RotatE |
146+
|---------|-----------|----------|---------|--------|--------|
147+
|MAX_STEPS| 320000 | 300000 | 360000 | 300000 | 300000 |
148+
|TIME | 7908s | 7425s | 8946s | 16816s | 12817s |
149+
150+
The accuracy on Freebase (it is tested when 1000 negative edges are sampled for each positive edge).
151+
93152
| Models | MR | MRR | HITS@1 | HITS@3 | HITS@10 |
94153
|-----------|--------|-------|--------|--------|---------|
95-
| TransE_l1 | 321.35 | 0.760 | 0.652 | 0.850 | 0.940 |
96-
| TransE_l2 | 181.57 | 0.570 | 0.322 | 0.802 | 0.944 |
97-
| DistMult | 271.09 | 0.769 | 0.639 | 0.892 | 0.949 |
98-
| ComplEx | 276.37 | 0.935 | 0.916 | 0.950 | 0.960 |
99-
| RESCAL | 579.54 | 0.846 | 0.791 | 0.898 | 0.931 |
100-
| TransR | 615.56 | 0.606 | 0.378 | 0.826 | 0.890 |
101-
| RotatE | 367.64 | 0.931 | 0.924 | 0.935 | 0.944 |
154+
| TransE_l2 | 22.4 | 0.756 | 0.688 | 0.800 | 0.882 |
155+
| DistMult | 45.4 | 0.833 | 0.812 | 0.843 | 0.872 |
156+
| ComplEx | 48.0 | 0.830 | 0.812 | 0.838 | 0.864 |
157+
| TransR | 51.2 | 0.697 | 0.656 | 0.716 | 0.771 |
158+
| RotatE | 93.3 | 0.770 | 0.749 | 0.780 | 0.805 |
102159

103-
The speed on Freebase
160+
The speed on Freebase (48 CPU)
161+
This is measured with 48 CPU cores on an AWS r5dn.24xlarge.
104162

105-
| Models | DistMult | ComplEx |
106-
|---------|----------|---------|
107-
|MAX_STEPS| 3200000 | 3200000 |
108-
|TIME | 2.44h | 2.94h |
163+
| Models | TransE_l2 | DistMult | ComplEx |
164+
|---------|-----------|----------|---------|
165+
|MAX_STEPS| 50000 | 50000 | 50000 |
166+
|TIME | 7002s | 6340s | 8133s |
109167

110-
The accuracy on Freebase (it is tested when 100,000 negative edges are sampled for each positive edge).
168+
The accuracy on Freebase (it is tested when 1000 negative edges are sampled for each positive edge).
111169

112-
| Models | MR | MRR | HITS@1 | HITS@3 | HITS@10 |
113-
|----------|--------|-------|--------|--------|---------|
114-
| DistMul | 6159.1 | 0.716 | 0.690 | 0.729 | 0.760 |
115-
| ComplEx | 6888.8 | 0.716 | 0.697 | 0.728 | 0.760 |
170+
| Models | MR | MRR | HITS@1 | HITS@3 | HITS@10 |
171+
|-----------|--------|-------|--------|--------|---------|
172+
| TransE_l2 | 30.8 | 0.814 | 0.764 | 0.848 | 0.902 |
173+
| DistMult | 45.1 | 0.834 | 0.815 | 0.843 | 0.871 |
174+
| ComplEx | 44.9 | 0.837 | 0.819 | 0.845 | 0.870 |
116175

117176
The configuration for reproducing the performance results can be found [here](https://github.com/dmlc/dgl/blob/master/apps/kg/config/best_config.sh).
118177

@@ -129,24 +188,20 @@ when given (?, rel, tail).
129188

130189
### Input formats:
131190

132-
DGL-KE supports two knowledge graph input formats. A knowledge graph is stored
133-
using five files.
134-
135-
Format 1:
191+
DGL-KE supports two knowledge graph input formats for user defined dataset
136192

137-
- entities.dict contains pairs of (entity Id, entity name). The number of rows is the number of entities (nodes).
138-
- relations.dict contains pairs of (relation Id, relation name). The number of rows is the number of relations.
139-
- train.txt stores edges in the training set. They are stored as triples of (head, rel, tail).
140-
- valid.txt stores edges in the validation set. They are stored as triples of (head, rel, tail).
141-
- test.txt stores edges in the test set. They are stored as triples of (head, rel, tail).
193+
- raw_udd_[h|r|t], raw user-defined dataset. In this format, users only need to provide triples and let the dataloader generate and manage the id mappings. The dataloader will generate two files: entities.tsv for the entity id mapping and relations.tsv for the relation id mapping. The order of the head, relation and tail entities is described in [h|r|t]; for example, raw_udd_trh means the triples are stored in the order of tail, relation and head. It should contain three files:
194+
- *train* stores the triples in the training set. In format of a triple, e.g., [src_name, rel_name, dst_name] and should follow the order specified in [h|r|t]
195+
- *valid* stores the triples in the validation set. In format of a triple, e.g., [src_name, rel_name, dst_name] and should follow the order specified in [h|r|t]
196+
- *test* stores the triples in the test set. In format of a triple, e.g., [src_name, rel_name, dst_name] and should follow the order specified in [h|r|t]
142197

143198
Format 2:
144-
145-
- entity2id.txt contains pairs of (entity name, entity Id). The number of rows is the number of entities (nodes).
146-
- relation2id.txt contains pairs of (relation name, relation Id). The number of rows is the number of relations.
147-
- train.txt stores edges in the training set. They are stored as triples of (head, tail, rel).
148-
- valid.txt stores edges in the validation set. They are stored as a triple of (head, tail, rel).
149-
- test.txt stores edges in the test set. They are stored as a triple of (head, tail, rel).
199+
- udd_[h|r|t], user-defined dataset. In this format, users should provide the id mappings for entities and relations. The order of the head, relation and tail entities is described in [h|r|t]; for example, udd_trh means the triples are stored in the order of tail, relation and head. It should contain five files:
200+
- *entities* stores the mapping between entity name and entity Id
201+
- *relations* stores the mapping between relation name and relation Id
202+
- *train* stores the triples in the training set. In format of a triple, e.g., [src_id, rel_id, dst_id] and should follow the order specified in [h|r|t]
203+
- *valid* stores the triples in the validation set. In format of a triple, e.g., [src_id, rel_id, dst_id] and should follow the order specified in [h|r|t]
204+
- *test* stores the triples in the test set. In format of a triple, e.g., [src_id, rel_id, dst_id] and should follow the order specified in [h|r|t]
150205

151206
### Output formats:
152207

@@ -166,34 +221,36 @@ Here are some examples of using the training script.
166221
Train KGE models with GPU.
167222

168223
```bash
169-
python3 train.py --model DistMult --dataset FB15k --batch_size 1024 \
170-
--neg_sample_size 256 --hidden_dim 2000 --gamma 500.0 --lr 0.1 --max_step 100000 \
171-
--batch_size_eval 16 --gpu 0 --valid --test -adv
224+
python3 train.py --model DistMult --dataset FB15k --batch_size 1024 --neg_sample_size 256 \
225+
--hidden_dim 400 --gamma 143.0 --lr 0.08 --batch_size_eval 16 --valid --test -adv \
226+
--gpu 0 --max_step 40000
172227
```
173228

174-
Train KGE models with mixed CPUs and GPUs.
229+
Train KGE models with mixed CPU-GPU training on multiple GPUs.
175230

176231
```bash
177-
python3 train.py --model DistMult --dataset FB15k --batch_size 1024 \
178-
--neg_sample_size 256 --hidden_dim 2000 --gamma 500.0 --lr 0.1 --max_step 100000 \
179-
--batch_size_eval 16 --gpu 0 --valid --test -adv --mix_cpu_gpu
232+
python3 train.py --model DistMult --dataset FB15k --batch_size 1024 --neg_sample_size 256 \
233+
--hidden_dim 400 --gamma 143.0 --lr 0.08 --batch_size_eval 16 --valid --test -adv \
234+
--max_step 5000 --mix_cpu_gpu --num_proc 8 --gpu 0 1 2 3 4 5 6 7 --async_update \
235+
--soft_rel_part --force_sync_interval 1000
180236
```
181237

182238
Train embeddings and verify it later.
183239

184240
```bash
185-
python3 train.py --model DistMult --dataset FB15k --batch_size 1024 \
186-
--neg_sample_size 256 --hidden_dim 2000 --gamma 500.0 --lr 0.1 --max_step 100000 \
187-
--batch_size_eval 16 --gpu 0 --valid -adv --save_emb DistMult_FB15k_emb
241+
python3 train.py --model DistMult --dataset FB15k --batch_size 1024 --neg_sample_size 256 \
242+
--hidden_dim 400 --gamma 143.0 --lr 0.08 --batch_size_eval 16 --valid --test -adv \
243+
--gpu 0 --max_step 40000 --save_emb DistMult_FB15k_emb
188244

189-
python3 eval.py --model_name DistMult --dataset FB15k --hidden_dim 2000 \
190-
--gamma 500.0 --batch_size 16 --gpu 0 --model_path DistMult_FB15k_emb/
245+
python3 eval.py --model_name DistMult --dataset FB15k --hidden_dim 400 \
246+
--gamma 143.0 --batch_size 16 --gpu 0 --model_path DistMult_FB15k_emb/
191247

192248
```
193249

194250
Train embeddings with multi-processing. This currently doesn't work in MXNet.
195251
```bash
196-
python3 train.py --model DistMult --dataset FB15k --batch_size 1024 \
197-
--neg_sample_size 256 --hidden_dim 2000 --gamma 500.0 --lr 0.07 --max_step 3000 \
198-
--batch_size_eval 16 --regularization_coef 0.000001 --valid --test -adv --num_proc 8
252+
python3 train.py --model TransE_l2 --dataset Freebase --batch_size 1000 \
253+
--neg_sample_size 200 --hidden_dim 400 --gamma 10 --lr 0.1 --max_step 50000 \
254+
--log_interval 100 --batch_size_eval 1000 --neg_sample_size_eval 1000 --test \
255+
-adv --regularization_coef 1e-9 --num_thread 1 --num_proc 48
199256
```

0 commit comments

Comments
 (0)