-
Notifications
You must be signed in to change notification settings - Fork 89
Expand file tree
/
Copy pathclp-config.template.json.yaml
More file actions
189 lines (185 loc) · 5.37 KB
/
clp-config.template.json.yaml
File metadata and controls
189 lines (185 loc) · 5.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# yaml-language-server: $schema=../usr/share/config-schemas/clp-config.schema.json
#package:
# storage_engine: "clp-s"
# query_engine: "clp-s"
## API server config
#api_server:
# host: "localhost"
# port: 3001
# default_max_num_query_results: 1000
# query_job_polling:
# initial_backoff_ms: 100
# max_backoff_ms: 5000
## Log ingestor config. Currently, this config applies only if `logs_input.type` is "s3".
#log_ingestor:
# host: "localhost"
# port: 3002
# # The timeout (in seconds) after which the log buffer is flushed for compression if no new input
# # arrives.
# buffer_flush_timeout: 300
# # The log buffer size (in bytes) that triggers a flush for compression.
# buffer_flush_threshold: 4294967296 # 4 GiB
# # The capacity of the internal channel used for communication between an ingestion job and the
# # log buffer.
# channel_capacity: 10
# logging_level: "INFO"
## Location (e.g., directory) containing any logs you wish to compress. Must be reachable by all
## workers.
#logs_input:
# type: "fs"
#
# # NOTE: This directory will be exposed inside the container, so symbolic links to files outside
# # this directory will be ignored.
# directory: "/"
#
## File containing credentials for services
#credentials_file_path: "etc/credentials.yaml"
#
## Remove any bundled services below if you wish to use your own.
#bundled:
# - "database"
# - "queue"
# - "redis"
# - "results_cache"
#
#database:
# type: "mariadb" # "mariadb" or "mysql"
# host: "localhost"
# port: 3306
# names:
# clp: "clp-db"
# spider: "spider-db"
#
#compression_scheduler:
# jobs_poll_delay: 0.1 # seconds
# max_concurrent_tasks_per_job: 0 # A value of 0 disables the limit
# logging_level: "INFO"
# type: "celery" # "celery" or "spider"
#
#spider_scheduler:
# host: "localhost"
# port: 6000
# logging_level: "INFO"
#
#query_scheduler:
# host: "localhost"
# port: 7000
# jobs_poll_delay: 0.1 # seconds
# max_datasets_per_query: 10 # Set to null to disable the limit
# num_archives_to_search_per_sub_job: 16
# logging_level: "INFO"
#
#queue:
# host: "localhost"
# port: 5672
#
#redis:
# host: "localhost"
# port: 6379
# query_backend_database: 0
# compression_backend_database: 1
#
#reducer:
# host: "localhost"
# base_port: 14009
# logging_level: "INFO"
# upsert_interval: 100 # milliseconds
#
#results_cache:
# host: "localhost"
# port: 27017
# db_name: "clp-query-results"
# stream_collection_name: "stream-files"
#
# # Retention period for search results, in minutes. Set to null to disable automatic deletion.
# retention_period: 60
#
#compression_worker:
# logging_level: "INFO"
#
#query_worker:
# logging_level: "INFO"
#
#webui:
# host: "localhost"
# port: 4000
# results_metadata_collection_name: "results-metadata"
# rate_limit: 1000
#
#mcp_server: null
#
## Where archives should be output to
#archive_output:
# storage:
# type: "fs"
# # NOTE: This directory must not overlap with any path used in CLP's execution container. An
# # error will be raised if so.
# directory: "var/data/archives"
#
# # Retention period for archives, in minutes. Set to null to disable automatic deletion.
# retention_period: null
#
# # How much data CLP should try to compress into each archive
# target_archive_size: 268435456 # 256 MiB
#
# # How large the dictionaries should be allowed to get before the archive is
# # closed and a new one is created
# target_dictionaries_size: 33554432 # 32 MiB
#
# # How large each encoded file should be before being split into a new encoded
# # file
# target_encoded_file_size: 268435456 # 256 MiB
#
# # How much data CLP should try to fit into each segment within an archive
# target_segment_size: 268435456 # 256 MiB
#
# # Compression level for archives: 1 (fast/low compression) to 19 (slow/high compression)
# compression_level: 3
#
## Where CLP stream files (e.g., IR streams) should be output
#stream_output:
# storage:
# type: "fs"
# # NOTE: This directory must not overlap with any path used in CLP's execution container. An
# # error will be raised if so.
# directory: "var/data/streams"
#
# # How large each stream file should be before being split into a new stream file
# target_uncompressed_size: 134217728 # 128 MiB
#
## Garbage collector config
#garbage_collector:
# logging_level: "INFO"
#
# # Interval (in minutes) at which garbage collector jobs run
# sweep_interval:
# archive: 60
# search_result: 30
#
## Presto client config
#presto: null
#
## Location where other data (besides archives) are stored. It will be created if
## it doesn't exist.
## NOTE: This directory must not overlap with any path used in CLP's execution container. An error
## will be raised if so.
#data_directory: "var/data"
#
## Location where logs are stored. It will be created if it doesn't exist.
## NOTE: This directory must not overlap with any path used in CLP's execution container. An error
## will be raised if so.
#logs_directory: "var/log"
#
## Location where temporary runtime data are stored. It will be created if
## it doesn't exist.
## NOTE: This directory must not overlap with any path used in CLP's execution container. An error
## will be raised if so.
#tmp_directory: "var/tmp"
#
## Location of the AWS tools' config files (e.g., `~/.aws`)
#aws_config_directory: null
#
## Anonymous usage telemetry. Set `disable` to true to turn telemetry off.
## See: https://docs.yscope.com/clp/main/user-guide/telemetry
#telemetry:
# disable: false