3 files changed: 42 additions, 0 deletions

Changed paths:
  ansible/roles/opensearch/tasks
  environments/common/files/filebeat
ansible/roles/opensearch/tasks (new file):

+# Remove data which was NOT indexed by Slurm Job ID
+# It will be re-ingested by filebeat from the slurmdbd, with that index
+
+- name: Ensure opensearch stopped
+  systemd:
+    name: opensearch
+    state: stopped
+  register: _opensearch_stop
+  until: "_opensearch_stop.status.ActiveState in ['inactive', 'failed']"
+  retries: 15
+  delay: 5
+
+- name: Archive existing data
+  community.general.archive:
+    path: "{{ opensearch_data_path }}"
+    dest: "{{ opensearch_data_path | dirname }}/data-{{ lookup('pipe', 'date --iso-8601=minutes') }}.tar.gz"
+    remove: true
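For context, a minimal sketch (not part of this change) showing how the archive destination above renders; the opensearch_data_path value is an assumption for illustration only, and the timestamp comes from GNU date on the control node:

- hosts: localhost
  gather_facts: false
  vars:
    opensearch_data_path: /usr/share/opensearch/data  # assumed value, illustration only
  tasks:
    - name: Show where the archive task would write its tarball
      debug:
        msg: "{{ opensearch_data_path | dirname }}/data-{{ lookup('pipe', 'date --iso-8601=minutes') }}.tar.gz"
      # Prints e.g. /usr/share/opensearch/data-2024-05-01T12:34+00:00.tar.gz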
ansible/roles/opensearch/tasks (existing tasks file):

     path: /etc/systemd/system/opendistro.service
     state: absent
 
+- name: Enumerate files in data directory
+  find:
+    path: "{{ opensearch_data_path }}"
+  register: _find_opensearch_data
+
+- name: Archive incorrectly indexed data
+  import_tasks: archive_data.yml
+  when:
+    - _find_opensearch_data.files | length > 0
+    - "'slurm_jobid_index' not in _find_opensearch_data.files | map(attribute='path') | map('basename')"
+
 - name: Ensure required opensearch host directories exist
   file:
     state: directory
...
     - "{{ opensearch_config_path }}"
     - "{{ opensearch_data_path }}"
 
+- name: Set indexed data flag
+  copy:
+    dest: "{{ opensearch_data_path }}/slurm_jobid_index"
+    content: |
+      This is a flag file to indicate that filebeat is pushing data
+      indexed by Slurm JobID to prevent duplicate OpenSearch records
+    owner: "{{ opensearch_podman_user }}"
+    group: "{{ opensearch_podman_user }}"
+
 - name: Create certs
   import_tasks: certs.yml
 
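A minimal sketch (not part of this change) of the gating logic above, evaluated against a hypothetical find result so the two when conditions can be checked in isolation; the paths under vars are invented for illustration:

- hosts: localhost
  gather_facts: false
  vars:
    # hypothetical find result, for illustration only
    _find_opensearch_data:
      files:
        - path: /usr/share/opensearch/data/nodes
        - path: /usr/share/opensearch/data/slurm_jobid_index
  tasks:
    - name: Evaluate the same conditions the role uses to decide whether to archive
      debug:
        msg: >-
          archive needed:
          {{ _find_opensearch_data.files | length > 0
          and 'slurm_jobid_index' not in
          _find_opensearch_data.files | map(attribute='path') | map('basename') }}
      # Prints "archive needed: False" here because the flag file is present;
      # remove the slurm_jobid_index entry from the vars and it prints True.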
environments/common/files/filebeat (filebeat configuration):

@@ -22,6 +22,11 @@ filebeat.inputs:
   fields_under_root: true
 
 processors:
+  # Want to use the Slurm JobID as the ElasticSearch id to avoid duplicated records
+  # Don't use filebeat.inputs:json.document_id as this removes the JobID from the record
+  - fingerprint:
+      fields: ["json.JobID"]
+      target_field: "@metadata._id"
   - timestamp:
       field: json.End
       layouts:
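For reference, a minimal sketch (not the shipped configuration) of the same processor with the hashing method written out explicitly; assuming the output honours @metadata._id the way the Beats Elasticsearch output does, a re-ingested record with the same JobID replaces the existing document rather than creating a duplicate:

processors:
  - fingerprint:
      fields: ["json.JobID"]         # value(s) hashed to build the id
      target_field: "@metadata._id"  # used as the document _id by the output
      method: "sha256"               # Filebeat's default hashing method, shown explicitly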