77from pidsmaker .config import get_runtime_required_args , get_yml_cfg
88from pidsmaker .utils .dataset_utils import edge_reversed , exclude_edge_type
99from pidsmaker .utils .utils import init_database_connection , log
10+ import json
1011
1112from . import filelist
1213
@@ -27,31 +28,40 @@ def store_netflow(file_path, cur, connect, index_id, filelist):
2728 for file in tqdm (filelist ):
2829 with open (file_path + file , "r" ) as f :
2930 for line in f :
30- if '{"datum":{"com.bbn.tc.schema.avro.cdm18.NetFlowObject"' in line :
31- try :
32- # res = re.findall(
33- # 'NetFlowObject":{"uuid":"(.*?)"(.*?)"localAddress":{"string":"(.*?)"},"localPort":{"int":(.*?)},"remoteAddress":{"string":"(.*?)"},"remotePort":{"int":(.*?)}',
34- # line)[0]
35- res = re .findall (
36- 'NetFlowObject":{"uuid":"(.*?)"(.*?)"localAddress":"(.*?)","localPort":(.*?),"remoteAddress":"(.*?)","remotePort":(.*?),' ,
37- line ,
38- )[0 ]
39-
40- nodeid = res [0 ]
41- srcaddr = res [2 ]
42- srcport = res [3 ]
43- dstaddr = res [4 ]
44- dstport = res [5 ]
45-
46- nodeproperty = srcaddr + "," + srcport + "," + dstaddr + "," + dstport
47- hashstr = stringtomd5 (nodeid )
48- netobj2hash [nodeid ] = [hashstr , nodeproperty ]
49- netobj2hash [hashstr ] = nodeid
50- netobjset .add (hashstr )
51- successful_num += 1
52- except :
53- failed_num += 1
54- pass
31+ if '{"datum":{"com.bbn.tc.schema.avro.cdm18.NetFlowObject"' not in line :
32+ continue
33+
34+ try :
35+ obj = json .loads (line )
36+ netobj = obj ["datum" ]["com.bbn.tc.schema.avro.cdm18.NetFlowObject" ]
37+
38+ nodeid = netobj ["uuid" ]
39+
40+ srcaddr = netobj .get ("localAddress" , "null" )
41+ srcport = netobj .get ("localPort" , "null" )
42+ dstaddr = netobj .get ("remoteAddress" , "null" )
43+ dstport = netobj .get ("remotePort" , "null" )
44+
45+ if isinstance (srcaddr , dict ):
46+ srcaddr = srcaddr .get ("string" , "null" )
47+ if isinstance (dstaddr , dict ):
48+ dstaddr = dstaddr .get ("string" , "null" )
49+ if isinstance (srcport , dict ):
50+ srcport = str (srcport .get ("int" , "null" ))
51+ if isinstance (dstport , dict ):
52+ dstport = str (dstport .get ("int" , "null" ))
53+
54+ nodeproperty = f"{ str (srcaddr )} ,{ str (srcport )} ,{ str (dstaddr )} ,{ str (dstport )} "
55+ hashstr = stringtomd5 (nodeid )
56+
57+ netobj2hash [nodeid ] = [hashstr , nodeproperty ]
58+ netobj2hash [hashstr ] = nodeid
59+ netobjset .add (hashstr )
60+
61+ successful_num += 1
62+
63+ except Exception as e :
64+ failed_num += 1
5565
5666 # Store data into database
5767 datalist = []
@@ -81,24 +91,35 @@ def store_subject(file_path, cur, connect, index_id, filelist):
8191 for file in tqdm (filelist ):
8292 with open (file_path + file , "r" ) as f :
8393 for line in f :
84- if '{"datum":{"com.bbn.tc.schema.avro.cdm18.Subject"' in line :
85- subject_uuid = re .findall (
86- 'Subject":{"uuid":"(.*?)"(.*?)"cmdLine":{"string":"(.*?)"}(.*?)"path":"(.*?)"' ,
87- line ,
88- )
89-
90- try :
91- subject_obj2hash [subject_uuid [0 ][0 ]] = [
92- subject_uuid [0 ][- 1 ],
93- subject_uuid [0 ][- 3 ],
94- ] # {uuid:[path, cmd]}
95- success_count += 1
96- except :
97- try :
98- subject_obj2hash [subject_uuid [0 ][0 ]] = ["null" , subject_uuid [0 ][- 3 ]]
99- except :
100- pass
101- fail_count += 1
94+ if '{"datum":{"com.bbn.tc.schema.avro.cdm18.Subject"' not in line :
95+ continue
96+
97+ try :
98+ obj = json .loads (line )
99+ subject = obj ["datum" ]["com.bbn.tc.schema.avro.cdm18.Subject" ]
100+
101+ uuid = subject ["uuid" ]
102+
103+ cmd = "null"
104+ cmd_raw = subject .get ("cmdLine" )
105+
106+ if isinstance (cmd_raw , str ):
107+ # in cadets_e3
108+ cmd = cmd_raw
109+ elif isinstance (cmd_raw , dict ):
110+ # in theia_e3 / clearscope_e3
111+ cmd = cmd_raw .get ("string" , "null" )
112+
113+ path = "null"
114+ props = subject .get ("properties" , {}).get ("map" , {})
115+ if "path" in props :
116+ path = props ["path" ]
117+
118+ subject_obj2hash [uuid ] = [path , cmd ]
119+ success_count += 1
120+ except Exception as e :
121+ fail_count += 1
122+
102123 # Store into database
103124 datalist = []
104125 subject_uuid2hash = {}
@@ -128,15 +149,32 @@ def store_file(file_path, cur, connect, index_id, filelist):
128149 for file in tqdm (filelist ):
129150 with open (file_path + file , "r" ) as f :
130151 for line in f :
131- if '{"datum":{"com.bbn.tc.schema.avro.cdm18.FileObject"' in line :
132- Object_uuid = re .findall (
133- 'FileObject":{"uuid":"(.*?)",(.*?)"filename":"(.*?)"' , line
134- )
135- try :
136- file_obj2hash [Object_uuid [0 ][0 ]] = Object_uuid [0 ][- 1 ]
137- success_count += 1
138- except :
139- fail_count += 1
152+ if '{"datum":{"com.bbn.tc.schema.avro.cdm18.FileObject"' not in line :
153+ continue
154+
155+ try :
156+ obj = json .loads (line )
157+ fileobj = obj ["datum" ]["com.bbn.tc.schema.avro.cdm18.FileObject" ]
158+ uuid = fileobj ["uuid" ]
159+
160+ filename = "null"
161+ base = fileobj .get ("baseObject" , {})
162+ props = base .get ("properties" , {}).get ("map" , {})
163+
164+ if "filename" in base :
165+ filename = base ["filename" ]
166+ elif "path" in base :
167+ filename = base ["path" ]
168+
169+ if "filename" in props :
170+ filename = props ["filename" ]
171+ elif "path" in props :
172+ filename = props ["path" ]
173+
174+ file_obj2hash [uuid ] = filename
175+
176+ except Exception as e :
177+ fail_count += 1
140178
141179 datalist = []
142180 file_uuid2hash = {}
0 commit comments