1+ import csv
12import json
3+ import os
4+ import time
25from dataclasses import asdict
36from multiprocessing import Process
47import ipaddress
@@ -82,6 +85,8 @@ def __init__(
8285 self .gw_ips = {}
8386 # flag to know which flow is the start of the pcap/file
8487 self .first_flow = True
88+ self .times = {}
89+ self .init_csv ()
8590
8691 def read_configuration (self ):
8792 self .client_ips : List [
@@ -512,13 +517,23 @@ def add_flow_to_profile(self, flow):
512517 # software and weird.log flows are allowed to not have a daddr
513518 return False
514519
520+ n = time .time ()
515521 self .get_gateway_info (flow )
522+ time_it_Took = time .time () - n
523+ self .log_time ("get_gateway_info" , time_it_Took )
524+
525+ n = time .time ()
516526
517527 # Check if the flow is whitelisted and we should not process it
518528 if self .whitelist .is_whitelisted_flow (flow ):
519529 self .print (f"{ self .whitelist .get_bloom_filters_stats ()} " , 2 , 0 )
520530 return True
521531
532+ time_it_Took = time .time () - n
533+ self .log_time ("is_whitelisted_flow" , time_it_Took )
534+
535+ n = time .time ()
536+
522537 # 5th. Store the data according to the paremeters
523538 # Now that we have the profileid and twid, add the data from the flow
524539 # in this tw for this profile
@@ -531,7 +546,14 @@ def add_flow_to_profile(self, flow):
531546
532547 # Create profiles for all ips we see
533548 self .db .add_profile (profileid , flow .starttime )
549+ time_it_Took = time .time () - n
550+ self .log_time ("db_calls" , time_it_Took )
551+
552+ n = time .time ()
534553 self .store_features_going_out (flow , flow_parser )
554+ time_it_Took = time .time () - n
555+ self .log_time ("store_features_going_out" , time_it_Took )
556+
535557 if self .analysis_direction == "all" :
536558 self .handle_in_flow (flow )
537559
@@ -541,6 +563,31 @@ def add_flow_to_profile(self, flow):
541563 self .print (pprint .pp (asdict (flow )))
542564 return True
543565
566+ def init_csv (self ):
567+ path = os .path .join (self .output_dir , "times_each_func_took.csv" )
568+ with open (path , "w" , newline = "" ) as f :
569+ writer = csv .writer (f )
570+ writer .writerow (
571+ [
572+ "get_gateway_info" ,
573+ "is_whitelisted_flow" ,
574+ "db_calls" ,
575+ "store_features_going_out" ,
576+ "process_line" ,
577+ "add_flow_to_profile" ,
578+ ]
579+ )
580+
581+ def log_time (self , what , time ):
582+ self .times [what ] = f"{ time :.2f} "
583+ if what == "store_features_going_out" :
584+ path = os .path .join (self .output_dir , "times_each_func_took.csv" )
585+
586+ with open (path , "a" , newline = "" ) as f :
587+ writer = csv .DictWriter (f , fieldnames = self .times .keys ())
588+ writer .writerow (self .times )
589+ self .times = {}
590+
544591 def run (self ):
545592 """
546593 This function runs in 3 different processes for faster processing of
@@ -552,7 +599,7 @@ def run(self):
552599 if not msg :
553600 # wait for msgs
554601 continue
555-
602+ self . times = {}
556603 line : dict = msg ["line" ]
557604 # TODO who is putting this True here?
558605 if line is True :
@@ -562,10 +609,20 @@ def run(self):
562609 self .print (f"< Received Line: { line } " , 2 , 0 )
563610 self .received_lines += 1
564611
612+ n = time .time ()
565613 flow = self .input_handler .process_line (line )
614+ time_it_took = time .time () - n
615+
616+ self .log_time ("process_line" , time_it_took )
617+
566618 if not flow :
567619 continue
620+
621+ n = time .time ()
568622 self .add_flow_to_profile (flow )
623+ time_it_took = time .time () - n
624+ self .log_time ("add_flow_to_profile" , time_it_took )
625+
569626 self .handle_setting_local_net (flow )
570627 self .db .increment_processed_flows ()
571628 except Exception as e :
0 commit comments