diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/AlertsGraphStatisticsDetails/bitsight_statistics.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/AlertsGraphStatisticsDetails/bitsight_statistics.py index ed6fe866aa0..1da5bc69269 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/AlertsGraphStatisticsDetails/bitsight_statistics.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/AlertsGraphStatisticsDetails/bitsight_statistics.py @@ -19,7 +19,7 @@ ALERTS_DATA_TABLE, COMPANIES, ENDPOINTS, - ALERT_GRAPH_STATISTICS_FUNC_NAME, + ALERT_GRAPH_STATISTICS_FUNC_NAME ) @@ -96,7 +96,7 @@ def get_risk_vector_data( data_to_post = [] risk_vector_data = [] checkpoint_key = "{}".format(company_guid) - checkpoint_data = self.checkpoint_obj.get_last_data(state) + checkpoint_data = self.checkpoint_obj.get_last_data(state, table_name=table_name) last_data = self.checkpoint_obj.get_endpoint_last_data( checkpoint_data, endpoint, checkpoint_key ) @@ -124,9 +124,8 @@ def get_risk_vector_data( self.send_data_to_sentinel( risk_vector_data, table_name, company_name, endpoint ) - # data_to_post = str(data_to_post) self.checkpoint_obj.save_checkpoint( - state, checkpoint_data, endpoint, checkpoint_key, data_to_post + state, checkpoint_data, endpoint, "{}_{}".format(table_name, "Checkpoint"), checkpoint_key, data_to_post ) except BitSightException: raise BitSightException() @@ -183,7 +182,7 @@ def get_diligence_historical_statistics_details(self, company_name, company_guid post_data = [] checkpoint_key = "{}".format(company_guid) checkpoint_data = self.checkpoint_obj.get_last_data( - self.diligence_historical_statistics_state + self.diligence_historical_statistics_state, table_name=DILIGENCE_HISTORICAL_STATISTICS_TABLE ) last_data = self.checkpoint_obj.get_endpoint_last_data( checkpoint_data, "diligence_historical-statistics", company_guid @@ -218,11 +217,11 @@ def get_diligence_historical_statistics_details(self, company_name, company_guid company_name, "diligence historical statistics", ) - # checkpoint_data_to_post = str(checkpoint_data_to_post) self.checkpoint_obj.save_checkpoint( self.diligence_historical_statistics_state, checkpoint_data, "diligence_historical-statistics", + "{}_{}".format(DILIGENCE_HISTORICAL_STATISTICS_TABLE, "Checkpoint"), checkpoint_key, checkpoint_data_to_post, ) @@ -254,7 +253,7 @@ def get_graph_data(self, company_name, company_guid): rating_diff = None last_date = None checkpoint_key = "{}".format(company_guid) - checkpoint_data = self.checkpoint_obj.get_last_data(self.graph_state) + checkpoint_data = self.checkpoint_obj.get_last_data(self.graph_state, table_name=GRAPH_DATA_TABLE) last_data = self.checkpoint_obj.get_endpoint_last_data( checkpoint_data, "graph_data", company_guid ) @@ -307,6 +306,7 @@ def get_graph_data(self, company_name, company_guid): self.graph_state, checkpoint_data, "graph_data", + "{}_{}".format(GRAPH_DATA_TABLE, "Checkpoint"), checkpoint_key, data_to_post, ) @@ -340,7 +340,7 @@ def get_alerts_details(self, company_name, company_guid): try: data_to_post = None checkpoint_key = "{}".format(company_guid) - checkpoint_data = self.checkpoint_obj.get_last_data(self.alerts_state) + checkpoint_data = self.checkpoint_obj.get_last_data(self.alerts_state, table_name=ALERTS_DATA_TABLE) last_date = self.checkpoint_obj.get_endpoint_last_data( checkpoint_data, "alerts_data", company_guid ) @@ -403,6 +403,7 @@ def get_alerts_details(self, company_name, company_guid): self.alerts_state, checkpoint_data, "alerts_data", + "{}_{}".format(ALERTS_DATA_TABLE, "Checkpoint"), checkpoint_key, data_to_post, ) @@ -422,7 +423,7 @@ def get_all_copmanies_alerts_graph_statisctics_details( company_names (list): List of company names. """ fetching_index = self.get_last_data_index( - company_names, self.checkpoint_obj, self.company_state + company_names, self.checkpoint_obj, self.company_state, table_name="{}_{}".format(ALERTS_DATA_TABLE, "Statistics") ) for company_index in range(fetching_index + 1, len(logs_data)): company_name = logs_data[company_index].get("name_s") @@ -443,6 +444,7 @@ def get_all_copmanies_alerts_graph_statisctics_details( self.company_state, company_name, "statisctics_company", + "{}_{}".format(ALERTS_DATA_TABLE, "Statistics_Company_Checkpoint"), company_name_flag=True, ) diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/BitSight.zip b/Solutions/BitSight/Data Connectors/BitSightDataConnector/BitSight.zip index 9d4146d6aaa..90f55d30dbc 100644 Binary files a/Solutions/BitSight/Data Connectors/BitSightDataConnector/BitSight.zip and b/Solutions/BitSight/Data Connectors/BitSightDataConnector/BitSight.zip differ diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/BreachesDetails/bitsight_breaches.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/BreachesDetails/bitsight_breaches.py index 8a77aa4b8c2..230228d759f 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/BreachesDetails/bitsight_breaches.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/BreachesDetails/bitsight_breaches.py @@ -78,7 +78,7 @@ def get_all_companies_breaches_details(self, company_names, logs_data): """ count_companies = 0 fetching_index = self.get_last_data_index( - company_names, self.checkpoint_obj, self.breach_company_state + company_names, self.checkpoint_obj, self.breach_company_state, table_name=consts.BREACHES_TABLE_NAME ) for company_index in range(fetching_index + 1, len(logs_data)): company_name = logs_data[company_index].get("name_s") @@ -98,6 +98,7 @@ def get_all_companies_breaches_details(self, company_names, logs_data): self.breach_company_state, company_name, "breaches", + "{}_{}".format(consts.BREACHES_TABLE_NAME, "Company_Checkpoint"), company_name_flag=True, ) applogger.info( @@ -171,7 +172,7 @@ def get_breaches_data(self, company_name, company_guid): ) ) return - last_data = self.checkpoint_obj.get_last_data(self.breaches_details_state) + last_data = self.checkpoint_obj.get_last_data(self.breaches_details_state, table_name=consts.BREACHES_TABLE_NAME) last_checkpoint_company = self.checkpoint_obj.get_endpoint_last_data( last_data, "breaches", company_guid ) @@ -188,6 +189,7 @@ def get_breaches_data(self, company_name, company_guid): self.breaches_details_state, last_data, "breaches", + "{}_{}".format(consts.BREACHES_TABLE_NAME, "Checkpoint"), checkpoint_key, checkpoint_date, ) diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/CompaniesDetails/bitsight_companies.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/CompaniesDetails/bitsight_companies.py index 402fddec381..8b49063cbf5 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/CompaniesDetails/bitsight_companies.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/CompaniesDetails/bitsight_companies.py @@ -11,7 +11,7 @@ COMPANIES_RATING_DETAILS_TABLE_NAME, COMPANY_DETAIL_TABLE_NAME, ENDPOINTS, - COMPANY_DETAILS_FUNC_NAME, + COMPANY_DETAILS_FUNC_NAME ) from ..SharedCode.get_logs_data import get_logs_data from ..SharedCode.logger import applogger @@ -61,10 +61,10 @@ def get_company_details(self, company_name, company_guid): post_data_ratings = [] checkpoint_key = "{}".format(company_guid) checkpoint_data_company = self.checkpoint_obj.get_last_data( - self.company_detail_state + self.company_detail_state, table_name=COMPANY_DETAIL_TABLE_NAME ) checkpoint_data_company_ratings = self.checkpoint_obj.get_last_data( - self.company_rating_state + self.company_rating_state, table_name=COMPANIES_RATING_DETAILS_TABLE_NAME ) last_data_company_details = self.checkpoint_obj.get_endpoint_last_data( checkpoint_data_company, "companies_details", checkpoint_key @@ -114,6 +114,7 @@ def get_company_details(self, company_name, company_guid): self.company_rating_state, checkpoint_data_company_ratings, "companies_ratings_details", + "{}_{}".format(COMPANIES_RATING_DETAILS_TABLE_NAME, "Checkpoint"), checkpoint_key, data_to_post, ) @@ -137,6 +138,7 @@ def get_company_details(self, company_name, company_guid): self.company_detail_state, checkpoint_data_company, "companies_details", + "{}_{}".format(COMPANY_DETAIL_TABLE_NAME, "Checkpoint"), checkpoint_key, data_to_post, ) @@ -166,7 +168,7 @@ def get_all_copmanies_details(self, logs_data, company_names): """ count_companies = 0 fetching_index = self.get_last_data_index( - company_names, self.checkpoint_obj, self.company_state + company_names, self.checkpoint_obj, self.company_state, table_name=COMPANY_DETAIL_TABLE_NAME ) for company_index in range(fetching_index + 1, len(logs_data)): company_name = logs_data[company_index].get("name_s") @@ -186,6 +188,7 @@ def get_all_copmanies_details(self, logs_data, company_names): self.company_state, company_name, "portfolio_company", + "{}_{}".format(COMPANY_DETAIL_TABLE_NAME, "Company_Checkpoint"), company_name_flag=True, ) applogger.info( diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/FindingsDetails/bitsight_findings.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/FindingsDetails/bitsight_findings.py index 103830dc234..ab0e1c8809e 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/FindingsDetails/bitsight_findings.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/FindingsDetails/bitsight_findings.py @@ -13,6 +13,7 @@ FINDINGS_TABLE_NAME, COMPANIES, ENDPOINTS, + FINDING_DETAILS_QUERY ) @@ -40,7 +41,7 @@ def __init__(self, start_time) -> None: def get_all_copmanies_findings_details(self, logs_data, company_names): count_companies = 0 fetching_index = self.get_last_data_index( - company_names, self.checkpoint_obj, self.company_state + company_names, self.checkpoint_obj, self.company_state, table_name=FINDINGS_TABLE_NAME ) for company_index in range(fetching_index + 1, len(logs_data)): company_name = logs_data[company_index].get("name_s") @@ -63,6 +64,7 @@ def get_all_copmanies_findings_details(self, logs_data, company_names): self.company_state, company_name, "findings_company", + "{}_{}".format(FINDINGS_TABLE_NAME, "Company_Checkpoint"), company_name_flag=True, ) @@ -125,7 +127,7 @@ def get_findings_details(self, company_name, company_guid): {"risk_category": "Compromised Systems"}, {"risk_category": "User Behavior"}, ] - last_data = self.checkpoint_obj.get_last_data(self.findings_state) + last_data = self.checkpoint_obj.get_last_data(self.findings_state, table_name=FINDINGS_TABLE_NAME, checkpoint_query=FINDING_DETAILS_QUERY) findings_url = self.base_url + self.findings_endpoint_path.format( company_guid ) @@ -173,6 +175,7 @@ def get_findings_details(self, company_name, company_guid): self.findings_state, last_data, "findings_details", + "{}_{}".format(FINDINGS_TABLE_NAME, "Checkpoint"), checkpoint_key, str(data_to_post.date()), ) @@ -196,6 +199,13 @@ def get_findings_details(self, company_name, company_guid): c_data["next1"] = self.get_bitsight_data(findings_url, params) next_link = c_data["next1"].get("links").get("next") length_results = len(c_data.get("next1").get("results")) + if length_results == 0: + applogger.info( + 'BitSight: No new findings found for {} on page {} ({})'.format( + company_name, page, risk + ) + ) + break applogger.info( "BitSight: Got {} findings for {} on page {}".format( length_results, company_name, page @@ -222,6 +232,7 @@ def get_findings_details(self, company_name, company_guid): self.findings_state, last_data, "findings_details", + "{}_{}".format(FINDINGS_TABLE_NAME, "Checkpoint"), checkpoint_key, str(data_to_post.date()), ) diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/FindingsSummaryDetails/bitsight_findings_summary.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/FindingsSummaryDetails/bitsight_findings_summary.py index 10e36351ac0..c226d8a9c80 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/FindingsSummaryDetails/bitsight_findings_summary.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/FindingsSummaryDetails/bitsight_findings_summary.py @@ -83,7 +83,7 @@ def get_all_companies_findings_summary_details(self, company_names, logs_data): """ count_companies = 0 fetching_index = self.get_last_data_index( - company_names, self.checkpoint_obj, self.findings_summary_company_state + company_names, self.checkpoint_obj, self.findings_summary_company_state, table_name=consts.FINDINGS_SUMMARY_TABLE_NAME ) for company_index in range(fetching_index + 1, len(logs_data)): company_name = logs_data[company_index].get("name_s") @@ -103,6 +103,7 @@ def get_all_companies_findings_summary_details(self, company_names, logs_data): self.findings_summary_company_state, company_name, "findings_summary", + "{}_{}".format(consts.FINDINGS_SUMMARY_TABLE_NAME, "Company_Checkpoint"), company_name_flag=True, ) applogger.info( @@ -232,7 +233,7 @@ def create_findings_summary_data( company_guid (str): GUID of the company. """ last_data = self.checkpoint_obj.get_last_data( - self.findings_summary_details_state + self.findings_summary_details_state, table_name=consts.FINDINGS_SUMMARY_TABLE_NAME ) last_checkpoint_company = self.checkpoint_obj.get_endpoint_last_data( last_data, "findings_summary", company_guid @@ -276,6 +277,7 @@ def create_findings_summary_data( self.findings_summary_details_state, last_data, "findings_summary", + "{}_{}".format(consts.FINDINGS_SUMMARY_TABLE_NAME, "Checkpoint"), checkpoint_key, last_checkpoint_company, ) diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/PortFolioCompanies/bitsight_portfolio.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/PortFolioCompanies/bitsight_portfolio.py index 99f567ecfcc..dc9100ceffd 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/PortFolioCompanies/bitsight_portfolio.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/PortFolioCompanies/bitsight_portfolio.py @@ -9,7 +9,7 @@ COMPANIES_TABLE_NAME, ENDPOINTS, LOGS_STARTS_WITH, - PORTFOLIO_PAGE_SIZE, + PORTFOLIO_PAGE_SIZE ) from ..SharedCode.get_logs_data import get_logs_data from ..SharedCode.logger import applogger diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/bitsight_client.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/bitsight_client.py index f684a6e3bb3..baa87858f4f 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/bitsight_client.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/bitsight_client.py @@ -8,7 +8,7 @@ from ..SharedCode.azure_sentinel import MicrosoftSentinel from .bitsight_exception import BitSightException from .utils import CheckpointManager -from .consts import API_TOKEN, BASE_URL, LOGS_STARTS_WITH +from .consts import API_TOKEN, BASE_URL, LOGS_STARTS_WITH, COMPANY_FETCH_QUERY from .logger import applogger @@ -36,7 +36,6 @@ def check_environment_var_exist(self, environment_var): Returns: bool: True if all environment variables are set, False otherwise. """ - __method_name = inspect.currentframe().f_code.co_name try: applogger.debug( "BitSight: check_environment_var_exist: started checking existence of all custom environment variable" @@ -86,7 +85,7 @@ def generate_auth_token(self): raise BitSightException() def get_last_data_index( - self, company_names, checkpoint_obj: CheckpointManager, company_state + self, company_names, checkpoint_obj: CheckpointManager, company_state, table_name ): """Get the index for fetching last data. @@ -94,12 +93,13 @@ def get_last_data_index( company_names (list): List of company names. checkpoint_obj (CheckpointManager): CheckpointManager object. company_state (str): State of the company. + table_name (str): Table name from which data should be fetched in case of checkpoint file corrupted. Returns: int: Index for fetching last data. """ last_company_name = checkpoint_obj.get_last_data( - company_state, company_name_flag=True + company_state, company_name_flag=True, table_name=table_name, checkpoint_query=COMPANY_FETCH_QUERY ) fetching_index = -1 if last_company_name is not None: diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/consts.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/consts.py index 7fbe0af2119..a930a6a0c54 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/consts.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/consts.py @@ -52,3 +52,20 @@ FINDINGS_FUNC_NAME = "Findings:" COMPANY_DETAILS_FUNC_NAME = "Company Details:" ALERT_GRAPH_STATISTICS_FUNC_NAME = "Alerts-Graph-statistics Details:" +PORTFOLIO_COMPANY_QUERY = """{}_CL + | summarize arg_max(TimeGenerated, *) by guid_g + | sort by name_s asc + | project name_s, guid_g""".format( + COMPANIES_TABLE_NAME + ) +FINDING_DETAILS_QUERY = """{}_CL + | summarize arg_max(TimeGenerated, *) by Key_s + | sort by Key_s asc + | project Key_s, Value_s""" +CHECKPOINT_DATA_QUERY = """{}_CL + | summarize arg_max(TimeGenerated, *) by Key_g + | sort by Key_g asc + | project Key_g, Value_s""" +COMPANY_FETCH_QUERY = """{}_CL + | summarize arg_max(TimeGenerated, *) by Key_s + | project Key_s, Value_s""" diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/get_logs_data.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/get_logs_data.py index 768bb5c1abb..221085c5164 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/get_logs_data.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/get_logs_data.py @@ -9,9 +9,9 @@ AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_TENANT_ID, - COMPANIES_TABLE_NAME, LOGS_STARTS_WITH, WORKSPACE_ID, + PORTFOLIO_COMPANY_QUERY ) from .logger import applogger @@ -25,11 +25,11 @@ def parse_table_data(rows): return data_to_return -def get_logs_data(): +def get_logs_data(query=PORTFOLIO_COMPANY_QUERY): """Get data from log analytics workspace. Args: - time_generated (string): Time generated data + query (str): query to be executed. Returns: list: List containing the table data. @@ -41,11 +41,6 @@ def get_logs_data(): tenant_id=AZURE_TENANT_ID, ) client = LogsQueryClient(credential) - query = """{}_CL - | sort by name_s asc - | project name_s, guid_g""".format( - COMPANIES_TABLE_NAME - ) try: response = client.query_workspace( workspace_id=WORKSPACE_ID, query=query, timespan=None @@ -62,7 +57,9 @@ def get_logs_data(): applogger.debug("BitSight: get_logs_data: Data count: {}".format(len(data_to_send))) return data_to_send, True except Exception as error: - if "Failed to resolve table or column expression" in str(error): + table_not_exist = "Failed to resolve table expression" + column_not_exist = "Failed to resolve scalar expression" + if table_not_exist or column_not_exist in str(error): applogger.error( "{}(method={}) : TableName provided is not Created or Data is not Ingested.".format( LOGS_STARTS_WITH, diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/utils.py b/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/utils.py index 33193753808..b512f3af54c 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/utils.py +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/SharedCode/utils.py @@ -1,9 +1,12 @@ """This file contains implementation of companies endpoint.""" + import json from ..SharedCode.logger import applogger from ..SharedCode.bitsight_exception import BitSightException from ..SharedCode.state_manager import StateManager -from ..SharedCode.consts import CONN_STRING, LOGS_STARTS_WITH +from ..SharedCode.consts import CONN_STRING, LOGS_STARTS_WITH, CHECKPOINT_DATA_QUERY +from ..SharedCode.azure_sentinel import MicrosoftSentinel +from ..SharedCode.get_logs_data import get_logs_data class CheckpointManager: @@ -12,6 +15,7 @@ class CheckpointManager: def __init__(self) -> None: """Initialize CheckpointManager object.""" self.connection_string = CONN_STRING + self.sentinel_obj = MicrosoftSentinel() def get_state(self, file_path): """Get StateManager instance for a specific file path. @@ -26,32 +30,132 @@ def get_state(self, file_path): connection_string=self.connection_string, file_path=file_path ) - def get_last_data(self, state: StateManager, company_name_flag=False): + def remove_truncated_data(self, data): + """Function to remove the truncated data from the checkpoint file. + + Args: + data (str): data from the checkpoint file. + + Returns: + list: data after removing the truncated data. + """ + truncated_log_prefix = "REMOVE TRUNCATED DATA" + try: + reversed_data = data[::-1] + comma_index = reversed_data.find(",") + updated_data = data[: -comma_index - 1] + "]" + data = json.loads(updated_data) + return data + except json.JSONDecodeError as err: + applogger.error( + "{} {}: JSONDecodeError: Unable to loads JSON data from Sentinel table. Error: {}".format( + LOGS_STARTS_WITH, truncated_log_prefix, err + ) + ) + raise BitSightException() + except Exception as err: + applogger.error( + "{} {}: Exception: Unknown error while loads JSON data from Sentinel table. Error: {}".format( + LOGS_STARTS_WITH, truncated_log_prefix, err + ) + ) + raise BitSightException() + + def set_checkpoint_file_content(self, state: StateManager, content, company_flag): + """Function update checkpoint file after getting corrupted data. + + Args: + state (StateManager): StateManager instance. + content (list): list of objects from Checkpoint Table for corresponding function. + company_flag (bool): Flag indicating if the company name is requested. + + Raises: + BitSightException: Raised for any exception during updating Checkpoint file. + + Returns: + dict: dict object of checkpoint data. + str: company name if company_flag is True + """ + checkpoint_content = {} + log_prefix = "SET CHECKPOINT FILE CONTENT" + try: + for data in content: + val_val = data.get("Value_s") + key_val = data.get("Key_g") or data.get("Key_s") + if not key_val: + continue + if "[" in val_val and "]" in val_val: + try: + checkpoint_content[key_val] = json.loads(val_val) + except json.JSONDecodeError: + applogger.error( + "{} {}: JSONDecodeError: Unable to loads JSON data for {} key from Sentinel table.".format( + LOGS_STARTS_WITH, log_prefix, key_val + ) + ) + continue + elif "[" in val_val: + applogger.info( + "{} {}: Truncated data found for {} key from Sentinel table.".format( + LOGS_STARTS_WITH, log_prefix, key_val + ) + ) + updated_val = self.remove_truncated_data(val_val) + checkpoint_content[key_val] = updated_val + else: + checkpoint_content[key_val] = val_val + state.post(json.dumps(checkpoint_content)) + applogger.info( + "{} {}: Checkpoint file data updated from corrupted data.".format( + LOGS_STARTS_WITH, log_prefix + ) + ) + if company_flag: + return val_val + return checkpoint_content + except Exception as error: + applogger.error("{} {} Error: {}".format(LOGS_STARTS_WITH, log_prefix, error)) + raise BitSightException() + + def get_last_data( + self, state: StateManager, table_name=None, company_name_flag=False, checkpoint_query=CHECKPOINT_DATA_QUERY + ): """Fetch last data from the checkpoint file. Args: state (StateManager): StateManager instance. + table_name (str): Table name for the checkpoint file. company_name_flag (bool): Flag indicating if the company name is requested. + checkpoint_query (str): Query for fetching checkpoint data from table.default query is CHECKPOINT_DATA_QUERY Returns: - None/json: Last data from the checkpoint file. + None/json: Last data from the checkpoint file or None. + str: company name if company_name_flag is True """ try: last_data = state.get() - # applogger.info("Checkpoint Data: {}".format(last_data)) if last_data: if company_name_flag: - return last_data + company_name = json.loads(last_data).get("company_name") + return company_name return json.loads(last_data) else: - applogger.debug( - "{}: GET LAST DATA: Checkpoint is not available.".format( - LOGS_STARTS_WITH - ) - ) + applogger.debug("{} GET LAST DATA: Checkpoint is not available.".format(LOGS_STARTS_WITH)) + return None + except json.decoder.JSONDecodeError as error: + applogger.error("{} GET LAST DATA: JSONDecodeError: {}".format(LOGS_STARTS_WITH, error)) + if company_name_flag: + table = "{}_{}".format(table_name, "Company_Checkpoint") + else: + table = "{}_{}".format(table_name, "Checkpoint") + applogger.debug("{} Fetching Checkpoint data from table: {}".format(LOGS_STARTS_WITH, table)) + logs_data, logs_flag = get_logs_data(checkpoint_query.format(table)) + if logs_flag: + return self.set_checkpoint_file_content(state, logs_data, company_flag=company_name_flag) + else: return None except Exception as err: - applogger.exception("{}: GET LAST DATA: {}".format(LOGS_STARTS_WITH, err)) + applogger.exception("{} GET LAST DATA Error: {}".format(LOGS_STARTS_WITH, err)) raise BitSightException() def save_checkpoint( @@ -59,6 +163,7 @@ def save_checkpoint( state, data, endpoint, + table_name, checkpoint_key=None, value=None, company_name_flag=False, @@ -69,6 +174,7 @@ def save_checkpoint( state (StateManager): StateManager instance. data (dict): Data to be saved in the checkpoint. endpoint (str): Endpoint for which the checkpoint is being saved. + table_name (str): Table name for which the checkpoint is being saved. checkpoint_key (str): Checkpoint key. value (str): Value to be set in the checkpoint key. company_name_flag (bool): Flag indicating if the company name is used. @@ -78,29 +184,89 @@ def save_checkpoint( """ try: if company_name_flag: - state.post(data) + company_data = {"company_name": data} + state.post(json.dumps(company_data)) + self.save_checkpoint_to_sentinel("company_name", data, table_name) + applogger.info( + "{} save_checkpoint: {} -> {} successfully posted data.".format(LOGS_STARTS_WITH, endpoint, data) + ) else: if data is None: data = {} - # data[endpoint] = {} - # elif endpoint not in data: - # data[endpoint] = {} data[checkpoint_key] = value + self.save_checkpoint_to_sentinel(checkpoint_key, value, table_name) state.post(json.dumps(data)) applogger.debug( "BitSight: save_checkpoint: {}: {}: Posted Data: {}".format( endpoint, checkpoint_key, data[checkpoint_key] ) ) - applogger.info( - "BitSight: save_checkpoint: {} -> {} successfully posted data.".format( - endpoint, checkpoint_key - ) - ) except Exception as err: applogger.exception("BitSight: SAVE CHECKPOINT: {}".format(err)) raise BitSightException() + def save_checkpoint_to_sentinel(self, checkpoint_key, checkpoint_value, table_name): + """Save Checkpoint data into sentinel for backup purpose + + Args: + checkpoint_key (str): unique key of the checkpoint value + checkpoint_value (str): checkpoint value of the key to be stored + table_name (str): Name of the Table in which data to be stored + + Raises: + BitSightException: Raised for any exception during checkpoint data ingesting into sentinel. + """ + try: + json_data = self.convert_data_to_dict(checkpoint_key, checkpoint_value) + body = json.dumps(json_data) + ingestion_status_code = self.sentinel_obj.post_data(body, table_name) + if ingestion_status_code >= 200 and ingestion_status_code <= 299: + applogger.info( + "{} Checkpoint value: {} Stored to Sentinel Table Name: {} : Status code: {}".format( + LOGS_STARTS_WITH, + body, + table_name, + ingestion_status_code, + ) + ) + else: + applogger.error( + "{} Error while storing Checkpoint value: {} into Sentinel Table Name: {} : Status code: {}".format( + LOGS_STARTS_WITH, + body, + table_name, + ingestion_status_code, + ) + ) + raise BitSightException() + except BitSightException: + raise BitSightException() + except Exception as err: + applogger.error("{} SAVE CHECKPOINT TO SENTINEL: {}".format(LOGS_STARTS_WITH, err)) + raise BitSightException() + + def convert_data_to_dict(self, key, value): + """Convert Key Value of checkpoint into a dict for storing into MS Sentinel Table + + Args: + key (str): unique key of the value + value (str): value of key + """ + try: + if not key and not value: + applogger.warning( + "{} Both Key and Value must be non empty. Key: {} and Value: {}.".format( + LOGS_STARTS_WITH, key, value + ) + ) + raise ValueError() + return {"Key": key, "Value": value} + except ValueError as err: + applogger.exception( + "{} Error Occurred while creating dict from key value: {}".format(LOGS_STARTS_WITH, err) + ) + raise BitSightException() + def get_endpoint_last_data(self, endpoint_last_data, endpoint, checkpoint_key): """Get last data for a specific endpoint and checkpoint key. diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/azuredeploy_BitSight_API_FunctionApp.json b/Solutions/BitSight/Data Connectors/BitSightDataConnector/azuredeploy_BitSight_API_FunctionApp.json index 15fde711a88..87b83635bc0 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/azuredeploy_BitSight_API_FunctionApp.json +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/azuredeploy_BitSight_API_FunctionApp.json @@ -246,7 +246,7 @@ "alwaysOn": true, "reserved": true, "siteConfig": { - "linuxFxVersion": "python|3.9" + "linuxFxVersion": "python|3.11" } }, "resources": [ diff --git a/Solutions/BitSight/Data Connectors/BitSightDataConnector/requirements.txt b/Solutions/BitSight/Data Connectors/BitSightDataConnector/requirements.txt index 0c1f577d818..a5307d9a2b4 100644 --- a/Solutions/BitSight/Data Connectors/BitSightDataConnector/requirements.txt +++ b/Solutions/BitSight/Data Connectors/BitSightDataConnector/requirements.txt @@ -7,5 +7,4 @@ requests #Libraries for getting data from LogAnalytics azure-monitor-query azure-identity -cryptography==43.0.1 -azure-storage-file-share==12.15.0 \ No newline at end of file +azure-storage-file-share==12.10.0 \ No newline at end of file