Skip to content

Commit 1a086f2

Browse files
gorskysd (Sean Gorsky)
and authored
Bugfix/prod 442 missing rollover logs results in unknown error message (#17)
* Substituted some ValueErrors for SyncParserExceptions so errors will pass through to user * incremented version * fixed new exception case * make tidy * align SyncParserException with SyncException * Kept version at 0.1.5 * More cleanup on exceptions * Small PR changes Co-authored-by: Sean Gorsky <[email protected]>
1 parent bb82e49 commit 1a086f2

File tree

4 files changed

+108
-84
lines changed

4 files changed

+108
-84
lines changed

spark_log_parser/eventlog.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import pandas as pd
66

7+
from spark_log_parser.parsing_models.exceptions import LogSubmissionException
8+
79

810
class EventLogBuilder:
911
def __init__(
@@ -27,7 +29,9 @@ def _validate_work_dir(self, work_dir: Path | str) -> Path:
2729
def build(self) -> tuple[Path, bool]:
2830

2931
if not self.event_log_paths:
30-
raise ValueError("No files found")
32+
raise LogSubmissionException(
33+
error_message="No Spark eventlogs were found in submission"
34+
)
3135

3236
self.event_log, self.parsed = self._get_event_log(self.event_log_paths)
3337

@@ -62,18 +66,20 @@ def _get_event_log(self, paths: list[Path]) -> tuple[Path, bool]:
6266
continue
6367

6468
if len(log_files) > 1 and parsed:
65-
raise ValueError("A parsed log file was submitted with other log files")
69+
raise LogSubmissionException("A parsed log file was submitted with other log files")
6670

6771
if rollover_dat:
6872
if len(log_files) > len(rollover_dat):
69-
raise ValueError(
70-
"Rollover logs were detected, but not all files had rollover properties"
73+
raise LogSubmissionException(
74+
error_message="Rollover logs were detected, but not all files had rollover properties"
7175
)
7276

7377
return self._concat(rollover_dat), False
7478

7579
if len(log_files) > 1:
76-
raise ValueError("Multiple files detected without log rollover properties")
80+
raise LogSubmissionException(
81+
error_message="Multiple files detected without log rollover properties"
82+
)
7783

7884
return log_files[0], parsed
7985

@@ -85,15 +91,17 @@ def _concat(self, rollover_dat: list[tuple[str, str, str]]) -> Path:
8591
)
8692

8793
if not len(rollover_df.context_id.unique()) == 1:
88-
raise ValueError("Not all rollover log files have the same Spark context ID")
94+
raise LogSubmissionException(
95+
error_message="Not all rollover log files have the same Spark context ID"
96+
)
8997

9098
diffs = rollover_df.rollover_index.diff()
9199

92100
if any(diffs > 1) or rollover_df.rollover_index[0] > 0:
93-
raise ValueError("Rollover log file appears to be missing")
101+
raise LogSubmissionException(error_message="One or more rollover logs is missing")
94102

95103
if any(diffs < 1):
96-
raise ValueError("Duplicate rollover log file detected")
104+
raise LogSubmissionException(error_message="Duplicate rollover log file detected")
97105

98106
event_log = Path(tempfile.mkstemp(suffix="-concatenated.json", dir=str(self.work_dir))[1])
99107
with open(event_log, "w") as fobj:

spark_log_parser/parsing_models/errors.py

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,52 +2,58 @@
22

33
logger = logging.getLogger("ParserExceptionLogger")
44

5-
class ParserErrorMessages():
65

7-
SPARK_CONFIG_GENERIC_MESSAGE = (
6+
class ParserErrorMessages:
87

9-
"Some configurations were detected that are not yet supported by the Sync Autotuner. " +
10-
"If you would like to try again, please make the following configuration changes and " +
11-
"rerun your application:"
8+
SPARK_CONFIG_GENERIC_MESSAGE = (
9+
"Some configurations were detected that are not yet supported by the Sync Autotuner. "
10+
+ "If you would like to try again, please make the following configuration changes and "
11+
+ "rerun your application:"
1212
)
1313

14-
MISSING_EVENT_JOB_START = (
15-
"Event SparkListenerJobStart was missing for the following jobs: ")
16-
MISSING_EVENT_JOB_END = (
17-
"Event SparkListenerJobEnd was missing for the following jobs: ")
14+
MISSING_EVENT_JOB_START = "Event SparkListenerJobStart was missing for the following jobs: "
15+
MISSING_EVENT_JOB_END = "Event SparkListenerJobEnd was missing for the following jobs: "
1816

1917
MISSING_EVENT_STAGE_SUBMIT = (
20-
"Event SparkListenerStageSubmitted was missing for the following stages: ")
18+
"Event SparkListenerStageSubmitted was missing for the following stages: "
19+
)
2120
MISSING_EVENT_STAGE_COMPLETE = (
22-
"Event SparkListenerStageCompleted was missing for the following stages: ")
21+
"Event SparkListenerStageCompleted was missing for the following stages: "
22+
)
2323

2424
MISSING_EVENT_GENERIC_MESSAGE = (
25-
"Some Spark Listener Event data is missing from the eventlog related to: ")
25+
"Some Spark Listener Event data is missing from the eventlog related to: "
26+
)
2627

2728
MISSING_EVENT_EXPLANATION = (
28-
"This Event data is necessary for the Sync Autotuner. " +
29-
"Events may be missing for a number of reasons including " +
30-
"-- (1) The application did not complete successfully " +
31-
"-- (2) If these are rollover logs, there may be one or more missing logs " +
32-
"-- (3) Spark Listener communication failures. " +
33-
"There are some steps that can help mitigate these issues " +
34-
"-- (1) Ensure the SparkContext closes correctly at the end of your application, e.g. " +
35-
"by using sc.stop() " +
36-
"-- (2) If you submitted a rollover log set, ensure that all rollover logs for the " +
37-
"application were submitted " +
38-
"-- (3) Resubmit the next log produced with this application."
29+
"This Event data is necessary for the Sync Autotuner. "
30+
+ "Events may be missing for a number of reasons including "
31+
+ "-- (1) The application did not complete successfully "
32+
+ "-- (2) If these are rollover logs, there may be one or more missing logs "
33+
+ "-- (3) Spark Listener communication failures. "
34+
+ "There are some steps that can help mitigate these issues "
35+
+ "-- (1) Ensure the SparkContext closes correctly at the end of your application, e.g. "
36+
+ "by using sc.stop() "
37+
+ "-- (2) If you submitted a rollover log set, ensure that all rollover logs for the "
38+
+ "application were submitted "
39+
+ "-- (3) Resubmit the next log produced with this application."
3940
)
4041

4142
SUPPORT_MESSAGE = (
42-
"If you have questions or would like assistance in resolving the issue " +
43-
"please contact our support at [email protected].")
43+
"If you have questions or would like assistance in resolving the issue "
44+
+ "please contact our support at [email protected]."
45+
)
4446

45-
class ParserErrorTypes():
47+
48+
class ParserErrorTypes:
4649

4750
SPARK_CONFIG_ERROR = "Invalid Spark Configuration Error"
48-
MISSING_EVENT_ERROR = 'Event missing from Spark eventlog'
51+
MISSING_EVENT_ERROR = "Event missing from Spark eventlog"
52+
LOG_SUBMISSION_ERROR = "Invalid log submission"
53+
4954

50-
class ParserErrorCodes():
55+
class ParserErrorCodes:
5156

5257
SPARK_CONFIG_ERROR = 2001
53-
SPARK_EVENT_ERROR = 2002
58+
SPARK_EVENT_ERROR = 2002
59+
LOG_SUBMISSION_ERROR = 2003
Lines changed: 42 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
import json
22
import logging
33

4-
from .errors import ParserErrorMessages, ParserErrorTypes, ParserErrorCodes
4+
from .errors import ParserErrorCodes, ParserErrorMessages, ParserErrorTypes
55

66
logger = logging.getLogger("ParserExceptionLogger")
77

8+
89
class SyncParserException(Exception):
910
def __init__(
10-
self,
11-
error_type: str = None,
12-
error_message: str = None,
13-
status_code: int = None,
14-
exception: Exception = None ):
11+
self,
12+
error_type: str = None,
13+
error_message: str = None,
14+
status_code: int = None,
15+
):
1516

1617
super().__init__(error_message)
1718

@@ -20,84 +21,88 @@ def __init__(
2021
self.status_code = status_code
2122

2223
# 2022-03-25 RW: Format the information in the way it is expected by the backend code
23-
self.error = {
24-
"error" : error_type,
25-
"message" : error_message
26-
}
27-
28-
logger.error(error_message)
29-
if exception:
30-
logger.exception(exception)
31-
24+
self.error = {"error": error_type, "message": error_message}
3225

3326
def get_ui_return_value(self) -> dict:
3427
"""
3528
A possible rendering of one set of return information as dict
3629
"""
37-
return {
38-
"error": self.error_type,
39-
"message": self.error_message
40-
}
30+
return {"error": self.error_type, "message": self.error_message}
4131

4232
def get_ui_return_value_as_json(self) -> json:
4333
"""
4434
A possible rendering of one set of return information as JSON
4535
"""
46-
return json.dumps( {
47-
"error" : self.error_type,
48-
"message" : self.error_message
49-
} )
36+
return json.dumps({"error": self.error_type, "message": self.error_message})
5037

51-
class ConfigurationException(SyncParserException):
5238

39+
class ConfigurationException(SyncParserException):
5340
def __init__(self, config_recs: str):
5441

55-
5642
error_message = ParserErrorMessages.SPARK_CONFIG_GENERIC_MESSAGE
5743

5844
for idx, c in enumerate(config_recs):
59-
count = idx+1
60-
error_message += f' ({count}) {c}'
45+
count = idx + 1
46+
error_message += f" ({count}) {c}"
6147

62-
error_message += f'. {ParserErrorMessages.SUPPORT_MESSAGE}'
48+
error_message += f". {ParserErrorMessages.SUPPORT_MESSAGE}"
6349

6450
super().__init__(
6551
error_type=ParserErrorTypes.SPARK_CONFIG_ERROR,
6652
error_message=error_message,
6753
status_code=ParserErrorCodes.SPARK_CONFIG_ERROR,
68-
exception=None)
54+
)
55+
6956

7057
class LazyEventValidationException(SyncParserException):
7158
"""
7259
This Exception is for missing event data that doesn't immediately kill the parser.
7360
All of the related missing events can be gathered and identified in the error message.
7461
"""
62+
7563
def __init__(self, error_message: str):
7664

7765
error_message += (
78-
f"{ParserErrorMessages.MISSING_EVENT_EXPLANATION} " +
79-
f"{ParserErrorMessages.SUPPORT_MESSAGE}")
66+
f"{ParserErrorMessages.MISSING_EVENT_EXPLANATION} "
67+
+ f"{ParserErrorMessages.SUPPORT_MESSAGE}"
68+
)
8069

8170
super().__init__(
8271
error_type=ParserErrorTypes.MISSING_EVENT_ERROR,
8372
error_message=error_message,
8473
status_code=ParserErrorCodes.SPARK_EVENT_ERROR,
85-
exception=None)
74+
)
8675

8776

8877
class UrgentEventValidationException(SyncParserException):
8978
"""
9079
This Exception is for missing event data that stops the parser dead in its tracks.
9180
"""
92-
def __init__(self, missing_event: str = ''):
81+
82+
def __init__(self, missing_event: str = ""):
9383

9484
error_message = (
95-
f"{ParserErrorMessages.MISSING_EVENT_GENERIC_MESSAGE} '{missing_event}'. " +
96-
f"{ParserErrorMessages.MISSING_EVENT_EXPLANATION} " +
97-
f"{ParserErrorMessages.SUPPORT_MESSAGE}")
85+
f"{ParserErrorMessages.MISSING_EVENT_GENERIC_MESSAGE} '{missing_event}'. "
86+
+ f"{ParserErrorMessages.MISSING_EVENT_EXPLANATION} "
87+
+ f"{ParserErrorMessages.SUPPORT_MESSAGE}"
88+
)
9889

9990
super().__init__(
10091
error_type=ParserErrorTypes.MISSING_EVENT_ERROR,
10192
error_message=error_message,
10293
status_code=ParserErrorCodes.SPARK_EVENT_ERROR,
103-
exception=None)
94+
)
95+
96+
97+
class LogSubmissionException(SyncParserException, ValueError):
98+
"""
99+
This Exception is for malformed log submission
100+
"""
101+
102+
def __init__(self, error_message: str):
103+
104+
super().__init__(
105+
error_type=ParserErrorTypes.LOG_SUBMISSION_ERROR,
106+
error_message=error_message,
107+
status_code=ParserErrorCodes.LOG_SUBMISSION_ERROR,
108+
)

tests/test_bad_eventlog.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
from zipfile import ZipFile
55

66
from spark_log_parser import eventlog, extractor
7+
from spark_log_parser.parsing_models.exceptions import LogSubmissionException
78

89

910
class BadEventLog(unittest.TestCase):
10-
def check_value_error(self, event_log_path, msg):
11+
def check_sync_exceptions(self, event_log_path, msg):
1112

1213
with tempfile.TemporaryDirectory() as temp_dir:
13-
with self.assertRaises(ValueError) as cm:
14+
with self.assertRaises(LogSubmissionException) as cm:
1415

1516
event_log_paths = extractor.Extractor(event_log_path.as_uri(), temp_dir).extract()
1617
eventlog.EventLogBuilder(event_log_paths, temp_dir).build()
@@ -19,31 +20,33 @@ def check_value_error(self, event_log_path, msg):
1920

2021
def test_multiple_context_ids(self):
2122
event_log = Path("tests", "logs", "bad", "non-unique-context-id.zip").resolve()
22-
self.check_value_error(
23+
self.check_sync_exceptions(
2324
event_log, "Not all rollover log files have the same Spark context ID"
2425
)
2526

2627
def test_missing_dbc_event(self):
2728
event_log = Path("tests", "logs", "bad", "missing-dbc-event.zip").resolve()
28-
self.check_value_error(
29+
self.check_sync_exceptions(
2930
event_log, "Rollover logs were detected, but not all files had rollover properties"
3031
)
3132

3233
def test_duplicate_log_part(self):
3334
event_log = Path("tests", "logs", "bad", "duplicate-part.tgz").resolve()
34-
self.check_value_error(event_log, "Duplicate rollover log file detected")
35+
self.check_sync_exceptions(event_log, "Duplicate rollover log file detected")
3536

3637
def test_missing_log_part(self):
3738
event_log = Path("tests", "logs", "bad", "missing-part.zip").resolve()
38-
self.check_value_error(event_log, "Rollover log file appears to be missing")
39+
self.check_sync_exceptions(event_log, "One or more rollover logs is missing")
3940

4041
def test_missing_first_part(self):
4142
event_log = Path("tests", "logs", "bad", "missing-first-part.zip").resolve()
42-
self.check_value_error(event_log, "Rollover log file appears to be missing")
43+
self.check_sync_exceptions(event_log, "One or more rollover logs is missing")
4344

4445
def test_mixed_parsed(self):
4546
event_log = Path("tests", "logs", "bad", "mixed_parsed.zip").resolve()
46-
self.check_value_error(event_log, "A parsed log file was submitted with other log files")
47+
self.check_sync_exceptions(
48+
event_log, "A parsed log file was submitted with other log files"
49+
)
4750

4851
def test_only_non_first_part(self):
4952
with tempfile.TemporaryDirectory() as temp_dir:
@@ -52,8 +55,10 @@ def test_only_non_first_part(self):
5255
[zinfo for zinfo in zfile.infolist() if not zinfo.is_dir()][0], temp_dir
5356
)
5457

55-
self.check_value_error(Path(temp_dir), "Rollover log file appears to be missing")
58+
self.check_sync_exceptions(Path(temp_dir), "One or more rollover logs is missing")
5659

5760
def test_empty_log_dir(self):
5861
with tempfile.TemporaryDirectory() as temp_dir:
59-
self.check_value_error(Path(temp_dir), "No files found")
62+
self.check_sync_exceptions(
63+
Path(temp_dir), "No Spark eventlogs were found in submission"
64+
)

0 commit comments

Comments
 (0)