-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathcommon_tools.py
More file actions
133 lines (103 loc) · 3.97 KB
/
common_tools.py
File metadata and controls
133 lines (103 loc) · 3.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import requests
import json
import re
import sys
# CSI sequence as defined by ECMA-48: ESC '[' followed by any number of
# parameter bytes (0x30-0x3F, i.e. digits and ":;<=>?"), any number of
# intermediate bytes (0x20-0x2F) and exactly one final byte (0x40-0x7E).
_ANSI_CSI_RE = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')


def remove_ansi_escape_sequences(text: str) -> str:
    """Remove ANSI CSI escape sequences from a string.

    Besides plain colour/cursor codes such as ``ESC[31m`` this also strips
    private-mode sequences like ``ESC[?25l`` or ``ESC[?2004h``, whose
    parameter bytes are not limited to digits and semicolons.

    Args:
        text (str): Input string possibly containing ANSI escape sequences

    Returns:
        str: The input with every CSI escape sequence removed
    """
    return _ANSI_CSI_RE.sub('', text)
def extract_code_between_triple_quotes(input_str):
    """Return the text enclosed by the first pair of triple double-quotes.

    The match may span several lines; surrounding whitespace of the captured
    text is stripped before it is returned.

    Args:
        input_str (str): Text expected to contain a triple-quoted section

    Returns:
        str: The stripped content of the first triple-quoted section, or an
            empty string when no complete section is present
    """
    found = re.search(r'"""(.*?)"""', input_str, flags=re.S)
    return found.group(1).strip() if found else ''
def extract_list(input_str):
    """Extract the elements of a Scala ``List(...)`` rendered as text.

    The first ``List(`` in the input up to the closing parenthesis at the end
    of the input is taken as the list body. If the body contains triple-quoted
    elements those are returned verbatim; otherwise regular double-quoted
    elements are returned with ``\\"`` and ``\\\\`` unescaped and blank
    elements dropped.

    Args:
        input_str (str): Text containing a Scala List representation; may be
            empty or None

    Returns:
        list: The extracted string elements, or an empty list when the input
            is empty or does not end with a ``List(...)`` expression
    """
    if not input_str:
        return []

    # Trailing whitespace (spaces, "\r\n", blank lines) after the closing
    # parenthesis is tolerated; a bare "$" would only allow a single "\n".
    list_match = re.search(r'List\((.*?)\)\s*$', input_str, re.DOTALL)
    if not list_match:
        return []
    content = list_match.group(1).strip()

    # Triple-quoted elements take precedence and are returned untouched.
    triple_quoted = re.findall(r'"""(.*?)"""', content, re.DOTALL)
    if triple_quoted:
        return triple_quoted

    # Regular quoted elements: the pattern steps over backslash escapes so an
    # escaped quote does not terminate the element early.
    quoted = re.findall(r'"((?:\\.|[^"\\])*?)"', content, re.DOTALL)
    return [
        item.replace('\\"', '"').replace('\\\\', '\\')
        for item in quoted
        if item.strip()
    ]
def extract_quoted_string(input_str: str) -> str:
    """Extract the content of the first double-quoted string on a line.

    Backslash escapes inside the quotes are honoured while matching, so an
    escaped quote does not end the string early. In the returned value
    ``\\"`` and ``\\\\`` are unescaped, mirroring how extract_list treats
    regular quoted elements.

    Args:
        input_str (str): Input string containing quoted content

    Returns:
        str: Unescaped content between the quotes, or empty string if no
            complete quoted string is found
    """
    # "\n" is excluded from the character class so that, as before, a quoted
    # string never spans more than one line.
    match = re.search(r'"((?:\\.|[^"\\\n])*?)"', input_str)
    if not match:
        return ''
    return match.group(1).replace('\\"', '"').replace('\\\\', '\\')
def extract_long_value(input_str: str) -> str:
    """Extract a Long literal from the textual form of a Scala Long variable.

    Args:
        input_str (str): Input string containing a Scala Long value
            (e.g. 'val res4: Long = 90194313219L' or 'val res5: Long = -42L')

    Returns:
        str: The Long literal including an optional leading minus sign and
            the 'L' suffix, or empty string if not found
    """
    # The optional "-" lets negative Long values through; without it they
    # would not match at all.
    match = re.search(r'= (-?\d+L)', input_str)
    return match.group(1) if match else ''
def extract_value(input_str: str) -> str:
    """Pick the matching extractor for a value rendered as text.

    The markers are tested in this order and the first hit wins:
        * 'Long =' present: delegate to extract_long_value
        * a String assignment opening with triple quotes: delegate to
          extract_code_between_triple_quotes
        * a String assignment opening with a regular double quote: delegate
          to extract_quoted_string

    Args:
        input_str (str): Input string containing a value to extract

    Returns:
        str: Whatever the selected extractor returns, or the input itself
            when none of the markers is present
    """
    if 'Long =' in input_str:
        return extract_long_value(input_str)
    # The triple-quote marker must be tested before the plain-quote marker,
    # because the latter is a prefix of the former.
    if 'String = """' in input_str:
        return extract_code_between_triple_quotes(input_str)
    if 'String = "' in input_str:
        return extract_quoted_string(input_str)
    # No recognised marker: hand the text back untouched.
    return input_str