Skip to content

Commit 7f37219

Browse files
authored
improve mindfa str expr + reorder dimensions in TCP properties: hosts before paths (#434)
* improve mindfa str expr + reorder dimensions in TCP properties: hosts before paths Signed-off-by: adisos <[email protected]>
1 parent c44fecc commit 7f37219

File tree

132 files changed

+424
-299
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

132 files changed

+424
-299
lines changed

nca/CoreDS/DimensionsManager.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,10 @@ class __DimensionsManager:
2424
def __init__(self):
2525
# TODO: verify alphabet for regex type dimensions, currently using one default alphabet
2626
# currently valid chars are: ['.', '/', '-', 0-9, a-z, A-Z ]
27-
self.default_dfa_alphabet_chars = ".\\w/\\-"
28-
self.default_dfa_alphabet_str = "[" + self.default_dfa_alphabet_chars + "]*"
2927
self.default_interval_domain_tuple = (0, 100000)
3028
self.domain_str_to_dfa_map = dict()
31-
dfa_all_words_default = self._get_dfa_from_alphabet_str(self.default_dfa_alphabet_str)
29+
dfa_all_words_default = self._get_dfa_from_alphabet_str(MinDFA.default_alphabet_regex)
30+
dfa_all_words_path_domain = self._get_dfa_path_domain()
3231
ports_interval = CanonicalIntervalSet.get_interval_set(1, 65535)
3332
all_methods_interval = MethodSet(True)
3433
all_peers_interval = CanonicalIntervalSet.get_interval_set(0, 10000) # assuming max possible peer number
@@ -37,7 +36,7 @@ def __init__(self):
3736
self.dim_dict["dst_ports"] = (DimensionsManager.DimensionType.IntervalSet, ports_interval)
3837
self.dim_dict["methods"] = (DimensionsManager.DimensionType.IntervalSet, all_methods_interval)
3938
self.dim_dict["peers"] = (DimensionsManager.DimensionType.IntervalSet, all_peers_interval)
40-
self.dim_dict["paths"] = (DimensionsManager.DimensionType.DFA, dfa_all_words_default)
39+
self.dim_dict["paths"] = (DimensionsManager.DimensionType.DFA, dfa_all_words_path_domain)
4140
self.dim_dict["hosts"] = (DimensionsManager.DimensionType.DFA, dfa_all_words_default)
4241

4342
icmp_type_interval = CanonicalIntervalSet.get_interval_set(0, 254)
@@ -58,6 +57,17 @@ def _get_dfa_from_alphabet_str(self, alphabet_str):
5857
self.domain_str_to_dfa_map[alphabet_str] = new_dfa
5958
return new_dfa
6059

60+
@staticmethod
61+
def _get_dfa_path_domain():
62+
"""
63+
get a dfa that represents all valid words in the paths domain
64+
:rtype MinDFA
65+
"""
66+
regex_str = "/" + MinDFA.default_alphabet_regex
67+
new_dfa = MinDFA.dfa_from_regex(regex_str)
68+
new_dfa.is_all_words = MinDFA.Ternary.TRUE
69+
return new_dfa
70+
6171
instance = None
6272

6373
def __init__(self):
@@ -95,7 +105,7 @@ def set_domain(self, dim_name, dim_type, interval_tuple=None, alphabet_str=None)
95105
interval = interval_tuple if interval_tuple is not None else self.default_interval_domain_tuple
96106
domain = CanonicalIntervalSet.get_interval_set(interval[0], interval[1])
97107
else:
98-
alphabet = alphabet_str if alphabet_str is not None else self.default_dfa_alphabet_str
108+
alphabet = alphabet_str if alphabet_str is not None else MinDFA.default_alphabet_regex
99109
domain = self._get_dfa_from_alphabet_str(alphabet)
100110
self.dim_dict[dim_name] = (dim_type, domain)
101111

nca/CoreDS/MinDFA.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
# SPDX-License-Identifier: Apache2.0
44
#
55
from greenery import fsm, parse
6-
from greenery.rxelems import from_fsm
76
from functools import lru_cache
87

98

109
# TODO: consider adding abstract base class for MinDFA and CanonicalIntervalSet , with common api
1110

11+
1212
class MinDFA:
1313
"""
1414
MinDFA is a wrapper class for greenery.fsm , to support the api required for dimensions in hypercube-set
@@ -42,6 +42,8 @@ class MinDFA:
4242
(no mix of MinDFA objects from different dimensions context)
4343
4444
"""
45+
default_dfa_alphabet_chars = ".\\w/\\-"
46+
default_alphabet_regex = "[.\\w/\\-]*"
4547

4648
class Ternary:
4749
FALSE = 0
@@ -59,10 +61,14 @@ def __init__(self, alphabet, states, initial, finals, map):
5961
necessary)
6062
complement_dfa: MinDFA of the complement dfa of self, e.g: relevant when doing subtraction from 'all'.
6163
for performance improvement (avoid computation of complement if could use this member instead).
64+
65+
regex_expr: str representation of regex expressions (possibly) with operations (subtract/intersect/union),
66+
from which the MinDFA object was constructed
6267
"""
6368
self.fsm = fsm.Fsm(initial, finals, alphabet, states, map)
6469
self.is_all_words = MinDFA.Ternary.UNKNOWN
6570
self.complement_dfa = None
71+
self.regex_expr = ''
6672

6773
def __contains__(self, string):
6874
return string in self.fsm
@@ -109,6 +115,7 @@ def dfa_from_regex(s, alphabet=None):
109115
# TODO: currently assuming input str as regex only has '*' operator for infinity
110116
if '*' not in s:
111117
res.is_all_words = MinDFA.Ternary.FALSE
118+
res.regex_expr = s.replace(MinDFA.default_alphabet_regex, "*")
112119
return res
113120

114121
@staticmethod
@@ -120,6 +127,7 @@ def dfa_all_words(alphabet):
120127
"""
121128
res = MinDFA.dfa_from_regex(alphabet)
122129
res.is_all_words = MinDFA.Ternary.TRUE
130+
res.regex_expr = '*'
123131
return res
124132

125133
# TODO: this function may not be necessary, if keeping the current __eq__ override
@@ -175,15 +183,18 @@ def __str__(self):
175183
"""
176184
str representation of the language accepted by this DFA:
177185
- option 1: if language has finite number of words -> return string with all accepted words.
178-
- option 2 (costly): convert fsm to regex with greenery
186+
- option 2 : a string of regex expressions with accumulated operations, from which the object was constructed.
179187
:rtype: str
180188
"""
189+
181190
if self.has_finite_len():
182191
return self._get_strings_set_str()
183192
if self.is_all_words == MinDFA.Ternary.TRUE:
184193
return "*"
185-
# TODO: consider performance implications of this conversion from MinDFA to regex
186-
return str(from_fsm(self.fsm))
194+
return self.regex_expr
195+
# the commented-out line below is the previous alternative: converting the MinDFA back to a regex
196+
# it was dropped because the resulting regex was not readable and the conversion had performance implications
197+
# return str(from_fsm(self.fsm))
187198

188199
def get_fsm_str(self):
189200
"""
@@ -219,6 +230,11 @@ def __or__(self, other):
219230
res = MinDFA.dfa_from_fsm(fsm_res)
220231
if res.has_finite_len():
221232
res.is_all_words = MinDFA.Ternary.FALSE
233+
# update regex_expr of the result object
234+
if self.regex_expr == other.regex_expr:
235+
res.regex_expr = self.regex_expr
236+
else:
237+
res.regex_expr = f'({self.regex_expr})|({other.regex_expr})'
222238
return res
223239

224240
@lru_cache(maxsize=500)
@@ -231,18 +247,30 @@ def __and__(self, other):
231247
res = MinDFA.dfa_from_fsm(fsm_res)
232248
if self.is_all_words == MinDFA.Ternary.FALSE or other.is_all_words == MinDFA.Ternary.FALSE:
233249
res.is_all_words = MinDFA.Ternary.FALSE
250+
# update regex_expr of the result object
251+
if self.regex_expr == other.regex_expr:
252+
res.regex_expr = self.regex_expr
253+
else:
254+
res.regex_expr = f'({self.regex_expr})&({other.regex_expr})'
234255
return res
235256

236257
@lru_cache(maxsize=500)
237258
def __sub__(self, other):
259+
if self.is_all_words == MinDFA.Ternary.TRUE and other.complement_dfa is not None:
260+
return other.complement_dfa
261+
238262
fsm_res = self.fsm - other.fsm
239263
res = MinDFA.dfa_from_fsm(fsm_res)
240-
if other.is_all_words == MinDFA.Ternary.TRUE:
264+
# update regex_expr of the result object
265+
res.regex_expr = f'({self.regex_expr})-({other.regex_expr})'
266+
267+
if other.is_all_words == MinDFA.Ternary.TRUE: # res becomes empty
241268
res.is_all_words = MinDFA.Ternary.FALSE
242269
elif other:
243-
res.is_all_words = MinDFA.Ternary.FALSE
270+
res.is_all_words = MinDFA.Ternary.FALSE # res cannot be all words
244271
if self.is_all_words == MinDFA.Ternary.TRUE and not other:
245272
res.is_all_words = MinDFA.Ternary.TRUE
273+
246274
if self.is_all_words == MinDFA.Ternary.TRUE:
247275
res.complement_dfa = other
248276
other.complement_dfa = res

nca/CoreDS/TcpLikeProperties.py

Lines changed: 41 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class TcpLikeProperties(CanonicalHyperCubeSet):
3636
(2) calico: +ve and -ve named ports, no src named ports, and no use of operators between these objects.
3737
"""
3838

39-
dimensions_list = ["src_ports", "dst_ports", "methods", "paths", "hosts", "peers"]
39+
dimensions_list = ["src_ports", "dst_ports", "methods", "hosts", "paths", "peers"]
4040

4141
# TODO: change constructor defaults? either all arguments in "allow all" by default, or "empty" by default
4242
def __init__(self, source_ports=PortSet(), dest_ports=PortSet(), methods=MethodSet(True), paths=None, hosts=None,
@@ -59,37 +59,26 @@ def __init__(self, source_ports=PortSet(), dest_ports=PortSet(), methods=MethodS
5959
self.base_peer_set = base_peer_set if base_peer_set else PeerSet()
6060

6161
# create the cube from input arguments
62-
cube = []
63-
active_dims = []
64-
if not source_ports.is_all():
65-
cube.append(source_ports.port_set)
66-
active_dims.append("src_ports")
67-
if not dest_ports.is_all():
68-
cube.append(dest_ports.port_set)
69-
active_dims.append("dst_ports")
70-
if not methods.is_whole_range():
71-
cube.append(methods)
72-
active_dims.append("methods")
73-
if paths is not None:
74-
cube.append(paths)
75-
active_dims.append("paths")
76-
if hosts is not None:
77-
cube.append(hosts)
78-
active_dims.append("hosts")
79-
if peers is not None:
80-
cube.append(peers)
81-
active_dims.append("peers")
62+
# create a dict object that holds the values required to build the cube
63+
dims_to_values = {"src_ports": {"value": source_ports.port_set,
64+
"is_all": source_ports.is_all()},
65+
"dst_ports": {"value": dest_ports.port_set,
66+
"is_all": dest_ports.is_all()},
67+
"methods": {"value": methods,
68+
"is_all": methods.is_whole_range()},
69+
"hosts": {"value": hosts,
70+
"is_all": hosts is None},
71+
"paths": {"value": paths,
72+
"is_all": paths is None},
73+
"peers": {"value": peers,
74+
"is_all": peers is None}}
75+
76+
cube, active_dims, has_empty_dim_value = self._get_cube_and_active_dims_from_input_values(dims_to_values)
8277

8378
if not active_dims:
8479
self.set_all()
85-
else:
86-
has_empty_dim_value = False
87-
for dim_val in cube:
88-
if not dim_val:
89-
has_empty_dim_value = True
90-
break
91-
if not has_empty_dim_value:
92-
self.add_cube(cube, active_dims)
80+
elif not has_empty_dim_value:
81+
self.add_cube(cube, active_dims)
9382

9483
# assuming named ports are only in dest, not src
9584
all_ports = PortSet.all_ports_interval.copy()
@@ -99,6 +88,27 @@ def __init__(self, source_ports=PortSet(), dest_ports=PortSet(), methods=MethodS
9988
# self.excluded_named_ports[port_name] = all_ports - source_ports.port_set
10089
self.excluded_named_ports[port_name] = all_ports
10190

91+
@staticmethod
92+
def _get_cube_and_active_dims_from_input_values(dims_to_values):
93+
"""
94+
Given initial values, get the matching cube and its active dimensions
95+
:param dict dims_to_values: map from dimension name to values properties
96+
:rtype tuple(list, list, bool)
97+
:return: tuple with: (1) cube values (2) active dimensions (3) bool indication if some dimension is empty
98+
"""
99+
cube = []
100+
active_dims = []
101+
has_empty_dim_value = False
102+
# add values to cube by required order of dimensions
103+
for dim in TcpLikeProperties.dimensions_list:
104+
dim_val = dims_to_values[dim]["value"]
105+
add_to_cube = not dims_to_values[dim]["is_all"]
106+
if add_to_cube:
107+
cube.append(dim_val)
108+
active_dims.append(dim)
109+
has_empty_dim_value |= not dim_val
110+
return cube, active_dims, has_empty_dim_value
111+
102112
def __bool__(self):
103113
return super().__bool__() or bool(self.named_ports)
104114

@@ -167,8 +177,8 @@ def get_properties_obj(self):
167177
def __eq__(self, other):
168178
if isinstance(other, TcpLikeProperties):
169179
assert self.base_peer_set == other.base_peer_set
170-
res = super().__eq__(other) and self.named_ports == other.named_ports and \
171-
self.excluded_named_ports == other.excluded_named_ports
180+
res = super().__eq__(other) and self.named_ports == other.named_ports \
181+
and self.excluded_named_ports == other.excluded_named_ports
172182
return res
173183
return False
174184

nca/Parsers/GenericIngressLikeYamlParser.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,11 @@ def parse_regex_host_value(self, regex_value, rule):
4040
if regex_value is None:
4141
return None # to represent that all is allowed, and this dimension can be inactive in the generated cube
4242

43+
if regex_value == '*':
44+
return DimensionsManager().get_dimension_domain_by_name('hosts')
45+
4346
allowed_chars = "[\\w]"
44-
allowed_chars_with_star_regex = "[*" + DimensionsManager().default_dfa_alphabet_chars + "]*"
47+
allowed_chars_with_star_regex = "[*" + MinDFA.default_dfa_alphabet_chars + "]*"
4548
if not re.fullmatch(allowed_chars_with_star_regex, regex_value):
4649
self.syntax_error(f'Illegal characters in host {regex_value}', rule)
4750

@@ -148,3 +151,18 @@ def _make_rules_from_conns(self, tcp_conns):
148151
for peer_set, conns in peers_to_conns.items():
149152
res.append(IngressPolicyRule(peer_set, conns))
150153
return res
154+
155+
@staticmethod
156+
def get_path_prefix_dfa(path_string):
157+
"""
158+
Given a prefix path, get its MinDFA that accepts all relevant paths
159+
:param str path_string: a path string from policy, specified as Prefix
160+
:rtype MinDFA
161+
"""
162+
if path_string == '/':
163+
return DimensionsManager().get_dimension_domain_by_name('paths')
164+
allowed_chars = "[" + MinDFA.default_dfa_alphabet_chars + "]"
165+
if path_string.endswith('/'):
166+
path_string = path_string[:-1]
167+
path_regex = f'{path_string}(/{allowed_chars}*)?'
168+
return MinDFA.dfa_from_regex(path_regex)

nca/Parsers/IngressPolicyYamlParser.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def __init__(self, policy, peer_container, ingress_file_name=''):
3535
def validate_path_value(self, path_value, path):
3636
if path_value[0] != '/':
3737
self.syntax_error(f'Illegal path {path_value} in the rule path', path)
38-
pattern = "[" + DimensionsManager().default_dfa_alphabet_chars + "]*"
38+
pattern = "[" + MinDFA.default_dfa_alphabet_chars + "]*"
3939
if not re.fullmatch(pattern, path_value):
4040
self.syntax_error(f'Illegal characters in path {path_value} in {path}')
4141

@@ -149,16 +149,13 @@ def segregate_longest_paths_and_make_dfa(parsed_paths):
149149
"""
150150
# first, convert path strings to dfas
151151
parsed_paths_with_dfa = []
152-
allowed_chars = "[" + DimensionsManager().default_dfa_alphabet_chars + "]"
153152
for path_string, path_type, peers, ports in parsed_paths:
154153
if path_type == 'Exact':
155-
path_regex = path_string
156-
else:
157-
if path_string:
158-
path_regex = path_string + '|' + path_string + '/' + allowed_chars + '*'
159-
else:
160-
path_regex = '/' + allowed_chars + '*'
161-
parsed_paths_with_dfa.append((path_string, MinDFA.dfa_from_regex(path_regex), path_type, peers, ports))
154+
path_dfa = MinDFA.dfa_from_regex(path_string)
155+
else: # Prefix type
156+
path_string = '/' if not path_string else path_string
157+
path_dfa = GenericIngressLikeYamlParser.get_path_prefix_dfa(path_string)
158+
parsed_paths_with_dfa.append((path_string, path_dfa, path_type, peers, ports))
162159

163160
# next, avoid shorter sub-paths to extend to longer ones, using dfa operations
164161
res = []

nca/Parsers/IstioPolicyYamlParser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,8 @@ def _parse_str_value(self, str_val_input, dim_name, operation):
269269
:param dict operation: the operation object being parsed
270270
:return: str: the result regex/str after conversion
271271
"""
272-
allowed_chars = "[" + DimensionsManager().default_dfa_alphabet_chars + "]"
273-
allowed_chars_with_star_regex = "[*" + DimensionsManager().default_dfa_alphabet_chars + "]*"
272+
allowed_chars = "[" + MinDFA.default_dfa_alphabet_chars + "]"
273+
allowed_chars_with_star_regex = "[*" + MinDFA.default_dfa_alphabet_chars + "]*"
274274
if not re.fullmatch(allowed_chars_with_star_regex, str_val_input):
275275
self.syntax_error(f'Illegal characters in {dim_name} {str_val_input} in {operation}')
276276

nca/Parsers/IstioTrafficResourcesYamlParser.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#
55

66
from functools import reduce
7-
from nca.CoreDS.DimensionsManager import DimensionsManager
87
from nca.CoreDS.MinDFA import MinDFA
98
from nca.CoreDS.Peer import PeerSet
109
from nca.CoreDS.MethodSet import MethodSet
@@ -223,9 +222,11 @@ def parse_istio_regex_string(self, resource, attr_name, vs_name):
223222
if items[0][0] == 'exact':
224223
pass
225224
elif items[0][0] == 'prefix':
226-
regex += DimensionsManager().default_dfa_alphabet_str
225+
if attr_name == 'uri':
226+
return self.get_path_prefix_dfa(regex)
227+
regex += MinDFA.default_alphabet_regex
227228
elif items[0][0] == 'regex':
228-
regex.replace('.', DimensionsManager().default_dfa_alphabet_chars)
229+
regex.replace('.', MinDFA.default_dfa_alphabet_chars)
229230
if attr_name == 'uri' and resource.get('ignoreUriCase') == 'True':
230231
# https://github.com/google/re2/wiki/Syntax#:~:text=group%3B%20non%2Dcapturing-,(%3Fflags%3Are),-set%20flags%20during
231232
regex = '(?i:' + regex + ')'

0 commit comments

Comments
 (0)