Skip to content

Commit 392cfac

Browse files
authored
feat: ✨ added Rest.li decoder (#19)
1 parent c188c99 commit 392cfac

File tree

4 files changed

+261
-2
lines changed

4 files changed

+261
-2
lines changed
Lines changed: 217 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,218 @@
1-
def reduced_decode(value):
2-
# TODO
1+
from linkedin_api.common.errors import InvalidSerializedRestliError
2+
from urllib.parse import unquote
3+
from typing import Dict, List, Any, Union
4+
import re
5+
6+
# Bracket characters counted when matching nested groups
LEFT_BRACKET = "("
RIGHT_BRACKET = ")"

# Lists are written as "List(item1,item2,...)"
LIST_PREFIX = "List("
LIST_SUFFIX = ")"
LIST_ITEM_SEP = ","

# Objects are written as "(key1:val1,key2:val2,...)"
OBJ_PREFIX = "("
OBJ_SUFFIX = ")"
OBJ_KEY_VAL_SEP = ":"
OBJ_KEY_VAL_PAIR_SEP = ","

# In reduced-encoded primitives only these special characters are URL-encoded:
# "(" -> %28, ")" -> %29, "," -> %2C, ":" -> %3A, "'" -> %27
reduced_decode_special_chars_pattern = r"%28|%29|%2C|%3A|%27"
18+
19+
20+
def decode(value: str) -> Union[Dict[str, Any], List[Any], str]:
    """
    Decode a Rest.li URL-encoded value into Python data.

    The Rest.li format is lossy: every leaf value is returned as a string.
    For example:
        [1,2,3] -encode-> "List(1,2,3)" -decode-> ["1","2","3"]

    Args:
        value (str): The URL-encoded value to decode

    Returns:
        Union[Dict[str,Any], List[Any], str]: A dict for "(...)" input, a list for
        "List(...)" input, otherwise the unescaped string
    """
    return __internal_decode(value, reduced=False)
33+
34+
35+
def reduced_decode(value: str) -> Union[Dict[str, Any], List[Any], str]:
    """
    Decode a Rest.li HTTP body/header-encoded ("reduced" encoded) value.

    Unlike decode(), primitives only have the escaped special characters
    "(", ")", ",", ":" and "'" unescaped; any other percent sequence is kept as-is.

    Args:
        value (str): The HTTP body/header-encoded value to decode

    Returns:
        Union[Dict[str,Any], List[Any], str]: A dict for "(...)" input, a list for
        "List(...)" input, otherwise the unescaped string
    """
    return __internal_decode(value, reduced=True)
46+
47+
48+
def __validateSuffix(restli_encoded_str: str, suffix: str):
    """
    Ensure that restli_encoded_str ends with the expected suffix.

    Args:
        restli_encoded_str (str): The encoded string being checked
        suffix (str): The closing token the string must end with

    Raises:
        InvalidSerializedRestliError: If the string does not end with suffix
    """
    if restli_encoded_str.endswith(suffix):
        return

    raise InvalidSerializedRestliError(
        f"The serialized Rest.li string has unbalanced prefix and suffix: {restli_encoded_str}"
    )
56+
57+
58+
def __restli_unescape(value: str, reduced: bool):
    """
    Unescape a single encoded primitive.

    Args:
        value (str): The encoded primitive
        reduced (bool): When False the whole value is percent-decoded. When True only
            the sequences matched by reduced_decode_special_chars_pattern are decoded
            and every other percent sequence is left untouched.

    Returns:
        str: The unescaped primitive
    """
    if reduced:
        # re.sub leaves the value unchanged when nothing matches, so no pre-check is needed
        return re.sub(
            reduced_decode_special_chars_pattern,
            lambda escaped: unquote(escaped.group()),
            value,
        )
    return unquote(value)
68+
69+
70+
def __find_last_right_bracket(value: str, pos: int) -> int:
    """
    Returns the index of the right bracket that closes the first bracket group
    opened at or after the specified index.
    For example, consider value = "List(1,(k:v))".
    If pos = 0, then return the position of the outer matching bracket (12)
    If pos = 7, then return the position of the inner matching bracket (11)

    Args:
        value (str): The encoded string value
        pos (int): The index at which to start searching

    Raises:
        InvalidSerializedRestliError: If a right bracket appears before any left
            bracket, if no bracket group is opened at all, or if the opened group
            is never closed

    Returns:
        int: The index of the right matching bracket
    """
    # Number of left brackets seen so far that have not been closed yet
    unmatched_brackets = 0

    for idx in range(pos, len(value)):
        curr_char = value[idx]
        if curr_char == LEFT_BRACKET:
            unmatched_brackets += 1
        elif curr_char == RIGHT_BRACKET:
            if unmatched_brackets == 0:
                # A right bracket with nothing to close: stop here instead of letting
                # the counter go negative and later "match" on a left bracket
                break
            unmatched_brackets -= 1
            if unmatched_brackets == 0:
                # This bracket closes the group that was opened first
                return idx

    # Reaching this point means no properly closed group was found
    raise InvalidSerializedRestliError(
        f"The serialized Rest.li string has unbalanced brackets: {value}"
    )
111+
112+
113+
def __internal_decode(restli_encoded_str: str, reduced: bool):
    """
    Decode an encoded value by dispatching on its leading token.

    Args:
        restli_encoded_str (str): The encoded value. None and the two-character
            string "''" are both treated as the empty string.
        reduced (bool): Flag whether this is expected to be a reduced-encoded string

    Raises:
        InvalidSerializedRestliError: If a value starting with a list or object
            prefix does not end with the matching suffix

    Returns:
        A list for "List(...)", a dict for "(...)", otherwise the unescaped string
    """
    if restli_encoded_str is None or restli_encoded_str == "''":
        restli_encoded_str = ""

    if restli_encoded_str.startswith(LIST_PREFIX):
        __validateSuffix(restli_encoded_str, LIST_SUFFIX)
        # Strip the leading "List(" and the trailing ")"
        list_body = restli_encoded_str[len(LIST_PREFIX) : -1]
        return __decode_list(list_body, reduced)

    if restli_encoded_str.startswith(OBJ_PREFIX):
        __validateSuffix(restli_encoded_str, OBJ_SUFFIX)
        # Strip the surrounding parentheses
        object_body = restli_encoded_str[1:-1]
        return __decode_object(object_body, reduced)

    # Anything else is a primitive
    return __restli_unescape(restli_encoded_str, reduced)
125+
126+
127+
def __decode_list(restli_encoded_str: str, reduced: bool) -> List[Any]:
    """
    Decode the contents of a Rest.li-encoded list.

    Args:
        restli_encoded_str (str): The text found inside of "List(...)". For example, if the
            original string is "List(val1,val2,val3)", the value passed into __decode_list()
            should be "val1,val2,val3".
        reduced (bool): Flag whether this is expected to be a reduced-encoded string

    Returns:
        List[Any]: The decoded list
    """
    items = []
    total = len(restli_encoded_str)
    cursor = 0

    while cursor < total:
        if restli_encoded_str.startswith((LIST_PREFIX, OBJ_PREFIX), cursor):
            # The entry is itself a List or Object: decode the whole bracketed group
            closing_idx = __find_last_right_bracket(restli_encoded_str, cursor)
            nested = restli_encoded_str[cursor : closing_idx + 1]
            items.append(__internal_decode(nested, reduced))

            # Step over the closing bracket and the comma that follows it
            cursor = closing_idx + 2
            continue

        # The entry is a primitive that runs up to the next comma (or the end)
        comma_idx = restli_encoded_str.find(",", cursor)
        stop = total if comma_idx < 0 else comma_idx
        items.append(__restli_unescape(restli_encoded_str[cursor:stop], reduced))

        # Step over the comma
        cursor = stop + 1

    return items
170+
171+
172+
def __decode_object(restli_encoded_str: str, reduced: bool) -> Dict[str, Any]:
    """
    Decodes a Rest.li-encoded string to an object.

    Args:
        restli_encoded_str (str): An encoded string value that should represent an object. It is
            expected that this is the string value inside of the parentheses. For example, if the
            original string is "(prop1:val1,prop2:val2)", then the string that should be passed
            into __decode_object() should be "prop1:val1,prop2:val2".
        reduced (bool): Flag whether this is expected to be a reduced-encoded string

    Raises:
        InvalidSerializedRestliError: If an entry has no key-value separator (:)

    Returns:
        Dict[str,Any]: The decoded object
    """
    decoded_object = {}
    total = len(restli_encoded_str)
    idx = 0

    while idx < total:
        # Get the key between the start index and the key-value separator (:)
        colon_idx = restli_encoded_str.find(OBJ_KEY_VAL_SEP, idx)
        if colon_idx < 0:
            # Without this check the -1 would be used as a slice bound and reset idx
            # to 0, producing a bogus key and possibly an endless loop
            raise InvalidSerializedRestliError(
                f"The serialized Rest.li string has an object entry without a key-value separator: {restli_encoded_str}"
            )
        key = __restli_unescape(restli_encoded_str[idx:colon_idx], reduced)

        # Move to the next character after the colon
        idx = colon_idx + 1

        if restli_encoded_str.startswith((LIST_PREFIX, OBJ_PREFIX), idx):
            # The key's value is a List or Object: decode the whole bracketed group
            right_bracket_idx = __find_last_right_bracket(restli_encoded_str, idx)
            decoded_object[key] = __internal_decode(
                restli_encoded_str[idx : right_bracket_idx + 1], reduced
            )

            # Move index past next potential comma (separating obj key-value pairs)
            idx = right_bracket_idx + 2
        else:
            # The key's value is a primitive that runs up to the next comma (or the end)
            end_idx = restli_encoded_str.find(OBJ_KEY_VAL_PAIR_SEP, idx)
            if end_idx < 0:
                end_idx = total

            decoded_object[key] = __restli_unescape(
                restli_encoded_str[idx:end_idx], reduced
            )
            # end_idx is the comma index, so move 1 past it
            idx = end_idx + 1

    return decoded_object

linkedin_api/common/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,7 @@ class MissingArgumentError(Exception):
1111
class ResponseFormattingError(Exception):
    """Error raised when formatting API response"""
14+
15+
class InvalidSerializedRestliError(Exception):
    """Error raised when an incorrectly serialized Rest.li string is encountered"""

tests/clients/restli/utils/__init__.py

Whitespace-only changes.
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from linkedin_api.clients.restli.utils.decoder import decode, reduced_decode
2+
import pytest
3+
4+
5+
# (encoded input, expected decoded value) pairs for decode()
_DECODE_CASES = [
    # Primitives
    ("", ""),
    ("abc123", "abc123"),
    ("urn%3Ali%20app%3F", "urn:li app?"),
    # Flat list and flat object
    ("List(1,2,3)", ["1", "2", "3"]),
    ("(key1:val1,key2:val2)", {"key1": "val1", "key2": "val2"}),
    # Escaped special characters in keys and values
    (
        "(key1:urn%3Ali%3Aapp%3A%281%2C2%29,key%3A2:foobar)",
        {"key1": "urn:li:app:(1,2)", "key:2": "foobar"},
    ),
    # Nested structures
    ("List(abc,def,(key1:val1))", ["abc", "def", {"key1": "val1"}]),
    ("List(List(abc,def),ghi)", [["abc", "def"], "ghi"]),
    (
        "(key1:urn%3Ali%3Adevapp%3A123,key2:List(123%3A456,(key22:abc)))",
        {"key1": "urn:li:devapp:123", "key2": ["123:456", {"key22": "abc"}]},
    ),
]


@pytest.mark.parametrize(("input_str", "expected_output"), _DECODE_CASES)
def test_decode(input_str: str, expected_output):
    """decode() turns each URL-encoded input into the expected Python value."""
    decoded = decode(input_str)
    assert decoded == expected_output
27+
28+
29+
# (encoded input, expected decoded value) pairs for reduced_decode()
_REDUCED_DECODE_CASES = [
    # Primitives without escapes
    ("", ""),
    ("abc123", "abc123"),
    # Only the escaped special characters are decoded; "%3F" is left as-is
    ("%28urn%29%3Ali%2C_ %3A%27app%3F?", "(urn):li,_ :'app%3F?"),
    # Escapes inside list items and object keys/values
    ("List(%28a%3Ab%29,%27c%2Cd%27?)", ["(a:b)", "'c,d'?"]),
    ("(a%3Ab?:c%3Ad )", {"a:b?": "c:d "}),
]


@pytest.mark.parametrize(("input_str", "expected_output"), _REDUCED_DECODE_CASES)
def test_reduced_decode(input_str: str, expected_output):
    """reduced_decode() turns each reduced-encoded input into the expected Python value."""
    decoded = reduced_decode(input_str)
    assert decoded == expected_output

0 commit comments

Comments
 (0)