Skip to content

Commit 70c4f42

Browse files
authored
Merge pull request #117 from hubmapconsortium/additions_to_string_helper
Additions to string helper
2 parents c194613 + a1c360e commit 70c4f42

File tree

3 files changed

+101
-1
lines changed

3 files changed

+101
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,4 @@ venv.bak/
115115
*.ini
116116
.DS_Store
117117
appconfig.py
118+
.idea/

hubmap_commons/string_helper.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import ast
2+
import unicodedata
3+
14

25
def isBlank(val):
36
if val is None:
@@ -56,4 +59,100 @@ def listToCommaSeparated(lst, quoteChar = None, trimAndUpperCase = False):
5659
def allIndexes(value, character):
    """Return the positions in `value` whose element equals `character`.

    Positions are zero-based and listed in ascending order; an empty list
    is returned when nothing matches.
    """
    positions = []
    for index, item in enumerate(value):
        if item == character:
            positions.append(index)
    return positions
5861

62+
"""
63+
Convert a string representation of the Python list/dict (either nested or not) to a Python list/dict object
64+
after removing any non-printable control characters, if present.
65+
66+
Note: string representation of Python string can still contain control characters and should not be used by this method
67+
But if a string representation of a Python string is used as input by mistake, its control characters are removed as a result.
68+
69+
This was copied from:
70+
https://github.com/hubmapconsortium/entity-api/blob/a832a906124623a889a943c15ff7c8d93f2bb068/src/schema/schema_manager.py#L1666
71+
72+
Parameters
73+
----------
74+
data_str: str
75+
The string representation of the Python list/dict stored in Neo4j.
76+
It's not stored in Neo4j as a json string! And we can't store it as a json string
77+
due to the way that Cypher handles single/double quotes.
78+
79+
Returns
80+
-------
81+
list or dict or str
82+
The desired Python list or dict object after evaluation, or the input string (with any control characters removed) when it does not evaluate to a list or dict
83+
"""
84+
def convert_str_literal(data_str):
    """Evaluate the string form of a Python list/dict into the real object.

    Parameters
    ----------
    data_str : str
        The string representation of a Python list or dict. Any other
        (non-string) value is returned untouched.

    Returns
    -------
    list or dict or str
        The evaluated list/dict. If the string evaluates to some other
        literal (number, string, tuple, ...), the input string is returned
        instead, with its control characters already removed.

    Raises
    ------
    ValueError
        If the cleaned string is not a valid Python literal. The underlying
        SyntaxError/ValueError/TypeError is attached as ``__cause__``.
    """
    # Skip any non-string data types
    if not isinstance(data_str, str):
        return data_str

    # First remove those non-printable control characters that will cause SyntaxError.
    # Every Unicode general category starting with "C" (Cc, Cf, Cs, Co, Cn) is dropped.
    data_str = "".join(char for char in data_str if unicodedata.category(char)[0] != "C")

    # ast uses compile to compile the source string (which must be an expression) into an AST
    # If the source string is not a valid expression (like an empty string), a SyntaxError will be raised by compile
    # If, on the other hand, the source string would be a valid expression (e.g. a variable name like foo),
    # compile will succeed but then literal_eval() might fail with a ValueError
    # Also this fails with a TypeError: literal_eval("{{}: 'value'}")
    try:
        data = ast.literal_eval(data_str)
    except (SyntaxError, ValueError, TypeError) as e:
        # Chain the original exception so the parser's traceback is kept as __cause__
        raise ValueError(f"Invalid expression (string value): {data_str} from ast.literal_eval(); "
                         f"specific error: {str(e)}") from e

    # Only list/dict results are handed back as objects; a string literal that is
    # not a list or dict after evaluation is returned as the (cleaned) string
    if isinstance(data, (list, dict)):
        return data
    return data_str
106+
107+
"""
108+
Build the property key-value pairs to be used in the Cypher clause for node creation/update
109+
110+
Parameters
111+
----------
112+
entity_data_dict : dict
113+
The target Entity node to be created
114+
115+
This was copied from:
116+
https://github.com/hubmapconsortium/entity-api/blob/1aa6c868df25514f8ac2130005d8080f3fbe229a/src/schema/schema_neo4j_queries.py#L1361
117+
118+
Returns
119+
-------
120+
str
121+
A string representation of the node properties map containing
122+
key-value pairs to be used in Cypher clause
123+
"""
124+
def build_properties_map(entity_data_dict):
    """Build the Cypher property map text used for node creation/update.

    Parameters
    ----------
    entity_data_dict : dict
        Property names mapped to their values. Keys are emitted unquoted
        and unvalidated, so they must already be valid Cypher identifiers.

    Returns
    -------
    str
        A map literal such as ``{ uuid: 'abc', count: 3 }``.

    Notes
    -----
    * int and bool values are written as-is.
    * The exact string ``'TIMESTAMP()'`` is written unquoted so that neo4j
      evaluates it as a function call.
    * Every other value (strings, lists, dicts, floats, None, ...) is
      converted with ``str()`` when needed and written as a single-quoted
      Cypher string. Use convert_str_literal() to turn a stored list/dict
      literal back into an object.
    * SECURITY: this builds query text by string interpolation. Values are
      escaped below, but keys are not; prefer query parameters wherever the
      caller can use them.
    """
    separator = ', '
    node_properties_list = []

    for key, value in entity_data_dict.items():
        is_timestamp_call = isinstance(value, str) and value == 'TIMESTAMP()'

        if isinstance(value, (int, bool)) or is_timestamp_call:
            # Treat integer and boolean as is; TIMESTAMP() stays unquoted
            # since neo4j only takes it as a function
            key_value_pair = f"{key}: {value}"
        else:
            # Convert list, dict and other types to string, retaining the original data
            text = value if isinstance(value, str) else str(value)
            # Escape backslashes BEFORE single quotes. Otherwise a trailing
            # backslash, or a repr that already contains \' , would end the
            # quoted Cypher literal early (syntax error / injection).
            escaped_str = text.replace("\\", "\\\\").replace("'", "\\'")
            # Quote the value
            key_value_pair = f"{key}: '{escaped_str}'"

        # Add to the list
        node_properties_list.append(key_value_pair)

    # Example: {uuid: 'eab7fd6911029122d9bbd4d96116db9b', rui_location: 'Joe <info>', lab_tissue_sample_id: 'dadsadsd'}
    # Note: all the keys are not quoted, otherwise Cypher syntax error
    node_properties_map = f"{{ {separator.join(node_properties_list)} }}"

    return node_properties_map

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="hubmap-commons",
8-
version="2.1.10",
8+
version="2.1.11",
99
author="HuBMAP Consortium",
1010
author_email="api-developers@hubmapconsortium.org",
1111
description="The common utilities used by the HuMBAP web services",

0 commit comments

Comments
 (0)