|
| 1 | +import ast |
| 2 | +import unicodedata |
| 3 | + |
1 | 4 |
|
2 | 5 | def isBlank(val): |
3 | 6 | if val is None: |
@@ -56,4 +59,100 @@ def listToCommaSeparated(lst, quoteChar = None, trimAndUpperCase = False): |
def allIndexes(value, character):
    """Return the list of positions in `value` whose element equals `character`."""
    positions = []
    for index, item in enumerate(value):
        if item == character:
            positions.append(index)
    return positions
58 | 61 |
|
def convert_str_literal(data_str):
    """
    Convert the string representation of a Python list/dict (nested or not) into the
    corresponding Python list/dict object, removing non-printable characters first.

    Every character whose Unicode general category starts with "C" is stripped before
    evaluation, because such characters make the parser raise a SyntaxError.

    Note: the string representation of a plain Python string may legitimately contain
    control characters and should not be passed to this function. If it is passed by
    mistake, the characters are removed and the cleaned string is returned.

    This was copied from:
    https://github.com/hubmapconsortium/entity-api/blob/a832a906124623a889a943c15ff7c8d93f2bb068/src/schema/schema_manager.py#L1666

    Parameters
    ----------
    data_str : str
        The string representation of the Python list/dict stored in Neo4j.
        It's not stored in Neo4j as a json string! And we can't store it as a json string
        due to the way that Cypher handles single/double quotes.
        Any non-string input is returned unchanged.

    Returns
    -------
    list or dict or str
        The evaluated list or dict; otherwise the cleaned input string (when the literal
        evaluates to something other than a list/dict), or the untouched input when it
        is not a string at all.

    Raises
    ------
    ValueError
        If the cleaned string cannot be evaluated by ast.literal_eval(). The original
        exception is attached as the cause.
    """
    # Skip any non-string data types
    if not isinstance(data_str, str):
        return data_str

    # unicodedata.category() returns a two-letter code; every "C*" category
    # (control, format, ...) is dropped so the source can be compiled
    cleaned = "".join(ch for ch in data_str if not unicodedata.category(ch).startswith("C"))

    # literal_eval() compiles the source as an expression and then evaluates the AST:
    # - an invalid expression (like an empty string) raises SyntaxError
    # - a valid expression that is not a literal (e.g. a variable name) raises ValueError
    # - some malformed literals (e.g. an unhashable dict key) raise TypeError
    # - the docs also list MemoryError and RecursionError for malformed/deeply nested input
    try:
        data = ast.literal_eval(cleaned)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError) as e:
        raise ValueError(f"Invalid expression (string value): {cleaned} from ast.literal_eval(); "
                         f"specific error: {str(e)}") from e

    # Only list/dict results are handed back as objects; any other literal
    # (number, quoted string, tuple, ...) yields the cleaned string instead
    if isinstance(data, (list, dict)):
        return data
    return cleaned
| 106 | + |
def _escape_cypher_string(text):
    """Escape backslashes and single quotes so `text` can sit inside a single-quoted Cypher string."""
    # Backslashes must be doubled first; otherwise the backslash added in front of
    # each quote would itself be doubled by the second pass
    return text.replace("\\", "\\\\").replace("'", "\\'")


def build_properties_map(entity_data_dict):
    """
    Build the property key-value pairs to be used in the Cypher clause for node creation/update

    This was copied from:
    https://github.com/hubmapconsortium/entity-api/blob/1aa6c868df25514f8ac2130005d8080f3fbe229a/src/schema/schema_neo4j_queries.py#L1361

    Parameters
    ----------
    entity_data_dict : dict
        The target Entity node to be created. Keys are emitted unquoted and unescaped,
        so they must already be valid Cypher property names.

    Returns
    -------
    str
        A string representation of the node properties map containing
        key-value pairs to be used in Cypher clause
    """
    separator = ', '
    node_properties_list = []

    for key, value in entity_data_dict.items():
        if isinstance(value, int):
            # Treat integer and boolean as is (bool is a subclass of int)
            rendered = f"{value}"
        elif value == 'TIMESTAMP()':
            # Special case: neo4j only takes TIMESTAMP() as a function, so it stays unquoted
            # (only a str can compare equal to this literal)
            rendered = value
        else:
            # Strings are used directly; everything else (list, dict, float, None, ...)
            # is converted with str() and stored as a quoted string. A list/dict literal
            # stored this way can be turned back into an object with convert_str_literal().
            # Backslashes and single quotes are escaped to build a valid Cypher string:
            # an unescaped trailing backslash, or the backslash-quote pair found in the
            # repr of a string containing both quote kinds, would otherwise end the
            # literal early.
            text = value if isinstance(value, str) else str(value)
            rendered = f"'{_escape_cypher_string(text)}'"

        node_properties_list.append(f"{key}: {rendered}")

    # Example: {uuid: 'eab7fd6911029122d9bbd4d96116db9b', rui_location: 'Joe <info>', lab_tissue_sample_id: 'dadsadsd'}
    # Note: all the keys are not quoted, otherwise Cypher syntax error
    return f"{{ {separator.join(node_properties_list)} }}"
0 commit comments