Skip to content

Commit 9db2a1a

Browse files
authored
[Fixes #13520] Improved Error Robustness for special characters and whitespaces in filenames and attribute tables (#13535)
* [Fixes #13520] added flow for dynamic vrt file and handle more special character * [Fixes #13520] update code for only create vrt if there is problem in field name
1 parent 576c3c4 commit 9db2a1a

File tree

6 files changed

+199
-16
lines changed

6 files changed

+199
-16
lines changed

geonode/upload/handlers/base.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from pathlib import Path
2323
from typing import List
2424
import zipfile
25+
import re
2526

2627
from geonode.assets.utils import create_asset_and_link
2728
from geonode.resource.enumerator import ExecutionRequestAction as exa
@@ -237,18 +238,14 @@ def fixup_name(self, name):
237238
prefix = name[0]
238239
if prefix.isnumeric():
239240
name = name.replace(name[0], "_")
240-
return (
241-
name.lower()
242-
.replace("-", "_")
243-
.replace(" ", "_")
244-
.replace("#", "_")
245-
.replace("\\", "_")
246-
.replace(".", "")
247-
.replace(")", "")
248-
.replace("(", "")
249-
.replace(",", "")
250-
.replace("&", "")[:62]
251-
)
241+
name = name.lower()
242+
# Replace specific chars with underscore in one pass
243+
name = re.sub(r"[-# \\&]", "_", name)
244+
245+
# Remove unwanted characters in one pass
246+
name = re.sub(r'[.(),!"$%\'*+/:;<=>?@\[\]^`{|}~]', "", name)
247+
248+
return name[:62]
252249

253250
def extract_resource_to_publish(self, files, layer_name, alternate, **kwargs):
254251
"""

geonode/upload/handlers/common/tests_vector.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from dynamic_models.models import ModelSchema
3636
from osgeo import ogr
3737
from django.test.utils import override_settings
38+
from geonode.upload.utils import create_vrt_file, has_incompatible_field_names
3839

3940

4041
class TestBaseVectorFileHandler(TestCase):
@@ -69,6 +70,100 @@ def test_create_error_log(self):
6970
expected = "Task: foo_task_name raised an error during actions for layer: alternate: my exception"
7071
self.assertEqual(expected, actual)
7172

73+
def test_create_vrt_file_with_special_chars(self):
74+
"""
75+
Test that create_vrt_file correctly sanitizes layer and field names
76+
with spaces and special characters, and generates a valid VRT file.
77+
"""
78+
source_filepath = "source file.shp"
79+
80+
mock_layer = MagicMock(spec=ogr.Layer)
81+
mock_layer.GetName.return_value = "Layer With Spaces"
82+
mock_layer.GetGeomType.return_value = ogr.wkbPolygon
83+
84+
mock_field_1 = MagicMock()
85+
mock_field_1.GetName.return_value = "Field 1 (m)"
86+
mock_field_1.GetTypeName.return_value = "String"
87+
88+
mock_field_2 = MagicMock()
89+
mock_field_2.GetName.return_value = "another-field"
90+
mock_field_2.GetTypeName.return_value = "Real"
91+
92+
mock_field_3 = MagicMock()
93+
mock_field_3.GetName.return_value = "field/with/slash"
94+
mock_field_3.GetTypeName.return_value = "Integer"
95+
96+
mock_layer_defn = MagicMock()
97+
mock_layer_defn.GetFieldCount.return_value = 3
98+
mock_layer_defn.GetFieldDefn.side_effect = [mock_field_1, mock_field_2, mock_field_3].__getitem__
99+
100+
mock_layer.GetLayerDefn.return_value = mock_layer_defn
101+
102+
vrt_filename, vrt_layer_name = None, None
103+
try:
104+
vrt_filename, vrt_layer_name = create_vrt_file(mock_layer, source_filepath)
105+
106+
self.assertIsNotNone(vrt_filename)
107+
self.assertTrue(os.path.exists(vrt_filename))
108+
self.assertEqual(vrt_layer_name, "layer_with_spaces")
109+
110+
with open(vrt_filename, "r") as f:
111+
vrt_content = f.read()
112+
113+
self.assertIn('<OGRVRTLayer name="layer_with_spaces">', vrt_content)
114+
self.assertIn(f"<SrcDataSource>{source_filepath}</SrcDataSource>", vrt_content)
115+
self.assertIn("<SrcLayer>Layer With Spaces</SrcLayer>", vrt_content)
116+
117+
self.assertIn('<Field name="field_1_m" src="Field 1 (m)" type="String" />', vrt_content)
118+
self.assertIn('<Field name="another_field" src="another-field" type="Real" />', vrt_content)
119+
self.assertIn('<Field name="fieldwithslash" src="field/with/slash" type="Integer" />', vrt_content)
120+
121+
finally:
122+
if vrt_filename and os.path.exists(vrt_filename):
123+
os.remove(vrt_filename)
124+
125+
def test_has_incompatible_field_names(self):
126+
"""
127+
Test that has_incompatible_field_names correctly identifies layers
128+
with field names that need sanitization.
129+
"""
130+
131+
# layer with incompatible field names
132+
mock_layer_incompatible = MagicMock(spec=ogr.Layer)
133+
mock_layer_incompatible.GetName.return_value = "Layer With Spaces"
134+
135+
mock_field_1 = MagicMock()
136+
mock_field_1.GetName.return_value = "Field 1 (m)"
137+
138+
mock_field_2 = MagicMock()
139+
mock_field_2.GetName.return_value = "another-#field"
140+
141+
mock_layer_defn_incompatible = MagicMock()
142+
mock_layer_defn_incompatible.GetFieldCount.return_value = 2
143+
mock_layer_defn_incompatible.GetFieldDefn.side_effect = [mock_field_1, mock_field_2].__getitem__
144+
145+
mock_layer_incompatible.GetLayerDefn.return_value = mock_layer_defn_incompatible
146+
147+
self.assertTrue(has_incompatible_field_names(mock_layer_incompatible))
148+
149+
# layer with compatible field names
150+
mock_layer_compatible = MagicMock(spec=ogr.Layer)
151+
mock_layer_compatible.GetName.return_value = "compatible_layer"
152+
153+
mock_field_3 = MagicMock()
154+
mock_field_3.GetName.return_value = "field_one"
155+
156+
mock_field_4 = MagicMock()
157+
mock_field_4.GetName.return_value = "field_two"
158+
159+
mock_layer_defn_compatible = MagicMock()
160+
mock_layer_defn_compatible.GetFieldCount.return_value = 2
161+
mock_layer_defn_compatible.GetFieldDefn.side_effect = [mock_field_3, mock_field_4].__getitem__
162+
163+
mock_layer_compatible.GetLayerDefn.return_value = mock_layer_defn_compatible
164+
165+
self.assertFalse(has_incompatible_field_names(mock_layer_compatible))
166+
72167
def test_create_dynamic_model_fields(self):
73168
try:
74169
# Prepare the test

geonode/upload/handlers/common/vector.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
from geonode.upload.utils import ImporterRequestAction as ira
6464
from geonode.security.registry import permissions_registry
6565
from geonode.storage.manager import FileSystemStorageManager
66+
from geonode.upload.utils import create_vrt_file, has_incompatible_field_names
6667

6768

6869
logger = logging.getLogger("importer")
@@ -260,7 +261,10 @@ def create_ogr2ogr_command(files, original_name, ovverwrite_layer, alternate):
260261
_datastore["USER"],
261262
_datastore["PASSWORD"],
262263
)
263-
options += f'"{files.get("base_file")}"' + " "
264+
# vrt file is aready created in import_resource and vrt will be auto detected by ogr2ogr
265+
# and also the base_file will work so can be used as alternative for fallback which will also be autodeteced by ogr2ogr.
266+
input_file = files.get("temp_vrt_file") or files.get("base_file")
267+
options += f'"{input_file}"' + " "
264268

265269
options += f'-nln {alternate} "{original_name}"'
266270

@@ -414,6 +418,12 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str:
414418
)
415419
# and layer.GetGeometryColumn() is not None
416420
):
421+
_files = files.copy()
422+
vrt_layer_name = None
423+
if has_incompatible_field_names(layer):
424+
vrt_filename, vrt_layer_name = create_vrt_file(layer, files.get("base_file"))
425+
_files["temp_vrt_file"] = vrt_filename
426+
417427
# update the execution request object
418428
# setup dynamic model and retrieve the group task needed for tun the async workflow
419429
# create the async task for create the resource into geonode_data with ogr2ogr
@@ -433,8 +443,8 @@ def import_resource(self, files: dict, execution_id: str, **kwargs) -> str:
433443

434444
ogr_res = self.get_ogr2ogr_task_group(
435445
execution_id,
436-
files,
437-
layer.GetName().lower(),
446+
_files,
447+
vrt_layer_name or layer.GetName().lower(),
438448
should_be_overwritten,
439449
alternate,
440450
)
@@ -594,7 +604,9 @@ def create_dynamic_model_fields(
594604
return_celery_group: bool = True,
595605
):
596606
# retrieving the field schema from ogr2ogr and converting the type to Django Types
597-
layer_schema = [{"name": x.name.lower(), "class_name": self._get_type(x), "null": True} for x in layer.schema]
607+
layer_schema = [
608+
{"name": self.fixup_name(x.name), "class_name": self._get_type(x), "null": True} for x in layer.schema
609+
]
598610
if (
599611
layer.GetGeometryColumn()
600612
or self.default_geometry_column_name
@@ -1329,6 +1341,10 @@ def import_with_ogr2ogr(
13291341

13301342
process = Popen(" ".join(commands), stdout=PIPE, stderr=PIPE, shell=True)
13311343
stdout, stderr = process.communicate()
1344+
1345+
if files.get("temp_vrt_file") and os.path.exists(files["temp_vrt_file"]):
1346+
os.remove(files["temp_vrt_file"])
1347+
13321348
if (
13331349
stderr is not None
13341350
and stderr != b""
@@ -1345,6 +1361,8 @@ def import_with_ogr2ogr(
13451361
raise Exception(f"{message} for layer {alternate}")
13461362
return "ogr2ogr", alternate, execution_id
13471363
except Exception as e:
1364+
if files.get("temp_vrt_file") and os.path.exists(files.get("temp_vrt_file")):
1365+
os.remove(files["temp_vrt_file"])
13481366
call_rollback_function(
13491367
execution_id,
13501368
handlers_module_path=handler_module_path,

geonode/upload/handlers/tests.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,9 @@ def test_fixup_name_replace_digits_with_underscore(self):
9292
expected_name = "_layername"
9393
actual = BaseHandler().fixup_name(layer_name)
9494
self.assertEqual(expected_name, actual)
95+
96+
def test_fixup_name_replace_digits_with_special_character(self):
97+
layer_name = "5Test-Name# \\&.()!$%*+@[]~end"
98+
expected_name = "_test_name____end"
99+
actual = BaseHandler().fixup_name(layer_name)
100+
self.assertEqual(expected_name, actual)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<OGRVRTDataSource>
2+
<OGRVRTLayer name="{{ layer_name|escape }}">
3+
<SrcDataSource>{{ source_filepath|escape }}</SrcDataSource>
4+
<SrcLayer>{{ original_layer_name|escape }}</SrcLayer>
5+
{% for field in fields %}
6+
<Field name="{{ field.name|escape }}" src="{{ field.src|escape }}" type="{{ field.type|escape }}" />
7+
{% endfor %}
8+
</OGRVRTLayer>
9+
</OGRVRTDataSource>

geonode/upload/utils.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#
1818
#########################################################################
1919
import enum
20+
import tempfile
21+
import os
2022
from geonode.resource.manager import ResourceManager
2123
from geonode.geoserver.manager import GeoServerResourceManager
2224
from geonode.base.models import ResourceBase
@@ -30,6 +32,7 @@
3032
from geonode.resource.models import ExecutionRequest
3133
from django.core.exceptions import ObjectDoesNotExist
3234
from django.conf import settings
35+
from django.template.loader import render_to_string
3336

3437

3538
def get_max_upload_size(slug):
@@ -48,6 +51,61 @@ def get_max_upload_parallelism_limit(slug):
4851
return max_number
4952

5053

54+
def has_incompatible_field_names(layer):
55+
"""
56+
Checks if any of the layer's field names contain special
57+
characters that need sanitization, Currently used to check if VRT need to be created or not.
58+
"""
59+
from geonode.upload.handlers.base import BaseHandler
60+
61+
def _name_needs_sanitization(name):
62+
# fixup_name lowercases, replaces special chars, and truncates.
63+
# If the sanitized name is different from just the lowercased name,
64+
# it means special characters were involved that need handling.
65+
return BaseHandler().fixup_name(name) != name.lower()
66+
67+
layer_defn = layer.GetLayerDefn()
68+
for i in range(layer_defn.GetFieldCount()):
69+
field_name = layer_defn.GetFieldDefn(i).GetName()
70+
if _name_needs_sanitization(field_name):
71+
return True
72+
73+
return False
74+
75+
76+
def create_vrt_file(layer, source_filepath):
77+
"""
78+
Dynamically creates a VRT file to sanitize field names.
79+
"""
80+
from geonode.upload.handlers.base import BaseHandler
81+
82+
vrt_layer_name = BaseHandler().fixup_name(layer.GetName())
83+
84+
layer_defn = layer.GetLayerDefn()
85+
fields = [
86+
{
87+
"name": BaseHandler().fixup_name(layer_defn.GetFieldDefn(i).GetName()),
88+
"src": layer_defn.GetFieldDefn(i).GetName(),
89+
"type": layer_defn.GetFieldDefn(i).GetTypeName(),
90+
}
91+
for i in range(layer_defn.GetFieldCount())
92+
]
93+
94+
context = {
95+
"layer_name": vrt_layer_name,
96+
"source_filepath": source_filepath,
97+
"original_layer_name": layer.GetName(),
98+
"fields": fields,
99+
}
100+
vrt_content = render_to_string("upload/vrt_template.xml", context)
101+
102+
vrt_fd, vrt_filename = tempfile.mkstemp(suffix=".vrt")
103+
with os.fdopen(vrt_fd, "w") as f:
104+
f.write(vrt_content)
105+
106+
return vrt_filename, vrt_layer_name
107+
108+
51109
class ImporterRequestAction(enum.Enum):
52110
ROLLBACK = _("rollback")
53111
RESOURCE_METADATA_UPLOAD = _("resource_metadata_upload")

0 commit comments

Comments
 (0)