Skip to content

Commit fd648ef

Browse files
fix(csharp): improve server-side property name validation (#72)
## Summary Improves server-side property name validation to support standard Spark configuration naming conventions while maintaining security against SQL injection attacks. ### Changes - **Updated validation regex**: Changed from `^[a-zA-Z_]+$` to `^[a-zA-Z0-9_.]+$` - Now supports dots (e.g., `spark.databricks.sql.metricViewV2.enabled `) - Now supports numbers (e.g., `spark.databricks.sql.metricViewV2.enabled `) - Maintains security by blocking special characters - **Refactored validation logic**: - Moved validation into `GetServerSideProperties()` for centralized filtering - Changed `IsValidPropertyName` from `private` to `internal` for testability - Removed duplicate validation code from `ApplyServerSidePropertiesAsync` - **Enhanced observability**: - Added Activity tracing with `connection.server_side_property.filtered` event - Added Activity tracing for SET query failures - Wrapped `ApplyServerSidePropertiesAsync` in `TraceActivityAsync` - **Added comprehensive tests**: - Created `DatabricksConnectionUnitTests.cs` with 41 test cases - Tests cover valid patterns, invalid characters, and edge cases - All tests passing ✅ ### Test Plan ```bash dotnet test --filter "FullyQualifiedName~DatabricksConnectionUnitTests" ``` **Result**: All 41 tests passed ### Examples of Now-Supported Properties - ✅ `spark.sql.adaptive.enabled` - ✅ `spark.executor.instances` - ✅ `spark.databricks.delta.optimizeWrite.enabled` - ✅ `my_custom_property123` ### Security Still blocks potentially dangerous input: - ❌ Hyphens, spaces, semicolons - ❌ Quotes, equals signs - ❌ Special characters that could enable SQL injection 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Sonnet 4.5 <[email protected]>
1 parent 9066f08 commit fd648ef

File tree

2 files changed

+231
-38
lines changed

2 files changed

+231
-38
lines changed

csharp/src/DatabricksConnection.cs

Lines changed: 57 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ protected override TOpenSessionReq CreateSessionRequest()
578578
// If not using queries to set server-side properties, include them in Configuration
579579
if (!_applySSPWithQueries)
580580
{
581-
var serverSideProperties = GetServerSideProperties();
581+
var serverSideProperties = GetServerSideProperties(activity);
582582
foreach (var property in serverSideProperties)
583583
{
584584
req.Configuration[property.Key] = property.Value;
@@ -677,16 +677,32 @@ private async Task SetSchema(string schemaName)
677677

678678
/// <summary>
679679
/// Gets a dictionary of server-side properties extracted from connection properties.
680+
/// Only includes properties with valid property names (letters, numbers, dots, and underscores).
681+
/// Invalid property names are logged to the activity trace and filtered out.
680682
/// </summary>
681-
/// <returns>Dictionary of server-side properties with prefix removed from keys.</returns>
682-
private Dictionary<string, string> GetServerSideProperties()
683+
/// <param name="activity">Optional activity for tracing filtered properties.</param>
684+
/// <returns>Dictionary of server-side properties with prefix removed from keys and invalid names filtered out.</returns>
685+
private Dictionary<string, string> GetServerSideProperties(Activity? activity = null)
683686
{
684-
return Properties
685-
.Where(p => p.Key.ToLowerInvariant().StartsWith(DatabricksParameters.ServerSidePropertyPrefix))
686-
.ToDictionary(
687-
p => p.Key.Substring(DatabricksParameters.ServerSidePropertyPrefix.Length),
688-
p => p.Value
689-
);
687+
var result = new Dictionary<string, string>();
688+
689+
foreach (var property in Properties.Where(p => p.Key.ToLowerInvariant().StartsWith(DatabricksParameters.ServerSidePropertyPrefix)))
690+
{
691+
string propertyName = property.Key.Substring(DatabricksParameters.ServerSidePropertyPrefix.Length);
692+
693+
if (!IsValidPropertyName(propertyName))
694+
{
695+
activity?.AddEvent("connection.server_side_property.filtered", [
696+
new("property_name", propertyName),
697+
new("reason", "Invalid property name format")
698+
]);
699+
continue;
700+
}
701+
702+
result[propertyName] = property.Value;
703+
}
704+
705+
return result;
690706
}
691707

692708
/// <summary>
@@ -695,49 +711,52 @@ private Dictionary<string, string> GetServerSideProperties()
695711
/// <returns>A task representing the asynchronous operation.</returns>
696712
public async Task ApplyServerSidePropertiesAsync()
697713
{
698-
if (!_applySSPWithQueries)
699-
{
700-
return;
701-
}
702-
703-
var serverSideProperties = GetServerSideProperties();
704-
705-
if (serverSideProperties.Count == 0)
706-
{
707-
return;
708-
}
709-
710-
using var statement = new DatabricksStatement(this);
711-
712-
foreach (var property in serverSideProperties)
714+
await this.TraceActivityAsync(async activity =>
713715
{
714-
if (!IsValidPropertyName(property.Key))
716+
if (!_applySSPWithQueries)
715717
{
716-
Debug.WriteLine($"Skipping invalid property name: {property.Key}");
717-
continue;
718+
return;
718719
}
719720

720-
string escapedValue = EscapeSqlString(property.Value);
721-
string query = $"SET {property.Key}={escapedValue}";
722-
statement.SqlQuery = query;
721+
var serverSideProperties = GetServerSideProperties(activity);
723722

724-
try
723+
if (serverSideProperties.Count == 0)
725724
{
726-
await statement.ExecuteUpdateAsync();
725+
return;
727726
}
728-
catch (Exception ex)
727+
728+
activity?.SetTag("connection.server_side_properties.count", serverSideProperties.Count);
729+
730+
using var statement = new DatabricksStatement(this);
731+
732+
foreach (var property in serverSideProperties)
729733
{
730-
Debug.WriteLine($"Error setting server-side property '{property.Key}': {ex.Message}");
734+
string escapedValue = EscapeSqlString(property.Value);
735+
string query = $"SET {property.Key}={escapedValue}";
736+
statement.SqlQuery = query;
737+
738+
try
739+
{
740+
await statement.ExecuteUpdateAsync();
741+
}
742+
catch (Exception ex)
743+
{
744+
activity?.AddEvent("connection.server_side_property.set_failed", [
745+
new("property_name", property.Key),
746+
new("error_message", ex.Message)
747+
]);
748+
}
731749
}
732-
}
750+
});
733751
}
734752

735-
private bool IsValidPropertyName(string propertyName)
753+
internal bool IsValidPropertyName(string propertyName)
736754
{
737-
// Allow only letters and underscores in property names
755+
// Allow property names with letters, numbers, dots, and underscores
756+
// Examples: spark.sql.adaptive.enabled, spark.executor.instances, my_property123
738757
return System.Text.RegularExpressions.Regex.IsMatch(
739758
propertyName,
740-
@"^[a-zA-Z_]+$");
759+
@"^[a-zA-Z0-9_.]+$");
741760
}
742761

743762
private string EscapeSqlString(string value)
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/*
2+
* Copyright (c) 2025 ADBC Drivers Contributors
3+
*
4+
* This file has been modified from its original version, which is
5+
* under the Apache License:
6+
*
7+
* Licensed to the Apache Software Foundation (ASF) under one
8+
* or more contributor license agreements. See the NOTICE file
9+
* distributed with this work for additional information
10+
* regarding copyright ownership. The ASF licenses this file
11+
* to you under the Apache License, Version 2.0 (the
12+
* "License"); you may not use this file except in compliance
13+
* with the License. You may obtain a copy of the License at
14+
*
15+
* http://www.apache.org/licenses/LICENSE-2.0
16+
*
17+
* Unless required by applicable law or agreed to in writing, software
18+
* distributed under the License is distributed on an "AS IS" BASIS,
19+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20+
* See the License for the specific language governing permissions and
21+
* limitations under the License.
22+
*/
23+
24+
using System.Collections.Generic;
25+
using Apache.Arrow.Adbc.Drivers.Apache.Spark;
26+
using Apache.Arrow.Adbc.Drivers.Databricks;
27+
using Xunit;
28+
29+
namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
30+
{
31+
/// <summary>
32+
/// Unit tests for DatabricksConnection class methods.
33+
/// </summary>
34+
public class DatabricksConnectionUnitTests
35+
{
36+
/// <summary>
37+
/// Creates a minimal DatabricksConnection for testing internal methods.
38+
/// </summary>
39+
private DatabricksConnection CreateMinimalConnection()
40+
{
41+
var properties = new Dictionary<string, string>
42+
{
43+
[SparkParameters.HostName] = "test.databricks.com",
44+
[SparkParameters.Token] = "test-token"
45+
};
46+
return new DatabricksConnection(properties);
47+
}
48+
49+
/// <summary>
50+
/// Tests that valid property names with standard Spark patterns are accepted.
51+
/// </summary>
52+
[Theory]
53+
[InlineData("spark.sql.adaptive.enabled")]
54+
[InlineData("spark.executor.instances")]
55+
[InlineData("spark.databricks.delta.optimizeWrite.enabled")]
56+
[InlineData("my_custom_property")]
57+
[InlineData("property123")]
58+
[InlineData("UPPERCASE_PROPERTY")]
59+
[InlineData("mixedCase.property_123")]
60+
[InlineData("a")]
61+
[InlineData("_underscore")]
62+
[InlineData("spark.sql.shuffle.partitions")]
63+
public void IsValidPropertyName_ValidNames_ReturnsTrue(string propertyName)
64+
{
65+
// Arrange
66+
using var connection = CreateMinimalConnection();
67+
68+
// Act
69+
var result = connection.IsValidPropertyName(propertyName);
70+
71+
// Assert
72+
Assert.True(result, $"Property name '{propertyName}' should be valid");
73+
}
74+
75+
/// <summary>
76+
/// Tests that property names with invalid characters are rejected.
77+
/// </summary>
78+
[Theory]
79+
[InlineData("property-with-hyphen")]
80+
[InlineData("property with space")]
81+
[InlineData("property;with;semicolon")]
82+
[InlineData("property'with'quote")]
83+
[InlineData("property\"with\"doublequote")]
84+
[InlineData("property=with=equals")]
85+
[InlineData("property(with)parens")]
86+
[InlineData("property[with]brackets")]
87+
[InlineData("property{with}braces")]
88+
[InlineData("property/with/slash")]
89+
[InlineData("property\\with\\backslash")]
90+
[InlineData("property@with@at")]
91+
[InlineData("property#with#hash")]
92+
[InlineData("property$with$dollar")]
93+
[InlineData("property%with%percent")]
94+
[InlineData("property&with&ampersand")]
95+
[InlineData("property*with*asterisk")]
96+
[InlineData("property+with+plus")]
97+
[InlineData("property!with!exclamation")]
98+
[InlineData("property?with?question")]
99+
public void IsValidPropertyName_InvalidCharacters_ReturnsFalse(string propertyName)
100+
{
101+
// Arrange
102+
using var connection = CreateMinimalConnection();
103+
104+
// Act
105+
var result = connection.IsValidPropertyName(propertyName);
106+
107+
// Assert
108+
Assert.False(result, $"Property name '{propertyName}' should be invalid");
109+
}
110+
111+
/// <summary>
112+
/// Tests that empty or whitespace property names are rejected.
113+
/// </summary>
114+
[Theory]
115+
[InlineData("")]
116+
[InlineData(" ")]
117+
[InlineData(" ")]
118+
[InlineData("\t")]
119+
[InlineData("\n")]
120+
public void IsValidPropertyName_EmptyOrWhitespace_ReturnsFalse(string propertyName)
121+
{
122+
// Arrange
123+
using var connection = CreateMinimalConnection();
124+
125+
// Act
126+
var result = connection.IsValidPropertyName(propertyName);
127+
128+
// Assert
129+
Assert.False(result, $"Property name '{propertyName}' should be invalid");
130+
}
131+
132+
/// <summary>
133+
/// Tests that property names starting with dots or ending with dots are handled correctly.
134+
/// </summary>
135+
[Theory]
136+
[InlineData(".property")] // Starts with dot - currently allowed
137+
[InlineData("property.")] // Ends with dot - currently allowed
138+
[InlineData(".")] // Just a dot - currently allowed
139+
[InlineData("..")] // Multiple dots - currently allowed
140+
public void IsValidPropertyName_DotEdgeCases_BehaviorDocumented(string propertyName)
141+
{
142+
// Arrange
143+
using var connection = CreateMinimalConnection();
144+
145+
// Act
146+
var result = connection.IsValidPropertyName(propertyName);
147+
148+
// Assert
149+
// Current regex pattern ^[a-zA-Z0-9_.]+$ allows dots anywhere
150+
// This test documents the current behavior
151+
Assert.True(result, $"Property name '{propertyName}' is currently accepted by the regex");
152+
}
153+
154+
/// <summary>
155+
/// Tests property names that start with numbers.
156+
/// </summary>
157+
[Theory]
158+
[InlineData("123property")] // Starts with number - currently allowed
159+
[InlineData("1.property")] // Starts with number followed by dot - currently allowed
160+
public void IsValidPropertyName_StartsWithNumber_CurrentlyAllowed(string propertyName)
161+
{
162+
// Arrange
163+
using var connection = CreateMinimalConnection();
164+
165+
// Act
166+
var result = connection.IsValidPropertyName(propertyName);
167+
168+
// Assert
169+
// Current regex pattern ^[a-zA-Z0-9_.]+$ allows starting with numbers
170+
// This test documents the current behavior
171+
Assert.True(result, $"Property name '{propertyName}' is currently accepted by the regex");
172+
}
173+
}
174+
}

0 commit comments

Comments
 (0)