Skip to content

Commit fc22d12

Browse files
committed
MultiSplitter now returns group names
To make it clear which of possibly many capturing groups produced a match, the name of the group has been added to the output. For example: select * from GolfStage.dbo.MultiSplitter( 'Hello stuff NN111, NN112 and the thing X001 in the cloud.', '(?<stuff>[A-Z]{2}[0-9]{3})|(?<thing>[A-Z][0-9]{3})' );
1 parent fc81faa commit fc22d12

17 files changed

+79
-54
lines changed

Examples/Golf/sources/source.sql

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ GO
1616
IF Object_Id('dbo.ColumnSplitter', 'PC') IS NOT NULL
1717
DROP PROCEDURE [dbo].[ColumnSplitter];
1818
GO
19+
IF Object_Id('dbo.MultiColumnSplitter', 'PC') IS NOT NULL
20+
DROP PROCEDURE [dbo].[MultiColumnSplitter];
21+
GO
1922
IF Object_Id('dbo.IsType', 'FS') IS NOT NULL
2023
DROP FUNCTION [dbo].[IsType];
2124
GO
@@ -52,28 +55,38 @@ IF EXISTS (
5255
)
5356
BEGIN TRY
5457
ALTER ASSEMBLY Utilities
55-
FROM 'C:\Users\eldle\OneDrive\Documents\GitHub\sisula\code\Utilities' + @version + '.dll'
58+
FROM 'C:\Users\e-lronnback\GitHub\sisula\code\Utilities' + @version + '.dll'
5659
WITH PERMISSION_SET = SAFE;
5760
PRINT 'The .NET CLR for SQL Server ' + @version + ' was updated.'
5861
END TRY BEGIN CATCH
5962
DECLARE @msg VARCHAR(2000) = ERROR_MESSAGE();
60-
IF(PATINDEX('%identical%', @msg) = 0) PRINT ERROR_MESSAGE();
63+
IF(PATINDEX('%identical%', @msg) = 0)
64+
BEGIN
65+
PRINT ERROR_MESSAGE();
66+
END
67+
ELSE
68+
BEGIN TRY
69+
DROP ASSEMBLY Utilities;
70+
END TRY
71+
BEGIN CATCH
72+
PRINT ERROR_MESSAGE();
73+
END CATCH
6174
END CATCH
6275
ELSE -- assembly does not exist
6376
BEGIN TRY
6477
-- since some version of SQL Server 2017, assemblies must be explicitly whitelisted
6578
IF(@version >= 2017 AND OBJECT_ID('sys.sp_add_trusted_assembly') IS NOT NULL)
6679
BEGIN
6780
CREATE TABLE #hash([hash] varbinary(64));
68-
EXEC('INSERT INTO #hash SELECT CONVERT(varbinary(64), ''0x'' + H, 1) FROM OPENROWSET(BULK ''C:\Users\eldle\OneDrive\Documents\GitHub\sisula\code\Utilities' + @version + '.SHA512'', SINGLE_CLOB) T(H);');
81+
EXEC('INSERT INTO #hash SELECT CONVERT(varbinary(64), ''0x'' + H, 1) FROM OPENROWSET(BULK ''C:\Users\e-lronnback\GitHub\sisula\code\Utilities' + @version + '.SHA512'', SINGLE_CLOB) T(H);');
6982
DECLARE @hash varbinary(64);
7083
SELECT @hash = [hash] FROM #hash;
7184
IF NOT EXISTS(SELECT [hash] FROM sys.trusted_assemblies WHERE [hash] = @hash)
7285
EXEC sys.sp_add_trusted_assembly @hash, N'Utilities';
7386
END
7487
CREATE ASSEMBLY Utilities
7588
AUTHORIZATION dbo
76-
FROM 'C:\Users\eldle\OneDrive\Documents\GitHub\sisula\code\Utilities' + @version + '.dll'
89+
FROM 'C:\Users\e-lronnback\GitHub\sisula\code\Utilities' + @version + '.dll'
7790
WITH PERMISSION_SET = SAFE;
7891
PRINT 'The .NET CLR for SQL Server ' + @version + ' was installed.'
7992
END TRY BEGIN CATCH
@@ -112,6 +125,14 @@ CREATE PROCEDURE [dbo].ColumnSplitter(
112125
)
113126
AS EXTERNAL NAME Utilities.ColumnSplitter.InitMethod;
114127
GO
128+
CREATE PROCEDURE [dbo].MultiColumnSplitter(
129+
@table AS nvarchar(4000),
130+
@column AS nvarchar(4000),
131+
@pattern AS nvarchar(4000),
132+
@includeColumns AS nvarchar(4000) = null
133+
)
134+
AS EXTERNAL NAME Utilities.MultiColumnSplitter.InitMethod;
135+
GO
115136
IF NOT EXISTS (
116137
SELECT value
117138
FROM sys.configurations
@@ -137,8 +158,8 @@ GO
137158
--
138159
-- Create: PGA_Kaggle_Stats_RawSplit
139160
--
140-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
141-
-- From: WARP in the WARP domain
161+
-- Generated: Fri Feb 17 10:36:15 UTC+0100 2023 by e-lronnback
162+
-- From: TSE-5GYVY33 in the CORPNET domain
142163
--------------------------------------------------------------------------
143164
CREATE PROCEDURE [dbo].[PGA_Kaggle_CreateRawSplitTable] (
144165
@agentJobId uniqueidentifier = null,
@@ -215,8 +236,8 @@ GO
215236
-- the target of the BULK INSERT operation, since it cannot insert
216237
-- into a table with multiple columns without a format file.
217238
--
218-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
219-
-- From: WARP in the WARP domain
239+
-- Generated: Fri Feb 17 10:36:15 UTC+0100 2023 by e-lronnback
240+
-- From: TSE-5GYVY33 in the CORPNET domain
220241
--------------------------------------------------------------------------
221242
CREATE PROCEDURE [dbo].[PGA_Kaggle_CreateInsertView] (
222243
@agentJobId uniqueidentifier = null,
@@ -289,8 +310,8 @@ GO
289310
-- This job may be called multiple times in a workflow when more than
290311
-- one file matching a given filename pattern is found.
291312
--
292-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
293-
-- From: WARP in the WARP domain
313+
-- Generated: Fri Feb 17 10:36:15 UTC+0100 2023 by e-lronnback
314+
-- From: TSE-5GYVY33 in the CORPNET domain
294315
--------------------------------------------------------------------------
295316
CREATE PROCEDURE [dbo].[PGA_Kaggle_BulkInsert] (
296317
@filename varchar(2000),
@@ -337,7 +358,7 @@ EXEC GolfDW.metadata._WorkSourceToTarget
337358
FORMAT = ''CSV'',
338359
CODEPAGE = ''ACP'',
339360
FIELDQUOTE = ''"'',
340-
FORMATFILE = ''C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\formats\source.xml'',
361+
FORMATFILE = ''C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\formats\source.xml'',
341362
FIRSTROW = 2,
342363
TABLOCK
343364
);
@@ -421,8 +442,8 @@ GO
421442
--
422443
-- Create: PGA_Kaggle_Stats_Split
423444
--
424-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
425-
-- From: WARP in the WARP domain
445+
-- Generated: Fri Feb 17 10:36:15 UTC+0100 2023 by e-lronnback
446+
-- From: TSE-5GYVY33 in the CORPNET domain
426447
--------------------------------------------------------------------------
427448
CREATE PROCEDURE [dbo].[PGA_Kaggle_CreateSplitViews] (
428449
@agentJobId uniqueidentifier = null,
@@ -564,8 +585,8 @@ GO
564585
--
565586
-- Create: PGA_Kaggle_Stats_Error
566587
--
567-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
568-
-- From: WARP in the WARP domain
588+
-- Generated: Fri Feb 17 10:36:15 UTC+0100 2023 by e-lronnback
589+
-- From: TSE-5GYVY33 in the CORPNET domain
569590
--------------------------------------------------------------------------
570591
CREATE PROCEDURE [dbo].[PGA_Kaggle_CreateErrorViews] (
571592
@agentJobId uniqueidentifier = null,
@@ -650,8 +671,8 @@ GO
650671
--
651672
-- Create: PGA_Kaggle_Stats_Typed
652673
--
653-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
654-
-- From: WARP in the WARP domain
674+
-- Generated: Fri Feb 17 10:36:15 UTC+0100 2023 by e-lronnback
675+
-- From: TSE-5GYVY33 in the CORPNET domain
655676
--------------------------------------------------------------------------
656677
CREATE PROCEDURE [dbo].[PGA_Kaggle_CreateTypedTables] (
657678
@agentJobId uniqueidentifier = null,
@@ -723,8 +744,8 @@ GO
723744
--
724745
-- Load: PGA_Kaggle_Stats_Split into PGA_Kaggle_Stats_Typed
725746
--
726-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
727-
-- From: WARP in the WARP domain
747+
-- Generated: Fri Feb 17 10:36:15 UTC+0100 2023 by e-lronnback
748+
-- From: TSE-5GYVY33 in the CORPNET domain
728749
--------------------------------------------------------------------------
729750
CREATE PROCEDURE [dbo].[PGA_Kaggle_SplitRawIntoTyped] (
730751
@agentJobId uniqueidentifier = null,
@@ -842,8 +863,8 @@ GO
842863
-- Key: Player Name (as primary key)
843864
-- Key: Date (as primary key)
844865
--
845-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
846-
-- From: WARP in the WARP domain
866+
-- Generated: Fri Feb 17 10:36:15 UTC+0100 2023 by e-lronnback
867+
-- From: TSE-5GYVY33 in the CORPNET domain
847868
--------------------------------------------------------------------------
848869
CREATE PROCEDURE [dbo].[PGA_Kaggle_AddKeysToTyped] (
849870
@agentJobId uniqueidentifier = null,

Examples/Golf/targets/target.sql

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ GO
1212
-- Map: Birth Date to PL_BID_Player_BirthDate (as static)
1313
-- Map: WorkId to Metadata_PL (as metadata)
1414
--
15-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
16-
-- From: WARP in the WARP domain
15+
-- Generated: Fri Feb 17 10:36:16 UTC+0100 2023 by e-lronnback
16+
-- From: TSE-5GYVY33 in the CORPNET domain
1717
--------------------------------------------------------------------------
1818
CREATE PROCEDURE [dbo].[lPL_Player__PGA_Kaggle_Stats_Typed] (
1919
@agentJobId uniqueidentifier = null,
@@ -134,8 +134,8 @@ GO
134134
-- Map: Statistic to SGR_StatisticGroup (as natural key)
135135
-- Map: WorkId to Metadata_SGR (as metadata)
136136
--
137-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
138-
-- From: WARP in the WARP domain
137+
-- Generated: Fri Feb 17 10:36:16 UTC+0100 2023 by e-lronnback
138+
-- From: TSE-5GYVY33 in the CORPNET domain
139139
--------------------------------------------------------------------------
140140
CREATE PROCEDURE [dbo].[SGR_StatisticGroup__PGA_Kaggle_Stats_Typed] (
141141
@agentJobId uniqueidentifier = null,
@@ -242,8 +242,8 @@ GO
242242
-- Map: Variable to ST_DET_Statistic_Detail (as natural key)
243243
-- Map: WorkId to Metadata_ST (as metadata)
244244
--
245-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
246-
-- From: WARP in the WARP domain
245+
-- Generated: Fri Feb 17 10:36:16 UTC+0100 2023 by e-lronnback
246+
-- From: TSE-5GYVY33 in the CORPNET domain
247247
--------------------------------------------------------------------------
248248
CREATE PROCEDURE [dbo].[lST_Statistic__PGA_Kaggle_Stats_Typed] (
249249
@agentJobId uniqueidentifier = null,
@@ -354,8 +354,8 @@ GO
354354
-- Map: ME_ID to ME_ID (as surrogate key)
355355
-- Map: WorkId to Metadata_ME (as metadata)
356356
--
357-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
358-
-- From: WARP in the WARP domain
357+
-- Generated: Fri Feb 17 10:36:16 UTC+0100 2023 by e-lronnback
358+
-- From: TSE-5GYVY33 in the CORPNET domain
359359
--------------------------------------------------------------------------
360360
CREATE PROCEDURE [dbo].[lME_Measurement__PGA_Kaggle_Stats_Typed__Instance] (
361361
@agentJobId uniqueidentifier = null,
@@ -575,8 +575,8 @@ GO
575575
-- Map: Date to ME_VAL_ChangedAt (as history)
576576
-- Map: WorkId to Metadata_ME (as metadata)
577577
--
578-
-- Generated: Tue Mar 16 13:06:49 UTC+0100 2021 by eldle
579-
-- From: WARP in the WARP domain
578+
-- Generated: Fri Feb 17 10:36:16 UTC+0100 2023 by e-lronnback
579+
-- From: TSE-5GYVY33 in the CORPNET domain
580580
--------------------------------------------------------------------------
581581
CREATE PROCEDURE [dbo].[lME_Measurement__PGA_Kaggle_Stats_Typed__Value] (
582582
@agentJobId uniqueidentifier = null,

Examples/Golf/workflows/workflow.sql

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,14 @@ EXEC sp_add_jobstep
3333
EXEC sp_add_jobstep
3434
@subsystem = 'PowerShell',
3535
@command = '
36-
$files = @(Get-ChildItem FileSystem::"C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\incoming" | Where-Object {$_.Name -match ".*\.csv"})
36+
$files = @(Get-ChildItem FileSystem::"C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\incoming" | Where-Object {$_.Name -match ".*\.csv"})
3737
If ($files.length -eq 0) {
38-
Throw "No matching files were found in C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\incoming"
38+
Throw "No matching files were found in C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\incoming"
3939
} Else {
4040
ForEach ($file in $files) {
4141
$fullFilename = $file.FullName
42-
Move-Item $fullFilename C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\work -force
43-
Write-Output "Moved file: $fullFilename to C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\work"
42+
Move-Item $fullFilename C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\work -force
43+
Write-Output "Moved file: $fullFilename to C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\work"
4444
}
4545
}
4646
',
@@ -71,17 +71,17 @@ EXEC sp_add_jobstep
7171
EXEC sp_add_jobstep
7272
@subsystem = 'PowerShell',
7373
@command = '
74-
$files = @(Get-ChildItem -Recurse FileSystem::"C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\work" | Where-Object {$_.Name -match ".*\.csv"})
74+
$files = @(Get-ChildItem -Recurse FileSystem::"C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\work" | Where-Object {$_.Name -match ".*\.csv"})
7575
If ($files.length -eq 0) {
76-
Throw "No matching files were found in C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\work"
76+
Throw "No matching files were found in C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\work"
7777
} Else {
7878
ForEach ($file in $files) {
7979
$fullFilename = $file.FullName
8080
$modifiedDate = $file.LastWriteTime
8181
Invoke-Sqlcmd "EXEC dbo.PGA_Kaggle_BulkInsert ''$fullFilename'', ''$modifiedDate'', @agentJobId = $(ESCAPE_NONE(JOBID)), @agentStepId = $(ESCAPE_NONE(STEPID))" -Database "GolfStage" -ErrorAction Stop -QueryTimeout 0
8282
Write-Output "Loaded file: $fullFilename"
83-
Move-Item $fullFilename C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\archive -force
84-
Write-Output "Moved file: $fullFilename to C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\archive"
83+
Move-Item $fullFilename C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\archive -force
84+
Write-Output "Moved file: $fullFilename to C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\archive"
8585
}
8686
}
8787
',
@@ -346,9 +346,9 @@ EXEC sp_update_jobstep
346346
-- The workflow definition used when generating the above
347347
DECLARE @xml XML = N'<workflow name="PGA_Kaggle_Workflow">
348348
<variable name="stage" value="GolfStage"/>
349-
<variable name="incomingPath" value="C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\incoming"/>
350-
<variable name="workPath" value="C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\work"/>
351-
<variable name="archivePath" value="C:\Users\eldle\OneDrive\Documents\GitHub\sisula\Examples\Golf\data\archive"/>
349+
<variable name="incomingPath" value="C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\incoming"/>
350+
<variable name="workPath" value="C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\work"/>
351+
<variable name="archivePath" value="C:\Users\e-lronnback\GitHub\sisula\Examples\Golf\data\archive"/>
352352
<variable name="filenamePattern" value=".*\.csv"/>
353353
<variable name="quitWithSuccess" value="1"/>
354354
<variable name="quitWithFailure" value="2"/>

code/Utilities.cs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ public static void FillRow(Object fromEnumeration, [SqlFacet(MaxSize = -1)] out
5454
}
5555
}
5656

57+
5758
/*
5859
* 2019-02-08 Added by Lars Rönnbäck
5960
*/
@@ -68,7 +69,7 @@ public partial class MultiSplitter {
6869
)
6970
]
7071
public static IEnumerable InitMethod([SqlFacet(MaxSize = -1)] SqlString row, SqlString pattern) {
71-
ICollection<Capture> captures = new Collection<Capture>();
72+
ICollection<Tuple<String, Capture>> named_captures = new Collection<Tuple<String,Capture>>();
7273
foreach(Match match in Regex.Matches(row.ToString(), pattern.ToString(), RegexOptions.None)) {
7374
bool first = true;
7475
foreach (Group group in match.Groups) {
@@ -77,15 +78,17 @@ public static IEnumerable InitMethod([SqlFacet(MaxSize = -1)] SqlString row, Sql
7778
}
7879
else {
7980
foreach(Capture capture in group.Captures) {
80-
captures.Add(capture);
81+
named_captures.Add(new Tuple<String, Capture>(group.Name, capture));
8182
}
8283
}
8384
}
8485
}
85-
return captures;
86+
return named_captures;
8687
}
87-
public static void FillRow(Object fromEnumeration, [SqlFacet(MaxSize = -1)] out SqlString match, out SqlInt32 index) {
88-
Capture capture = (Capture) fromEnumeration;
88+
public static void FillRow(Object fromEnumeration, [SqlFacet(MaxSize = -1)] out SqlString match, out SqlInt32 index, out SqlString group) {
89+
Tuple<String, Capture> named_capture = (Tuple<String, Capture>) fromEnumeration;
90+
group = named_capture.Item1;
91+
Capture capture = named_capture.Item2;
8992
match = (capture.Value == String.Empty) ? SqlString.Null : new SqlString(capture.Value);
9093
index = new SqlInt32(capture.Index);
9194
}

code/Utilities2008.SHA512

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0351a80879fe77e43c2ccbfc4f9c47d85adf23c5a06ff71987fce5ff525722e28cb1cececc6bfb208a52c1fae21586e1923d4f384573c260ab4e5dac7de9dfb2
1+
f787ffebdfdb9db27336c9023a2796a6348bb4543a57bd1fd319df85ef6ff9a01d744a572038fb529df095dd751236d6ee86578ebbc5033ee43314d8b25f7903

code/Utilities2008.dll

512 Bytes
Binary file not shown.

code/Utilities2012.SHA512

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
83490beffa859a518ee4fd8d4e6d877b9c226375d637ee1aebab36a28768e1dbc73ac834e596ceec5e3ec228a86b2ca8b78de48e2eb71c741513cdbe7821a81c
1+
2dfd30c3e41f7ec17684618e8983c2dbb128b98eee93a12bac74a7485aba15ff109dcf2ed649bdc26a1e5820e68f7224e3d55df038d5c323e2b167264d44ef17

code/Utilities2012.dll

512 Bytes
Binary file not shown.

code/Utilities2014.SHA512

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
5ecf3938feb356b195c9b9c56fb4b743632253733b5894525bab3c1dc857adbbf2a911df87aa1f3c58a91da320f2267341780e90064aa82de4a53b29b508b524
1+
3b5d5eabfe5cb89121dae6a28c253f9bb774f9ef19e898055f3b4b680ed7cd00517c61ae0908b7113c43315745e243378e74b4ae77a477245605bb671ef2320b

code/Utilities2014.dll

512 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)