Skip to content

Commit ea91b15

Browse files
committed
add draft of data convert command
1 parent 5da0559 commit ea91b15

File tree

5 files changed

+211
-2
lines changed

5 files changed

+211
-2
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ dotnet tool install --global TALXIS.CLI
2525

2626
After installation, you can run the CLI using the `txc` command from any terminal.
2727

28+
To update the TALXIS CLI to the latest version, use the following command:
29+
30+
```sh
31+
dotnet tool update --global TALXIS.CLI
32+
```
33+
2834
## Command Groups
2935

3036
The CLI is organized into modular command groups. Each group provides a set of related commands.
@@ -33,6 +39,16 @@ The CLI is organized into modular command groups. Each group provides a set of r
3339

3440
Data-related utilities for ETL, Power Query, and automation scenarios.
3541

42+
#### `convert` command
43+
44+
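Converts the tables in an Excel `.xlsx` workbook into the XML data format used by the Configuration Migration Tool (CMT). Each Excel table is exported as an `<entity>` element, each data row as a `<record>`, and each column as a `<field>` whose `name` is the column header and whose `value` is the cell text.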
45+
46+
**Usage:**
47+
48+
```sh
49+
txc data convert --input <export.xlsx> --output <data.xml>
50+
```
51+
3652
#### `server` command
3753

3854
Starts a simple local HTTP server exposing endpoints for ETL/data transformation tasks. Useful for integrating with Power Query or other local ETL tools.
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
using DotMake.CommandLine;
2+
using System;
3+
4+
namespace TALXIS.CLI.Data;
5+
6+
[CliCommand(
7+
Name = "convert",
8+
Description = "Convert tables from an XLSX file to XML."
9+
)]
10+
public class ConvertDataCliCommand
11+
{
12+
[CliOption(
13+
Name = "--input",
14+
Description = "Path to the input XLSX file.",
15+
Required = true
16+
)]
17+
public string? InputPath { get; set; }
18+
19+
[CliOption(
20+
Name = "--output",
21+
Description = "Path to the output XML file.",
22+
Required = true
23+
)]
24+
public string? OutputPath { get; set; }
25+
26+
/// <summary>
/// Reads every Excel table of the workbook at <see cref="InputPath"/> and writes them to
/// <see cref="OutputPath"/> as an <c>entities</c> XML document: one <c>entity</c> per table,
/// one <c>record</c> per data row and one <c>field</c> per column.
/// </summary>
/// <param name="context">CLI invocation context; not used by this command.</param>
/// <remarks>
/// Failures are reported on standard error and never thrown to the caller.
/// </remarks>
public void Run(CliContext context)
{
    if (string.IsNullOrWhiteSpace(InputPath) || string.IsNullOrWhiteSpace(OutputPath))
    {
        ReportFailure("Both --input and --output must be specified.");
        return;
    }

    var xEntities = new System.Xml.Linq.XElement("entities",
        new System.Xml.Linq.XAttribute(System.Xml.Linq.XNamespace.Xmlns + "xsd", "http://www.w3.org/2001/XMLSchema"),
        new System.Xml.Linq.XAttribute(System.Xml.Linq.XNamespace.Xmlns + "xsi", "http://www.w3.org/2001/XMLSchema-instance"),
        new System.Xml.Linq.XAttribute("timestamp", DateTime.UtcNow.ToString("o"))
    );

    try
    {
        using var doc = DocumentFormat.OpenXml.Packaging.SpreadsheetDocument.Open(InputPath, false);
        var workbookPart = doc.WorkbookPart;
        if (workbookPart == null)
        {
            ReportFailure("Invalid XLSX file: missing workbook part.");
            return;
        }

        var sst = workbookPart
            .GetPartsOfType<DocumentFormat.OpenXml.Packaging.SharedStringTablePart>()
            .FirstOrDefault()?.SharedStringTable;

        // Collect the relationship ids of the sheets listed in the workbook once, so each
        // worksheet part can be matched with a set lookup instead of a scan.
        var sheets = workbookPart.Workbook?.Sheets?.Elements<DocumentFormat.OpenXml.Spreadsheet.Sheet>()
            ?? Array.Empty<DocumentFormat.OpenXml.Spreadsheet.Sheet>();
        var listedSheetIds = new HashSet<string>(StringComparer.Ordinal);
        foreach (var sheet in sheets)
        {
            var relationshipId = sheet.Id?.Value;
            if (relationshipId != null)
            {
                listedSheetIds.Add(relationshipId);
            }
        }

        foreach (var wsPart in workbookPart.WorksheetParts)
        {
            // Worksheet parts that are not listed as a sheet of the workbook are skipped.
            if (!listedSheetIds.Contains(workbookPart.GetIdOfPart(wsPart)))
            {
                continue;
            }

            // Built lazily: worksheets without tables never need their rows indexed.
            Dictionary<uint, DocumentFormat.OpenXml.Spreadsheet.Row>? rowsByIndex = null;
            foreach (var tablePart in wsPart.TableDefinitionParts)
            {
                rowsByIndex ??= IndexRows(wsPart.Worksheet);
                var entityElem = BuildEntity(tablePart.Table, rowsByIndex, sst);
                if (entityElem != null)
                {
                    xEntities.Add(entityElem);
                }
            }
        }
    }
    catch (Exception ex)
    {
        ReportFailure($"Error processing XLSX: {ex.Message}");
        return;
    }

    try
    {
        new System.Xml.Linq.XDocument(xEntities).Save(OutputPath);
        Console.WriteLine($"Converted '{InputPath}' to '{OutputPath}'.");
    }
    catch (Exception ex)
    {
        ReportFailure($"Error saving XML: {ex.Message}");
    }
}

/// <summary>Writes <paramref name="message"/> to standard error and flags the process as failed.</summary>
private static void ReportFailure(string message)
{
    Console.Error.WriteLine(message);
    // NOTE(review): Environment.ExitCode is only observed when the host's Main does not
    // return its own exit code — confirm against the CLI entry point.
    Environment.ExitCode = 1;
}

/// <summary>
/// Maps row index to row for a worksheet in a single pass, so table rows can be looked up
/// without rescanning the sheet for every row. The first row wins when an index repeats.
/// </summary>
private static Dictionary<uint, DocumentFormat.OpenXml.Spreadsheet.Row> IndexRows(
    DocumentFormat.OpenXml.Spreadsheet.Worksheet? ws)
{
    var rowsByIndex = new Dictionary<uint, DocumentFormat.OpenXml.Spreadsheet.Row>();
    if (ws == null)
    {
        return rowsByIndex;
    }

    foreach (var row in ws.Descendants<DocumentFormat.OpenXml.Spreadsheet.Row>())
    {
        if (row.RowIndex?.Value is uint index)
        {
            rowsByIndex.TryAdd(index, row);
        }
    }

    return rowsByIndex;
}

/// <summary>
/// Converts one Excel table into an <c>entity</c> element, or returns <c>null</c> when the
/// table has no range reference or its header row is missing from the sheet.
/// </summary>
private static System.Xml.Linq.XElement? BuildEntity(
    DocumentFormat.OpenXml.Spreadsheet.Table? table,
    IReadOnlyDictionary<uint, DocumentFormat.OpenXml.Spreadsheet.Row> rowsByIndex,
    DocumentFormat.OpenXml.Spreadsheet.SharedStringTable? sst)
{
    var refRange = table?.Reference?.Value;
    if (table == null || string.IsNullOrWhiteSpace(refRange))
    {
        return null;
    }

    var tableName = table.Name?.Value ?? "Table";
    var (startCol, startRow, endCol, endRow) = ParseRange(refRange);

    // The table range includes header and totals rows; only the rows in between are data.
    // When the attributes are absent a table has one header row and no totals row.
    uint headerRowCount = table.HeaderRowCount?.Value ?? 1;
    uint totalsRowCount = table.TotalsRowCount?.Value ?? 0;

    var colNames = new List<string>();
    if (headerRowCount > 0)
    {
        if (!rowsByIndex.TryGetValue(startRow, out var headerRow))
        {
            return null;
        }

        for (int col = startCol; col <= endCol; col++)
        {
            colNames.Add(GetCellValue(GetCell(headerRow, col), sst) ?? $"Column{col}");
        }
    }
    else
    {
        // Header-less table: the sheet holds no header cells, so take the column names
        // from the table definition instead of consuming the first data row.
        var definedNames = table.TableColumns?
            .Elements<DocumentFormat.OpenXml.Spreadsheet.TableColumn>()
            .Select(column => column.Name?.Value)
            .ToList() ?? new List<string?>();
        for (int col = startCol, i = 0; col <= endCol; col++, i++)
        {
            colNames.Add((i < definedNames.Count ? definedNames[i] : null) ?? $"Column{col}");
        }
    }

    var recordsElem = new System.Xml.Linq.XElement("records");
    uint firstDataRow = startRow + headerRowCount;
    // Guard the subtraction so a bogus totals count cannot wrap around.
    uint lastDataRow = totalsRowCount < endRow ? endRow - totalsRowCount : 0;
    for (uint rowIdx = firstDataRow; rowIdx <= lastDataRow; rowIdx++)
    {
        // Rows without any stored cells are absent from the sheet XML and yield no record.
        if (!rowsByIndex.TryGetValue(rowIdx, out var row))
        {
            continue;
        }

        var recordElem = new System.Xml.Linq.XElement("record",
            new System.Xml.Linq.XAttribute("id", Guid.NewGuid().ToString())
        );
        for (int i = 0; i < colNames.Count; i++)
        {
            var value = GetCellValue(GetCell(row, startCol + i), sst) ?? string.Empty;
            recordElem.Add(new System.Xml.Linq.XElement("field",
                new System.Xml.Linq.XAttribute("name", colNames[i]),
                new System.Xml.Linq.XAttribute("value", value)
            ));
        }
        recordsElem.Add(recordElem);
    }

    var entityElem = new System.Xml.Linq.XElement("entity",
        new System.Xml.Linq.XAttribute("name", tableName),
        new System.Xml.Linq.XAttribute("displayname", tableName)
    );
    entityElem.Add(recordsElem);
    entityElem.Add(new System.Xml.Linq.XElement("m2mrelationships"));
    return entityElem;
}
134+
135+
// Helpers for Open XML SDK
136+
/// <summary>
/// Parses an A1-style range reference such as <c>B2:D10</c> into 1-based column numbers
/// and row numbers. A reference without <c>:</c> (e.g. <c>C7</c>) is treated as a
/// single-cell range.
/// </summary>
/// <param name="range">The range reference, e.g. the <c>ref</c> attribute of a table.</param>
/// <returns>Start column, start row, end column and end row of the range.</returns>
/// <exception cref="FormatException">A corner of the range is not a valid cell reference.</exception>
private static (int startCol, uint startRow, int endCol, uint endRow) ParseRange(string range)
{
    var parts = range.Split(':');
    var (startCol, startRow) = ParseCellRef(parts[0]);
    // Without a second corner the range covers exactly one cell.
    var (endCol, endRow) = parts.Length > 1 ? ParseCellRef(parts[1]) : (startCol, startRow);
    return (startCol, startRow, endCol, endRow);
}

/// <summary>
/// Parses an A1-style cell reference (<c>A1</c>, <c>ab12</c>, <c>$XFD$7</c>) into a 1-based
/// column number and a row number.
/// </summary>
/// <param name="cellRef">The cell reference; surrounding whitespace and <c>$</c> markers are ignored.</param>
/// <returns>The column number (A = 1, Z = 26, AA = 27, ...) and the row number.</returns>
/// <exception cref="FormatException">
/// The reference has no column letters, no valid row number, or a column too large to represent.
/// </exception>
private static (int col, uint row) ParseCellRef(string cellRef)
{
    var text = cellRef.Trim();
    int col = 0, i = 0;

    if (i < text.Length && text[i] == '$')
    {
        i++; // absolute-column marker
    }

    while (i < text.Length)
    {
        // Fold case by hand and accept ASCII letters only: culture-aware casing maps
        // 'i' to a non-ASCII letter under some cultures, which would corrupt the column.
        char letter = text[i];
        if (letter >= 'a' && letter <= 'z')
        {
            letter = (char)(letter - 'a' + 'A');
        }
        if (letter < 'A' || letter > 'Z')
        {
            break;
        }
        if (col > (int.MaxValue - 26) / 26)
        {
            throw new FormatException($"Invalid cell reference '{cellRef}'.");
        }
        col = col * 26 + (letter - 'A' + 1);
        i++;
    }

    if (i < text.Length && text[i] == '$')
    {
        i++; // absolute-row marker
    }

    // NumberStyles.None: plain digits only, independent of the current culture.
    if (col == 0 || !uint.TryParse(
            text.Substring(i),
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out uint row))
    {
        throw new FormatException($"Invalid cell reference '{cellRef}'.");
    }

    return (col, row);
}
155+
156+
/// <summary>
/// Finds the first row of <paramref name="ws"/> whose row index equals
/// <paramref name="rowIndex"/>.
/// </summary>
/// <param name="ws">Worksheet to search; a null worksheet yields <c>null</c>.</param>
/// <param name="rowIndex">Row index to look for.</param>
/// <returns>The matching row, or <c>null</c> when the worksheet has no such row.</returns>
private static DocumentFormat.OpenXml.Spreadsheet.Row? GetRow(DocumentFormat.OpenXml.Spreadsheet.Worksheet ws, uint rowIndex)
{
    if (ws != null)
    {
        foreach (var candidate in ws.Descendants<DocumentFormat.OpenXml.Spreadsheet.Row>())
        {
            // Rows that carry no row index can never match.
            if (candidate.RowIndex != null && candidate.RowIndex.Value == rowIndex)
            {
                return candidate;
            }
        }
    }

    return null;
}
161+
162+
/// <summary>
/// Finds the cell of <paramref name="row"/> that sits in column
/// <paramref name="colIndex"/>, matching on the cell's reference attribute.
/// </summary>
/// <param name="row">Row to search; a null row yields <c>null</c>.</param>
/// <param name="colIndex">1-based column number (1 = A).</param>
/// <returns>The first cell whose reference matches, or <c>null</c> when there is none.</returns>
private static DocumentFormat.OpenXml.Spreadsheet.Cell? GetCell(DocumentFormat.OpenXml.Spreadsheet.Row? row, int colIndex)
{
    if (row != null)
    {
        // Expected reference: column letters followed by the row's index, e.g. "C7".
        string wantedRef = GetColumnLetter(colIndex) + row.RowIndex;
        foreach (var candidate in row.Elements<DocumentFormat.OpenXml.Spreadsheet.Cell>())
        {
            if (candidate.CellReference?.Value == wantedRef)
            {
                return candidate;
            }
        }
    }

    return null;
}
168+
169+
/// <summary>
/// Converts a 1-based column number into its spreadsheet column letters
/// (1 = A, 26 = Z, 27 = AA, ...).
/// </summary>
/// <param name="colIndex">1-based column number.</param>
/// <returns>The column letters, or an empty string when <paramref name="colIndex"/> is not positive.</returns>
private static string GetColumnLetter(int colIndex)
{
    var letters = new System.Text.StringBuilder();
    // Bijective base-26: peel off the least significant letter and prepend it.
    for (int remaining = colIndex; remaining > 0; remaining = (remaining - 1) / 26)
    {
        letters.Insert(0, (char)('A' + (remaining - 1) % 26));
    }

    return letters.ToString();
}
180+
181+
/// <summary>
/// Returns the text of a cell, resolving shared-string and inline-string cells to their
/// visible text.
/// </summary>
/// <param name="cell">The cell to read; a null cell yields <c>null</c>.</param>
/// <param name="sst">The workbook's shared string table, if it has one.</param>
/// <returns>
/// The resolved string for shared-string and inline-string cells; otherwise the raw stored
/// value text (also used when a shared-string index cannot be resolved), or <c>null</c>
/// when the cell stores no value.
/// </returns>
private static string? GetCellValue(DocumentFormat.OpenXml.Spreadsheet.Cell? cell, DocumentFormat.OpenXml.Spreadsheet.SharedStringTable? sst)
{
    if (cell == null)
    {
        return null;
    }

    // Visible text of a string item is its plain text element plus its rich-text runs.
    // Phonetic runs and phonetic properties are reading hints, not cell content, and
    // InnerText of the whole item would include them.
    static string ReadVisibleText(DocumentFormat.OpenXml.OpenXmlElement stringItem) =>
        string.Concat(stringItem.Elements()
            .Where(child => child is DocumentFormat.OpenXml.Spreadsheet.Text
                         || child is DocumentFormat.OpenXml.Spreadsheet.Run)
            .Select(child => child.InnerText));

    var rawValue = cell.CellValue?.InnerText;
    var dataType = cell.DataType?.Value;

    // Inline strings keep their text inside the cell itself rather than in the stored
    // value, so the raw value is normally null for them.
    if (dataType == DocumentFormat.OpenXml.Spreadsheet.CellValues.InlineString)
    {
        var inlineString = cell.InlineString;
        return inlineString != null ? ReadVisibleText(inlineString) : rawValue;
    }

    if (dataType == DocumentFormat.OpenXml.Spreadsheet.CellValues.SharedString
        && sst != null
        && int.TryParse(
            rawValue,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out int sstIdx)
        && sstIdx >= 0)
    {
        // Single pass over the string items only; other children of the table are not
        // addressable by a shared-string index.
        var item = sst.Elements<DocumentFormat.OpenXml.Spreadsheet.SharedStringItem>()
            .ElementAtOrDefault(sstIdx);
        if (item != null)
        {
            return ReadVisibleText(item);
        }
    }

    return rawValue;
}
192+
}

src/TALXIS.CLI.Data/DataCliCommand.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ namespace TALXIS.CLI.Data;
44

55
[CliCommand(
66
Description = "Data-related utilities for ETL, Power Query and migration scenarios.",
7-
Children = new[] { typeof(ServerCliCommand) }
7+
Children = new[] { typeof(ServerCliCommand), typeof(ConvertDataCliCommand) }
88
)]
99
public class DataCliCommand
1010
{

src/TALXIS.CLI.Data/TALXIS.CLI.Data.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
</PropertyGroup>
88

99
<ItemGroup>
10+
<PackageReference Include="DocumentFormat.OpenXml" Version="3.3.0" />
1011
<PackageReference Include="DotMake.CommandLine" Version="2.6.7" />
1112
</ItemGroup>
1213

src/TALXIS.CLI/TxcCliCommand.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ public class TxcCliCommand
1010
{
1111
public void Run(CliContext context)
1212
{
13-
context.ShowHierarchy();
13+
context.ShowHelp();
1414
}
1515
}

0 commit comments

Comments
 (0)