Skip to content

Commit 80f18e6

Browse files
committed
Add parse_text_table()
1 parent f1dd95e commit 80f18e6

File tree

4 files changed

+232
-1
lines changed

4 files changed

+232
-1
lines changed

src/main/java/com/laytonsmith/core/functions/CompositeFunction.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
package com.laytonsmith.core.functions;
22

33
import com.laytonsmith.PureUtilities.Common.StreamUtils;
4+
import com.laytonsmith.core.MSLog;
45
import com.laytonsmith.core.MethodScriptCompiler;
56
import com.laytonsmith.core.ParseTree;
7+
import com.laytonsmith.core.Prefs;
68
import com.laytonsmith.core.Script;
79
import com.laytonsmith.core.compiler.analysis.ParamDeclaration;
810
import com.laytonsmith.core.compiler.analysis.Scope;
@@ -19,6 +21,7 @@
1921
import com.laytonsmith.core.exceptions.ConfigRuntimeException;
2022
import com.laytonsmith.core.exceptions.FunctionReturnException;
2123
import com.laytonsmith.core.natives.interfaces.Mixed;
24+
import java.io.File;
2225

2326
import java.util.HashMap;
2427
import java.util.Map;
@@ -40,13 +43,17 @@ public final Mixed exec(Target t, Environment env, Mixed... args) throws ConfigR
4043
ParseTree tree;
4144
// TODO: Ultimately, this is not scalable. We need to compile and cache these scripts at Java compile time,
4245
// not at runtime the first time a function is used. This is an easier first step though.
46+
File debugFile = null;
47+
if(Prefs.DebugMode()) {
48+
debugFile = new File("/NATIVE-MSCRIPT/" + getName());
49+
}
4350
if(!CACHED_SCRIPTS.containsKey(this.getClass())) {
4451
try {
4552

4653
String script = script();
4754
Scope rootScope = new Scope();
4855
rootScope.addDeclaration(new ParamDeclaration("@arguments", CArray.TYPE, Target.UNKNOWN));
49-
tree = MethodScriptCompiler.compile(MethodScriptCompiler.lex(script, env, null, true),
56+
tree = MethodScriptCompiler.compile(MethodScriptCompiler.lex(script, env, debugFile, true),
5057
env, env.getEnvClasses(), new StaticAnalysis(rootScope, true))
5158
// the root of the tree is null, so go ahead and pull it up
5259
.getChildAt(0);
@@ -77,6 +84,10 @@ public final Mixed exec(Target t, Environment env, Mixed... args) throws ConfigR
7784
} catch (FunctionReturnException ex) {
7885
ret = ex.getReturn();
7986
} catch (ConfigRuntimeException ex) {
87+
if(Prefs.DebugMode()) {
88+
MSLog.GetLogger().e(MSLog.Tags.GENERAL, "Possibly false stacktrace, could be internal error",
89+
ex.getTarget());
90+
}
8091
if(gEnv.GetStackTraceManager().getCurrentStackTrace().isEmpty()) {
8192
ex.setTarget(t);
8293
ConfigRuntimeException.StackTraceElement ste = new ConfigRuntimeException

src/main/java/com/laytonsmith/core/functions/DataTransformations.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,4 +525,50 @@ public Version since() {
525525
}
526526

527527
}
528+
529+
@api
530+
public static class parse_text_table extends CompositeFunction {
531+
532+
@Override
533+
public Class<? extends CREThrowable>[] thrown() {
534+
return new Class[]{CREFormatException.class};
535+
}
536+
537+
@Override
538+
public boolean isRestricted() {
539+
return false;
540+
}
541+
542+
@Override
543+
public Boolean runAsync() {
544+
return null;
545+
}
546+
547+
548+
@Override
549+
public String getName() {
550+
return "parse_text_table";
551+
}
552+
553+
@Override
554+
public Integer[] numArgs() {
555+
return new Integer[]{1, 2};
556+
}
557+
558+
@Override
559+
public String docs() {
560+
return getBundledDocs();
561+
}
562+
563+
@Override
564+
public Version since() {
565+
return MSVersion.V3_3_4;
566+
}
567+
568+
@Override
569+
protected String script() {
570+
return getBundledCode();
571+
}
572+
573+
}
528574
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
array {string table, [array options]} Parses tabular data into an array ----
2+
3+
Given a string such as:
4+
5+
<%PRE|
6+
column1 column2 column3
7+
------- ------- -------
8+
data a1 data a2 data a3
9+
data b1 data b2 data b3
10+
%>
11+
12+
this function will parse the data into an associative array:
13+
14+
<%CODE|
15+
array('column1': array('data a1', 'data b1'), 'column2': array('data a2', 'data b2'), 'column3': array('data a3', 'data b3'))
16+
%>
17+
18+
The second parameter is an optional array of options, which allows for more flexible input. Every option has a
19+
default value.
20+
21+
{|
22+
|-
23+
! scope="col" width="6%" | Setting
24+
! scope="col" width="10%" | Type
25+
! scope="col" width="6%" | Default
26+
! scope="col" width="78%" | Description
27+
|-
28+
| columns
29+
| array
30+
| null
31+
| If the string doesn't have column headings in the first line, these can be provided as an array here. Note that if you
32+
provide this parameter, columnWidth is a required parameter as well.
33+
|-
34+
| columnWidth
35+
| array
36+
| null
37+
| For data that isn't consistently formatted, you may need to provide your own values for the column widths. Normally,
38+
this is calculated automatically based on the first and second lines, but if those don't match the data, or aren't
39+
provided, you need to provide this manually. This should be an array of the same size as the columns option, or one element shorter,
40+
and should contain the width of each column, optionally skipping the last.
41+
For instance, in the example table shown above, the width should be array(16, 20) or array(16, 20, 7). If the last value
42+
is skipped, this means "the rest of the line".
43+
|-
44+
| tabWidth
45+
| int
46+
| 4
47+
| Before converting the data, all tabs are normalized to spaces based on the tab width of the line. For instance, if the
48+
line of data is <pre>"a\tb\tc"</pre> then this will be converted to <pre>"a b c"</pre>, and then the column width
49+
data is used. In cases where data is separated using exclusively spaces this setting won't matter, as the column width
50+
and data should line up in any case. However, if tabs are used, it may misformat depending on the tab width assumptions
51+
that the data originated from. If you can control the data, it is more reliable to output data using spaces rather than
52+
tabs, or use a tab width of 4.
53+
|-
54+
| skipEmptyLines
55+
| boolean
56+
| true
57+
| If true, empty lines are skipped entirely. If false, each blank line adds an empty (zero-length) string to every column in its place.
58+
|}
59+
60+
When using the automatic column width detection, it isn't required to have any particular character used as the header separator
61+
in the second line. Nor is it required to fill the line. It's merely required to have one or more spaces between each
62+
column, and then the column width is measured between the start of each character sequence. For instance, the following table
63+
would be properly parsed as well:
64+
65+
<%PRE|
66+
column1 column2 column3
67+
---------- ------- -
68+
a b c
69+
d e f
70+
%>
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
 * Script body of parse_text_table(). Parses fixed-width tabular text into an associative array
 * that maps each column name to the array of trimmed cell values found in that column.
 *
 * @arguments[0] - string: the raw table text.
 * @arguments[1] - array, optional: options. Keys read below are 'columns', 'columnWidth',
 *                 'tabWidth' (default 4) and 'skipEmptyLines' (default true).
 *
 * Throws FormatException if columnWidth is not the same size as (or one less than) columns, or if
 * the column names must be auto-detected and the input has fewer than two lines.
 */
string @data = @arguments[0];
array @options = array_get(@arguments, 1, associative_array());

array @ret = associative_array();
// NOTE(review): the trailing [] presumably copies the option arrays, so that the pushes onto
// @columnWidth below do not modify the caller's array - confirm.
array @columns = array_get(@options, 'columns', array())[];
array @columnWidth = array_get(@options, 'columnWidth', array())[];
int @tabWidth = array_get(@options, 'tabWidth', 4);
boolean @skipEmptyLines = array_get(@options, 'skipEmptyLines', true);
// Set by whichever branch below applies: the user supplied names, or the names parsed from the header line.
@columnNames = array();

// Two character line endings are listed first; with a bare \n first, the \n\r alternative could never match.
array @lines = reg_split('\r\n|\n\r|\n', @data);

// Expands each tab in @line to spaces, padding up to the next multiple of @tabWidth.
closure @normalizeLine = iclosure(@line, @tabWidth) {
	@output = "";
	for(@i = 0, @i < length(@line), @i++) {
		if(@line[@i] != '\t') {
			@output .= @line[@i];
		} else {
			@output .= string_multiply(" ", @tabWidth - (length(@output) % @tabWidth));
		}
	}
	return(@output);
};

// Cuts @line into one untrimmed section per entry of @columnWidths. A width of INFINITY, or a width that
// runs past the end of the line, takes the rest of the line.
closure @splitLine = iclosure(@line, @columnWidths) {
	array @sections = array();
	@lastX = 0;
	foreach(@width in @columnWidths) {
		try {
			if(@width == math_const('INFINITY') || @lastX + @width > length(@line)) {
				@sections[] = substr(@line, @lastX);
			} else {
				@sections[] = substr(@line, @lastX, @lastX + @width);
			}
		} catch (Exception @ex) {
			// The line ends before the last column, fill with empty cells.
			@sections[] = "";
		}
		@lastX += @width;
	}
	return(@sections);
};

if(array_size(@columns) != 0) {
	// Validate input args. The last width may be omitted, meaning "the rest of the line".
	if(array_size(@columnWidth) < array_size(@columns)) {
		@columnWidth[] = math_const('INFINITY');
	}
	if(array_size(@columnWidth) != array_size(@columns)) {
		throw(FormatException, "columnWidth must be the same size as, or one less than the size of the columns array.");
	}
	// The caller supplied the names, so there is no header to parse and every line is treated as data.
	@columnNames = @columns;
} else {
	if(array_size(@lines) < 2) {
		throw(FormatException, "The input data must have at least 2 lines, the column names, and the header separator.");
	}
	// We need to calculate the columns and columnWidth ourselves.
	// The general approach here is to simply count the characters between
	// the start of one character sequence in the separator line and the start of the
	// next one. This determines the width, then we go back and
	// use that data to split the first line into the column names.
	// NOTE(review): a columnWidth option given without columns is not used on its own here; the detected
	// widths are appended to it. Confirm whether that combination is meant to be supported.
	@headerLine = execute(@lines[0], @tabWidth, @normalizeLine);
	@columnSeparator = execute(@lines[1], @tabWidth, @normalizeLine);
	@inSpaces = false;
	@width = 0;
	for(@i = 0, @i < length(@columnSeparator), @i++) {
		@char = @columnSeparator[@i];
		if(@inSpaces && @char != ' ') {
			// New column here. Finalize this column and reset.
			@columnWidth[] = @width;
			@width = 0;
			@inSpaces = false;
		} else if(@char == ' ') {
			@inSpaces = true;
		}
		@width++;
	}
	// Push infinity on the end, which is the last column width
	@columnWidth[] = math_const('INFINITY');
	// Now we know the column widths, parse the column names
	@columnNames = array_map(execute(@headerLine, @columnWidth, @splitLine), closure(@item) {return(trim(@item))});

	// remove the first two lines (column names and header separator)
	array_remove(@lines, 0);
	array_remove(@lines, 0);
}

foreach(@column in @columnNames) {
	@ret[@column] = array();
}

foreach(@line in @lines) {
	if(@skipEmptyLines) {
		if(trim(@line) == "") {
			continue();
		}
	}
	// Kept separate from @columns so the per-line cells never overwrite the user supplied option.
	@cells = execute(execute(@line, @tabWidth, @normalizeLine), @columnWidth, @splitLine);
	for(@i = 0, @i < array_size(@cells), @i++) {
		@ret[@columnNames[@i]][] = trim(@cells[@i]);
	}
}

return(@ret);

0 commit comments

Comments
 (0)