Skip to content

Commit d45dfda

Browse files
committed
Data extraction API improvements.
1 parent 904d006 commit d45dfda

File tree

5 files changed

+220
-32
lines changed

5 files changed

+220
-32
lines changed

data-extraction/src/js/api/DataExtractorApi.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ export interface DataExtractor {
5353
* Must be unique among all data extractors.
5454
*/
5555
id: string;
56+
57+
/**
58+
* Filters the data to be extracted.
59+
*/
60+
dataCtor?: string;
5661
getExtractions(
5762
data: unknown,
5863
extractionCollector: ExtractionCollector,
@@ -73,7 +78,11 @@ export interface DataExtractorContext {
7378
*/
7479
evalFn: <TEval>(expression: string) => TEval;
7580

81+
expression: string | undefined;
82+
7683
variablesInScope: Record<string, () => unknown>;
84+
85+
extract(value: unknown): VisualizationData | undefined;
7786
}
7887

7988
export interface DataExtraction {
@@ -86,10 +95,12 @@ export interface DataExtraction {
8695
* A unique id identifying this extraction among all extractions.
8796
* Required to express extraction preferences.
8897
*/
89-
id: string;
98+
id?: string;
99+
90100
/**
91101
* A user friendly name of this extraction.
92102
*/
93-
name: string;
103+
name?: string;
104+
94105
extractData(): VisualizationData;
95106
}

data-extraction/src/js/api/DataExtractorApiImpl.ts

Lines changed: 108 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ import {
77
ExtractionCollector,
88
DataExtractorContext,
99
} from "./DataExtractorApi";
10-
import { DataExtractorInfo } from "../../DataExtractionResult";
10+
import {
11+
DataExtractorInfo,
12+
VisualizationData,
13+
} from "../../DataExtractionResult";
1114
import { registerDefaultExtractors } from "./default-extractors";
1215
import { LoadDataExtractorsFn } from "./LoadDataExtractorsFn";
1316
import * as helpers from "../helpers";
@@ -41,36 +44,56 @@ export class DataExtractorApiImpl implements DataExtractorApi {
4144
preferredDataExtractorId: string | undefined,
4245
variablesInScope: Record<string, () => unknown>
4346
): JSONString<DataResult> {
44-
const extractions = new Array<DataExtraction>();
45-
const extractionCollector: ExtractionCollector = {
46-
addExtraction(extraction) {
47-
extractions.push(extraction);
48-
},
49-
};
50-
51-
const context: DataExtractorContext = {
52-
evalFn,
53-
variablesInScope,
54-
};
55-
56-
DataExtractorApiImpl.lastContext = context;
57-
const value = valueFn();
58-
59-
const extractors = new Array<DataExtractor>();
47+
class ContextImpl implements DataExtractorContext {
48+
constructor(
49+
public readonly variablesInScope: Record<string, () => unknown>,
50+
public readonly expression: string | undefined,
51+
public readonly evalFn: <T>(expression: string) => T,
52+
private readonly _api: DataExtractorApiImpl,
53+
private readonly _parent: ContextImpl | undefined
54+
) {}
55+
56+
get _level(): number {
57+
return this._parent ? this._parent._level + 1 : 0;
58+
}
6059

61-
for (const fn of this.extractorSources.values()) {
62-
fn((extractor) => {
63-
extractors.push(extractor);
64-
}, helpers);
60+
extract(value: any): VisualizationData | undefined {
61+
if (this._level > 10) {
62+
throw new Error(
63+
"extract() called too many times recursively"
64+
);
65+
}
66+
67+
const extractions = this._api.getExtractions(
68+
value,
69+
new ContextImpl(
70+
this.variablesInScope,
71+
undefined,
72+
this.evalFn,
73+
this._api,
74+
this
75+
)
76+
);
77+
if (extractions.length === 0) {
78+
return undefined;
79+
}
80+
return extractions[0].extractData();
81+
}
6582
}
6683

67-
for (const e of [...this.extractors.values(), ...extractors]) {
68-
e.getExtractions(value, extractionCollector, context);
69-
}
84+
const rootContext = new ContextImpl(
85+
variablesInScope,
86+
removeEnd(removeStart(valueFn.toString(), "() => ("), ")").trim(),
87+
evalFn,
88+
this,
89+
undefined
90+
);
7091

92+
DataExtractorApiImpl.lastContext = rootContext;
93+
const value = valueFn();
94+
const extractions = this.getExtractions(value, rootContext);
7195
DataExtractorApiImpl.lastContext = undefined;
7296

73-
extractions.sort((a, b) => b.priority - a.priority);
7497
let usedExtraction = extractions[0];
7598
if (!usedExtraction) {
7699
return this.toJson({ kind: "NoExtractors" } as DataResult);
@@ -87,8 +110,8 @@ export class DataExtractorApiImpl implements DataExtractorApi {
87110

88111
function mapExtractor(e: DataExtraction): DataExtractorInfo {
89112
return {
90-
id: e.id as any,
91-
name: e.name,
113+
id: e.id! as any,
114+
name: e.name!,
92115
priority: e.priority,
93116
};
94117
}
@@ -104,6 +127,50 @@ export class DataExtractorApiImpl implements DataExtractorApi {
104127
} as DataResult);
105128
}
106129

130+
/**
 * Asks every known data extractor for the extractions it can offer for
 * `value` and returns them ordered by descending priority.
 *
 * The extractors consulted are the ones in `this.extractors`, followed by
 * those that the functions in `this.extractorSources` register when they
 * are invoked (they are invoked anew on every call).
 *
 * An extractor that declares `dataCtor` is only consulted when `value` is
 * a non-null object whose constructor name equals `dataCtor`.
 *
 * Extractions added without an `id` or a `name` get the id of the
 * extractor that produced them as a fallback for the missing field.
 *
 * @param value The value to find extractions for.
 * @param context The context handed to each extractor.
 * @returns The collected extractions, highest priority first.
 */
public getExtractions(
  value: unknown,
  context: DataExtractorContext
): DataExtraction[] {
  const extractors: DataExtractor[] = [...this.extractors.values()];
  for (const loadExtractors of this.extractorSources.values()) {
    loadExtractors((extractor) => {
      extractors.push(extractor);
    }, helpers);
  }

  const extractions = new Array<DataExtraction>();

  for (const e of extractors) {
    if (e.dataCtor !== undefined) {
      if (typeof value !== "object" || value === null) {
        continue;
      }
      // Objects without a prototype (e.g. `Object.create(null)`) have no
      // `constructor`; they can never match a constructor name, so they are
      // filtered out instead of raising a TypeError on `.name`.
      const ctor = (value as { constructor?: { name?: unknown } | null })
        .constructor;
      if (ctor === undefined || ctor === null || ctor.name !== e.dataCtor) {
        continue;
      }
    }

    e.getExtractions(
      value,
      {
        addExtraction(extraction) {
          if (extraction.id === undefined) {
            extraction.id = e.id;
          }
          if (extraction.name === undefined) {
            extraction.name = e.id;
          }
          extractions.push(extraction);
        },
      },
      context
    );
  }

  extractions.sort((a, b) => b.priority - a.priority);

  return extractions;
}
173+
107174
public registerDefaultExtractors(preferExisting: boolean = false): void {
108175
// TODO consider preferExisting
109176
registerDefaultExtractors(this);
@@ -120,3 +187,17 @@ export class DataExtractorApiImpl implements DataExtractorApi {
120187
}
121188
}
122189
}
190+
191+
/**
 * Strips `start` from the beginning of `str`.
 *
 * @param str The text to trim.
 * @param start The prefix to remove.
 * @returns `str` without the leading `start`, or `str` unchanged when it
 * does not begin with `start`.
 */
function removeStart(str: string, start: string): string {
  // `slice` replaces the deprecated `substr`.
  return str.startsWith(start) ? str.slice(start.length) : str;
}
197+
198+
/**
 * Strips `end` from the tail of `str`.
 *
 * @param str The text to trim.
 * @param end The suffix to remove.
 * @returns `str` without the trailing `end`, or `str` unchanged when it
 * does not finish with `end`.
 */
function removeEnd(str: string, end: string): string {
  // `slice` replaces the deprecated `substr`. The end index is computed from
  // the lengths (rather than `-end.length`) so an empty `end` keeps `str`.
  return str.endsWith(end) ? str.slice(0, str.length - end.length) : str;
}

data-extraction/src/js/api/default-extractors/StringRangeExtractor.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,11 @@ export class StringRangeExtractor implements DataExtractor {
5050

5151
for (let item of (data as (number | [number, number])[]).slice(1)) {
5252
if (typeof item === "string") {
53-
item = context.evalFn(item);
53+
try {
54+
item = context.evalFn(item);
55+
} catch (e) {
56+
return;
57+
}
5458
if (item === undefined) {
5559
continue;
5660
}

data-extraction/src/js/helpers/find.ts

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,95 @@ export function find(predicate: (obj: unknown) => boolean): unknown {
4242

4343
return undefined;
4444
}
45+
46+
/**
 * Looks through the variables in scope of the currently running data
 * extraction (`DataExtractorApiImpl.lastContext`) and picks one value.
 *
 * A variable qualifies when its value passes both optional filters:
 * `options.ctor` (the value must be a non-null object whose constructor
 * name equals it) and `predicate`.
 *
 * Without `options.nameSimilarTo` the first qualifying value is returned.
 * Otherwise the value whose variable name has the lowest `similarityScore`
 * against `options.nameSimilarTo` is returned; on ties the earliest wins.
 *
 * @param options Filter by constructor name and/or rank by name similarity.
 * @param predicate Optional additional filter applied to each value.
 * @returns The selected value, or `undefined` when no variable qualifies.
 * @throws Error when no data extractor context is active.
 */
export function findVar(
  options: { nameSimilarTo?: string; ctor?: string },
  predicate?: (value: any) => boolean
): unknown {
  const context = DataExtractorApiImpl.lastContext;
  if (!context) {
    throw new Error("No data extractor context!");
  }

  let bestValue: unknown = undefined;
  let bestValueScore: number | undefined = undefined; // minimized

  for (const [name, getValue] of Object.entries(context.variablesInScope)) {
    const v = getValue();

    if (options.ctor !== undefined) {
      if (typeof v !== "object" || v === null) {
        continue;
      }
      // Objects without a prototype (e.g. `Object.create(null)`) have no
      // `constructor`; skip them instead of raising a TypeError on `.name`.
      const ctor = (v as { constructor?: { name?: unknown } | null })
        .constructor;
      if (ctor === undefined || ctor === null || ctor.name !== options.ctor) {
        continue;
      }
    }

    if (predicate && !predicate(v)) {
      continue;
    }

    if (options.nameSimilarTo === undefined) {
      // No ranking requested: the first value that passes the filters wins.
      return v;
    }

    const score = similarityScore(name, options.nameSimilarTo);
    if (bestValueScore === undefined || score < bestValueScore) {
      bestValue = v;
      bestValueScore = score;
    }
  }

  return bestValue;
}
89+
90+
/**
 * Rates how much two strings differ; a lower score means more alike.
 *
 * The score is ten times the Levenshtein distance of the strings as given
 * plus the Levenshtein distance of the strings after sorting their
 * characters, so the order-insensitive distance only acts as a minor term.
 *
 * @param a First string.
 * @param b Second string.
 * @returns The combined distance (0 for identical strings).
 */
function similarityScore(a: string, b: string): number {
  const sortChars = (text: string): string => text.split("").sort().join("");

  const orderedDistance = levenshteinDistance(a, b);
  const unorderedDistance = levenshteinDistance(sortChars(a), sortChars(b));

  return orderedDistance * 10 + unorderedDistance;
}
99+
100+
/**
 * Computes the Levenshtein (edit) distance between two strings: the
 * minimum number of single-character insertions, deletions and
 * substitutions needed to turn one into the other.
 *
 * Characters are compared via `charAt`, i.e. per UTF-16 code unit.
 * Only two rows of the dynamic-programming table are kept at a time.
 *
 * @param a First string.
 * @param b Second string.
 * @returns The edit distance between `a` and `b`.
 */
function levenshteinDistance(a: string, b: string): number {
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // previousRow[j] is the distance between the first j characters of `a`
  // and the prefix of `b` handled so far (initially the empty prefix).
  let previousRow: number[] = Array.from(
    { length: a.length + 1 },
    (_, j) => j
  );

  for (let i = 1; i <= b.length; i++) {
    // Column 0: turning an empty prefix of `a` into i characters of `b`.
    const currentRow: number[] = [i];

    for (let j = 1; j <= a.length; j++) {
      if (b.charAt(i - 1) === a.charAt(j - 1)) {
        currentRow[j] = previousRow[j - 1];
      } else {
        currentRow[j] = Math.min(
          previousRow[j - 1] + 1, // substitution
          currentRow[j - 1] + 1, // insertion
          previousRow[j] + 1 // deletion
        );
      }
    }

    previousRow = currentRow;
  }

  return previousRow[a.length];
}

extension/src/VisualizationBackend/JsVisualizationSupport.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ class JsVisualizationBackend extends VisualizationBackendBase {
146146
: "undefined";
147147

148148
const body = `${getExpressionForDataExtractorApi()}.getData(
149-
e => (${expression}),
149+
() => (${expression}),
150150
expr => eval(expr),
151151
${preferredExtractorExpr},
152152
{${variableNames.map((n) => `${n}: () => ${n}`).join(",")}},
@@ -181,7 +181,7 @@ class JsVisualizationBackend extends VisualizationBackendBase {
181181
if (result.kind === "NoExtractors") {
182182
throw new Error("No extractors");
183183
} else if (result.kind === "Error") {
184-
throw new Error(result.message);
184+
throw new Error(result.message + "\n" + (result as any).stack);
185185
} else if (result.kind === "Data") {
186186
return {
187187
kind: "data",

0 commit comments

Comments
 (0)