Skip to content

Commit b49759e

Browse files
committed
Using guessed formats to guide the user in picking formats.
Includes a few bug fixes to the guesser!
1 parent 4f7dd3f commit b49759e

File tree

9 files changed

+122
-69
lines changed

9 files changed

+122
-69
lines changed

lib/importer/nunjucks/importer/macros/format_picker.njk

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,21 +59,26 @@
5959
</thead>
6060
<tbody class="govuk-table__body">
6161
{% set mappingsLen = mapping | length %}
62+
{% set possibleFormatsByColumn = importerPossibleColumnFormats(params.data) %}
6263
{% for h in headings.data %}
6364
{% set hIndex = loop.index0 %}
6465
{% set currentValue = importerErrorMappingData(error, hIndex) %}
65-
{% set possibleFormats = importerPossibleColumnFormats(params.data, hIndex) %}
66+
{% set possibleFormats = possibleFormatsByColumn[hIndex] %}
6667

6768
<tr class="govuk-table__row" id="field-{{ h.name | slugify }}">
6869
<th scope="row" class="govuk-table__header">{{ h.name }}</th>
6970
<td class="govuk-table__cell">{{ h.examples }}</td>
7071
<td class="govuk-table__cell govuk-table__cell--numeric">
7172
{% if possibleFormats %}
7273
<select class="govuk-select" style="float: right;" name="format-{{h.index}}">
73-
<option name=""></option>
7474
{% for format in possibleFormats %}
75-
<option value="{{format.name}}" {% if format.name == currentValue %}selected{% endif %}>
76-
{{format.displayName}}
75+
<option value="{{format.name}}"
76+
{% if format.name == currentValue %}selected{% endif %}>
77+
{% if format.likely %}
78+
{{format.displayName}} (*)
79+
{% else %}
80+
{{format.displayName}}
81+
{% endif %}
7782
</option>
7883
{% endfor %}
7984
</select>

lib/importer/nunjucks/views/error-handling/server-error.njk

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,6 @@
4545
{% endblock %}
4646

4747

48-
{% block footer %}
49-
{{ govukFooter({}) }}
50-
{% endblock %}
51-
52-
5348
{% block pageScripts %}
5449
<script>
5550
;(() => {

lib/importer/src/dudk/backend.js

Lines changed: 41 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ exports.SessionSetFile = (sid, filename) => {
7070
///
7171

7272
exports.SessionSetAttributes = (sid, attrDefs) => {
73-
sessionStore.apply(sid, (s) => { s.attribudeDefs = attrDefs })
73+
sessionStore.apply(sid, (s) => { s.attributeDefs = attrDefs })
7474
}
7575

7676
exports.SessionSetHeaderRange = (sid, range) => {
@@ -124,25 +124,57 @@ exports.SessionSetFormatChoices = (sid, choices) => {
124124
// have the required information.
125125
exports.SessionGetColumnsNeedingFormats = (sid) => {
126126
const session = sessionStore.get(sid);
127-
const attributes = session.attribudeDefs;
127+
const attributes = session.attributeDefs;
128128
const mappings = session.mappingRules;
129129
const supportedTypes = attributeTypes.supportedTypes;
130130

131131
let columnsNeedingFormats = new Array();
132132

133133
Object.entries(mappings).forEach((entry) => {
134134
const [colIdx, columnAttribute] = entry;
135-
const columnDef = attributes.find((attr) => attr.name == columnAttribute);
136-
const columnTypeName = columnDef.type;
137-
const columnType = supportedTypes.get(columnTypeName);
138-
if (columnType.formats) {
139-
columnsNeedingFormats.push(colIdx);
135+
if(columnAttribute != "") {
136+
const columnDef = attributes.find((attr) => attr.name == columnAttribute);
137+
const columnTypeName = columnDef.type;
138+
const columnType = supportedTypes.get(columnTypeName);
139+
if (columnType.formats) {
140+
columnsNeedingFormats.push(colIdx);
141+
}
140142
}
141143
});
142144

143145
return columnsNeedingFormats;
144146
};
145147

148+
// Given guesses as returned by SessionGuessTypes and a list of domain-model
149+
// fields (with name and type fields), returns the type guesses with each column
150+
// augmented with a `fields` field - a Map mapping likely field names for this
151+
// column to an array of possible formats for that field.
152+
153+
exports.SessionSuggestFields = (sid, typeGuesses, domainModelFields) => {
154+
let result = [];
155+
156+
typeGuesses.forEach((colTypeGuess) => {
157+
let fieldFormats = new Map(); // Map from field names to likely formats
158+
// Examine every field, and check if they have a type that this column might
159+
// contain
160+
domainModelFields.forEach((field) => {
161+
if(colTypeGuess.types.has(field.type)) {
162+
// Add this field to the possibilities for the column, using the guessed
163+
// formats
164+
fieldFormats.set(field.name, colTypeGuess.types.get(field.type));
165+
}
166+
});
167+
168+
// Extend the column type guess with the "fields" field
169+
let typeGuessesAndFields = Object.assign({}, colTypeGuess);
170+
typeGuessesAndFields.fields = fieldFormats;
171+
result.push(typeGuessesAndFields);
172+
});
173+
174+
return result;
175+
};
176+
177+
146178
///
147179
/// Information about the loaded input file
148180
///
@@ -514,7 +546,7 @@ exports.SessionGuessTypes = (sid, range) => {
514546
}
515547

516548
// How many columns do we have?
517-
const columns = range.end.column-range.start.column;
549+
const columns = (range.end.column-range.start.column)+1;
518550

519551
let guesses = new Array(columns);
520552
if (range.end.row < range.start.row) {
@@ -535,7 +567,7 @@ exports.SessionGuessTypes = (sid, range) => {
535567
const merges = sheet["!merges"];
536568

537569
// Examine each row in turn, looking at the columns in parallel
538-
for (let row = range.start.row; row < range.end.row; row++) {
570+
for (let row = range.start.row; row <= range.end.row; row++) {
539571
let rowData = extractColsFromRow(getMergedRow(data, merges, row),
540572
range.start.column, range.end.column + 1);
541573
for(let col = 0; col < columns; col++) {
@@ -590,34 +622,6 @@ exports.SessionGetSupportedTypes = (sid) => {
590622
return attributeTypes.supportedTypes;
591623
};
592624

593-
// Given guesses as returned by SessionGuessTypes and a list of domain-model
594-
// fields (with name and type fields), returns the type guesses with each column
595-
// augmented with a `fields` field - a Map mapping likely field names for this
596-
// column to an array of possible formats for that field.
597-
598-
exports.SessionSuggestFields = (sid, typeGuesses, domainModelFields) => {
599-
let result = [];
600-
601-
typeGuesses.forEach((colTypeGuess) => {
602-
let fieldFormats = new Map(); // Map from field names to likely formats
603-
// Examine every field, and check if they have a type that this column might
604-
// contain
605-
domainModelFields.forEach((field) => {
606-
if(colTypeGuess.types.has(field.type)) {
607-
// Add this field to the possibilities for the column, using the guessed
608-
// formats
609-
fieldFormats.set(field.name, colTypeGuess.types.get(field.type));
610-
}
611-
});
612-
613-
// Extend the column type guess with the "fields" field
614-
let typeGuessesAndFields = Object.assign({}, colTypeGuess);
615-
typeGuessesAndFields.fields = fieldFormats;
616-
result.push(typeGuessesAndFields);
617-
});
618-
619-
return result;
620-
};
621625

622626
// Return the unique values in each column in the range. Return no more than
623627
// maxValues values for any given column. Return format is an array, one entry

lib/importer/src/dudk/backend.test.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,7 @@ test('guessing types', () => {
603603
const dataRange = {
604604
sheet: 'Sheet1',
605605
start: { row: 1, column: 0 },
606-
end: { row: 6, column: 9 }
606+
end: { row: 6, column: 8 }
607607
};
608608

609609
const guesses = backend.SessionGuessTypes(sid, dataRange);

lib/importer/src/dudk/types/attribute-types.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ exports.supportedTypes = new Map([
111111
["boolean", {displayName: "Y/N", description: "A 'Yes' or 'No'"}],
112112
["date", {displayName: "Date", description: "A full date (eg Year, Month and Day number)",
113113
formats: new Map([
114-
["native", {displayName: "Spreadsheet", description: "A date cell in a spreadsheet"}],
114+
["native", {displayName: "Spreadsheet Date", description: "A date cell in a spreadsheet"}],
115115
["ymd", {displayName: "Y/M/D", description: "A date written in Year, Month, Day order"}],
116116
["ydm", {displayName: "Y/D/M", description: "A date written in Year, Day, Month order"}],
117117
["dmy", {displayName: "D/M/Y", description: "A date written in Day, Month, Year order"}],

lib/importer/src/dudk/types/guesser.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,18 @@ const TYPE_PATTERNS = [
3535

3636
exports.TYPE_PATTERNS = TYPE_PATTERNS;
3737

38+
// Does the format string look like a date? See "Common Date-Time Formats" at
39+
// https://docs.sheetjs.com/docs/csf/features/dates for the source of these
40+
// patterns.
41+
const formatStringPartRE = new RegExp("[^a-zA-Z]+");
42+
// Note that m and mm can also represent minutes in time formats
43+
const formatStringDateParts = ["yy","yyyy","m","mm","mmm","mmmm","mmmmm","d","dd","ddd","dddd"];
44+
function isDateFormat(fmt) {
45+
const parts = fmt.split(formatStringPartRE);
46+
// Is every token in the format string valid for a date format?
47+
return parts.every((part) => formatStringDateParts.includes(part));
48+
}
49+
3850
// Given a sheet.js field, return two values. First is a list of possible types
3951
// for it, as a map from type names to either:
4052

@@ -64,6 +76,13 @@ exports.ListPossibleTypes = (field) => {
6476
case "z": // Empty cell
6577
// Either way, we can't narrow the set of possible types, but it's a blank cell
6678
return [false, true];
79+
case "d": // Date or time or both
80+
if(isDateFormat(field.z)) {
81+
possibleTypes.set("date", ["native"]);
82+
}
83+
// FIXME: Add cases for time formats, eg "h:mm", when we support them
84+
return [possibleTypes, isBlank];
85+
break;
6786
case "s":
6887
// String
6988
if(isBlank) {

lib/importer/src/functions.js

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -56,30 +56,54 @@ const importerErrorMappingData = (error, key) => {
5656

5757
//--------------------------------------------------------------------
5858
// As long as column->field mappings have been set up, this function will return
59-
// a list of possible formats for a specified column (by index). The return
60-
// value will be false if the column is mapped to a field whose type does not
61-
// require formats, otherwise it will be a list of objects with 'name' (internal
62-
// code), 'displayName' (human-facing name) and 'description' (longer description) fields.
59+
// an array indexed on column, with each element being a list of possible formats
60+
// for that column. The array element for a column will be false if the column is mapped to a
61+
// field whose type does not require formats, otherwise it will be a list of
62+
// objects with 'name' (internal code), 'displayName' (human-facing name) and
63+
// 'description' (longer description) fields.
6364
// --------------------------------------------------------------------
64-
const importerPossibleColumnFormats = (data, index) => {
65+
const importerPossibleColumnFormats = (data) => {
66+
// FIXME: This should probably move into the backend, as a SessionSuggestFormats function
6567
const session_data = data[IMPORTER_SESSION_KEY];
6668
const session = new session_lib.Session(session_data);
6769
const supportedTypes = backend_lib.SessionGetSupportedTypes(session.backendSid);
6870
const mappings = backend_lib.SessionGetMappingRules(session.backendSid);
69-
const columnField = mappings[index];
70-
const columnDef = session.fields.find((field) => field.name == columnField);
71-
const columnTypeName = columnDef.type;
72-
const columnType = supportedTypes.get(columnTypeName);
73-
if (columnType.formats) {
74-
const options = Array.from(columnType.formats.entries()).map((fmtEntry) => ({
75-
name: fmtEntry[0],
76-
displayName: fmtEntry[1].displayName,
77-
description: fmtEntry[1].description
78-
}));
79-
return options;
80-
} else {
81-
return false;
82-
}
71+
const hRange = backend_lib.SessionGetHeaderRange(session.backendSid, session.sheet)
72+
const fRange = backend_lib.SessionGetFooterRange(session.backendSid, session.sheet)
73+
const dataRange = backend_lib.SessionSuggestDataRange(session.backendSid, hRange, fRange);
74+
const typeGuesses = backend_lib.SessionGuessTypes(session.backendSid, dataRange);
75+
76+
return Object.entries(mappings).map((entry) => {
77+
const [colIndex, columnField] = entry;
78+
if (!columnField) return false; // Skip unmapped columns
79+
const columnGuesses = typeGuesses[colIndex];
80+
const columnDef = session.fields.find((field) => field.name == columnField);
81+
const columnTypeName = columnDef.type;
82+
const columnType = supportedTypes.get(columnTypeName);
83+
let likelyFormats = new Set(); // Names of formats this column is likely to have, according to the guesser
84+
if (columnGuesses.types && columnGuesses.types.has(columnTypeName)) {
85+
const guessedFormats = columnGuesses.types.get(columnTypeName);
86+
if(guessedFormats && columnType.formats) {
87+
likelyFormats = new Set(Array.from(columnType.formats.keys()).filter((fmtName) => {
88+
return guessedFormats.includes(fmtName);
89+
}));
90+
}
91+
}
92+
93+
if (columnType.formats) {
94+
const options = Array.from(columnType.formats.entries()).map((fmtEntry) => {
95+
return {
96+
name: fmtEntry[0],
97+
displayName: fmtEntry[1].displayName,
98+
description: fmtEntry[1].description,
99+
likely: likelyFormats.has(fmtEntry[0])
100+
};
101+
});
102+
return options;
103+
} else {
104+
return false;
105+
}
106+
});
83107
}
84108

85109
//--------------------------------------------------------------------

lib/importer/templates/format.html

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
<div class="govuk-grid-column-full">
1414
<h1 class="govuk-heading-l">Identify columns</h1>
1515
<p class="govuk-body">
16-
Select a format for any fields that need it. FIXME: Write better text here...
16+
One or more columns may contain data represented in one of a number
17+
of formats, such as dates. Please select the format your file uses
18+
in the table below. Formats that appear to match the data found in
19+
your file are marked with an asterisk (*).
1720
</p>
1821

1922
<h2 class="govuk-heading-m">{{sheet}}</h2>

prototypes/basic/app/views/format.html

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@
4646
<div class="govuk-grid-column-full">
4747
<h1 class="govuk-heading-l">Choose formats</h1>
4848
<p class="govuk-body">
49-
Select a format for any fields that need it. FIXME: Write better text here...
49+
One or more columns may contain data represented in one of a number
50+
of formats, such as dates. Please select the format your file uses
51+
in the table below. Formats that appear to match the data found in
52+
your file are marked with an asterisk (*).
5053
</p>
5154

5255
<h2 class="govuk-heading-m">{{sheet}}</h2>

0 commit comments

Comments
 (0)