Skip to content

Commit a64c387

Browse files
committed
More cleaning of ASCT+B normalization
1 parent e8957e8 commit a64c387

File tree

2 files changed

+9
-7
lines changed

2 files changed

+9
-7
lines changed

src/normalization/normalize-asct-b.js

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,7 @@ function normalizeCtData(context, data) {
158158
// Get the references
159159
const references = row.references
160160
.filter(({id}) => checkNotEmpty(id))
161+
.filter((ref) => ref !== 'No DOI')
161162
.map((ref) => {
162163
const refString = ref.id;
163164
return checkIsDoi(refString) ? normalizeDoi(refString) : normalizeString(refString);
@@ -290,22 +291,22 @@ function normalizeAsctbRecord(context, data) {
290291
// Generate protein biomarker instances
291292
const bpInstances = row.biomarkers_protein
292293
.map((item, order) => generateBmInstance(context, recordNumber, item, order))
293-
.filter(({ source_concept }) => passIdFilterCriteria(context, source_concept));
294+
.filter(({ source_concept }) => passIdFilterCriteria(context, source_concept));
294295

295296
// Generate lipid biomarker instances
296297
const blInstances = row.biomarkers_lipids
297298
.map((item, order) => generateBmInstance(context, recordNumber, item, order))
298-
.filter(({ source_concept }) => passIdFilterCriteria(context, source_concept));
299+
.filter(({ source_concept }) => passIdFilterCriteria(context, source_concept));
299300

300301
// Generate metabolites biomarker instances
301302
const bmInstances = row.biomarkers_meta
302303
.map((item, order) => generateBmInstance(context, recordNumber, item, order))
303-
.filter(({ source_concept }) => passIdFilterCriteria(context, source_concept));
304+
.filter(({ source_concept }) => passIdFilterCriteria(context, source_concept));
304305

305306
// Generate proteoform biomarker instances
306307
const bfInstances = row.biomarkers_prot
307308
.map((item, order) => generateBmInstance(context, recordNumber, item, order))
308-
.filter(({ source_concept }) => passIdFilterCriteria(context, source_concept));
309+
.filter(({ source_concept }) => passIdFilterCriteria(context, source_concept));
309310

310311
// Generate FTU instances
311312
const ftuInstances = row.ftu_types
@@ -480,8 +481,9 @@ function generateReferenceInstance(context, recordNumber, data, index) {
480481
record_number: recordNumber,
481482
order_number: orderNumber,
482483
};
483-
if (name) {
484-
obj.external_id = name;
484+
const cleanName = checkIsDoi(name) ? normalizeDoi(name) : normalizeString(name);
485+
if (cleanName) {
486+
obj.external_id = cleanName;
485487
}
486488
return obj;
487489
}

src/normalization/patches.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,7 @@ export function normalizeDoi(doi) {
335335
export function normalizeString(str) {
336336
return str.replace(/"/g, "'") // Replace all double quotes with single quotes
337337
.replace(/\r/g, "") // Remove carriage return characters
338-
.replace(/\s+/g, " ") // Replace multiple spaces (including leading/trailing spaces) with a single space
338+
.replace(/[\r\n\s\u2000-\u200A\u202F\u205F\u3000\u2028\u2029]+/g, " ") // Replace multiple spaces (including leading/trailing spaces) with a single space
339339
.trim();
340340
}
341341

0 commit comments

Comments
 (0)