1+ import path from 'path' ;
2+
3+ import csvParser from 'csv-parser' ;
14import { fileCollectionFromPath } from 'filelist-utils' ;
25import OCL from 'openchemlib' ;
3- import csvParser from 'csv-parser' ;
6+ import unzipper from 'unzipper' ; // Using unzipper to extract the contents of the ZIP
7+
48import debugLibrary from '../../../../utils/Debug.js' ;
5- import unzipper from 'unzipper' ; // Using unzipper to extract the contents of the ZIP
6- import fs from 'fs' ;
7- import path from 'path' ;
89import { getNoStereosFromCache } from '../../../../utils/getNoStereosFromCache.js' ;
910
1011const debug = debugLibrary ( 'parseCoconuts' ) ;
1112
1213/**
1314 * @description Parse the coconuts CSV file from the ZIP and yield results for MongoDB
1415 * @param {* } zipPath path to the zip file
15- * @param {* } connection MongoDB connection (for logging)
16+ * @param {* } connection MongoDB connection
1617 * @yields {Object} yields MongoDB-ready document
1718 */
1819export async function * parseCoconuts ( zipPath , connection ) {
@@ -30,23 +31,25 @@ export async function* parseCoconuts(zipPath, connection) {
3031 } ) ;
3132
3233 // Sort files by last modified and select the most recent one
33- let fileToRead = fileCollection . files . sort ( ( a , b ) => b . lastModified - a . lastModified ) [ 0 ] ;
34+ let fileToRead = fileCollection . files . sort (
35+ ( a , b ) => b . lastModified - a . lastModified ,
36+ ) [ 0 ] ;
3437
3538 // Adjust relativePath based on environment
3639 if ( process . env . NODE_ENV === 'test' ) {
37- fileToRead . relativePath = folderPath . replace ( 'data/' , '' ) + fileToRead . relativePath ;
40+ fileToRead . relativePath =
41+ folderPath . replace ( 'data/' , '' ) + fileToRead . relativePath ;
3842 } else {
39- fileToRead . relativePath = folderPath . replace ( 'full/' , '' ) + fileToRead . relativePath ;
43+ fileToRead . relativePath =
44+ folderPath . replace ( 'full/' , '' ) + fileToRead . relativePath ;
4045 }
4146
42-
43-
4447 // Extract the ZIP file using unzipper
4548 const zipFilePath = path . resolve ( fileToRead . relativePath ) ;
4649 const directory = await unzipper . Open . file ( zipFilePath ) ;
47-
50+
4851 // Find the CSV file inside the ZIP
49- const csvFile = directory . files . find ( file => file . path . endsWith ( '.csv' ) ) ;
52+ const csvFile = directory . files . find ( ( file ) => file . path . endsWith ( '.csv' ) ) ;
5053
5154 if ( ! csvFile ) {
5255 throw new Error ( 'CSV file not found in ZIP archive' ) ;
@@ -55,36 +58,28 @@ export async function* parseCoconuts(zipPath, connection) {
5558 // Open the CSV file as a stream
5659 const csvStream = csvFile . stream ( ) . pipe ( csvParser ( ) ) ;
5760
58-
5961 // Parsing each row in the CSV stream
6062 for await ( const row of csvStream ) {
61- // console.log('Row:', row.canonical_smiles);
62-
6363 try {
6464 // Skip if required fields are missing
6565 if ( ! row . identifier || ! row . canonical_smiles ) continue ;
6666 // Parse the molecule using OpenChemLib
6767
68- const oclMolecule = OCL . Molecule . fromSmiles (
69- row . canonical_smiles ,
70- ) ;
68+ const oclMolecule = OCL . Molecule . fromSmiles ( row . canonical_smiles ) ;
7169 const ocl = await getNoStereosFromCache (
7270 oclMolecule ,
7371 connection ,
7472 'coconuts' ,
7573 ) ;
76-
7774
7875 // Process taxonomies and comments
7976 const taxonomies = [ ] ;
8077 if ( row . organisms !== '' ) {
8178 const organismsList = row . organisms . split ( '|' ) ;
8279 for ( const entry of organismsList ) {
83-
84- taxonomies . push ( { species : entry } ) ;
85-
80+ taxonomies . push ( { species : entry } ) ;
8681 }
87- }
82+ }
8883
8984 // Prepare the result document
9085 const result = {
@@ -94,25 +89,20 @@ export async function* parseCoconuts(zipPath, connection) {
9489 } ,
9590 } ;
9691
97- if ( row . cas !== '' ) result . data . cas = row . cas ;
92+ if ( row . cas !== '' ) result . data . cas = row . cas ;
9893 if ( row . iupac_name ) result . data . iupacName = row . iupac_name ;
9994 if ( taxonomies . length > 0 ) result . data . taxonomies = taxonomies ;
100- if ( row . name !== '' ) result . data . name = row . name ;
95+ if ( row . name !== '' ) result . data . name = row . name ;
10196
102-
10397 yield result ;
10498 } catch ( e ) {
105- debug . error (
106- `Error processing row ${ row . identifier } : ${ e . message } ` ,
107- {
108- collection : 'coconuts' ,
109- connection,
110- stack : e . stack ,
111- } ,
112- ) ;
99+ debug . error ( `Error processing row ${ row . identifier } : ${ e . message } ` , {
100+ collection : 'coconuts' ,
101+ connection,
102+ stack : e . stack ,
103+ } ) ;
113104 }
114105 }
115-
116106 } catch ( e ) {
117107 if ( connection ) {
118108 await debug . fatal ( e . message , {
0 commit comments