@@ -39,9 +39,8 @@ static int csv_row_add_field(fossil_media_csv_row_t *row, const char *field) {
3939 return 0 ;
4040}
4141
42- /* Parse CSV text */
43- fossil_media_csv_doc_t *
44- fossil_media_csv_parse (const char * csv_text , char delimiter , fossil_media_csv_error_t * err_out ) {
42+ /* Enhanced CSV parser: handles quoted fields (with "" as an escaped quote), newlines embedded in quoted fields, leading whitespace before a field (skipped), empty fields, trailing newlines, and a custom delimiter */
43+ fossil_media_csv_doc_t * fossil_media_csv_parse (const char * csv_text , char delimiter , fossil_media_csv_error_t * err_out ) {
4544 if (err_out ) * err_out = FOSSIL_MEDIA_CSV_OK ;
4645 if (!csv_text ) {
4746 if (err_out ) * err_out = FOSSIL_MEDIA_CSV_ERR_INVALID_ARG ;
@@ -59,70 +58,145 @@ fossil_media_csv_parse(const char *csv_text, char delimiter, fossil_media_csv_er
5958 char buffer [4096 ];
6059 size_t buf_len = 0 ;
6160 int in_quotes = 0 ;
61+ int field_started = 0 ;
62+ int error = 0 ;
6263
63- while (* p ) {
64- char c = * p ++ ;
64+ while (* p && ! error ) {
65+ char c = * p ;
6566
6667 if (in_quotes ) {
6768 if (c == '"' ) {
68- if (* p == '"' ) { /* Escaped quote */
69- buffer [buf_len ++ ] = '"' ;
70- p ++ ;
69+ if (* (p + 1 ) == '"' ) { /* Escaped quote */
70+ if (buf_len < sizeof (buffer ) - 1 ) {
71+ buffer [buf_len ++ ] = '"' ;
72+ p += 2 ;
73+ continue ;
74+ } else {
75+ error = 1 ;
76+ break ;
77+ }
7178 } else {
72- in_quotes = 0 ; /* End quote */
79+ in_quotes = 0 ;
80+ p ++ ;
81+ continue ;
7382 }
7483 } else {
75- buffer [buf_len ++ ] = c ;
84+ if (buf_len < sizeof (buffer ) - 1 ) {
85+ buffer [buf_len ++ ] = c ;
86+ p ++ ;
87+ continue ;
88+ } else {
89+ error = 1 ;
90+ break ;
91+ }
7692 }
7793 } else {
7894 if (c == '"' ) {
7995 in_quotes = 1 ;
96+ field_started = 1 ;
97+ p ++ ;
98+ continue ;
8099 } else if (c == delimiter ) {
81100 buffer [buf_len ] = '\0' ;
82- if (csv_row_add_field (& current_row , buffer ) < 0 ) goto fail ;
101+ if (csv_row_add_field (& current_row , buffer ) < 0 ) {
102+ error = 1 ;
103+ break ;
104+ }
83105 buf_len = 0 ;
106+ field_started = 0 ;
107+ p ++ ;
108+ continue ;
84109 } else if (c == '\n' || c == '\r' ) {
85- /* End of row */
86110 buffer [buf_len ] = '\0' ;
87- if (csv_row_add_field (& current_row , buffer ) < 0 ) goto fail ;
111+ if (field_started || buf_len > 0 || current_row .field_count > 0 || c == '\n' || c == '\r' ) {
112+ if (csv_row_add_field (& current_row , buffer ) < 0 ) {
113+ error = 1 ;
114+ break ;
115+ }
116+ }
88117 buf_len = 0 ;
118+ field_started = 0 ;
89119
90- /* Append row */
91120 fossil_media_csv_row_t * new_rows = realloc (doc -> rows , (doc -> row_count + 1 ) * sizeof (* doc -> rows ));
92- if (!new_rows ) goto fail ;
121+ if (!new_rows ) {
122+ error = 1 ;
123+ break ;
124+ }
93125 doc -> rows = new_rows ;
94126 doc -> rows [doc -> row_count ++ ] = current_row ;
95127 current_row .fields = NULL ;
96128 current_row .field_count = 0 ;
97129
98- /* Skip CRLF pairs */
99- if (c == '\r' && * p == '\n' ) p ++ ;
130+ if (c == '\r' && * (p + 1 ) == '\n' ) p ++ ;
131+ p ++ ;
132+ continue ;
133+ } else if (isspace ((unsigned char )c ) && !field_started ) {
134+ p ++ ;
135+ continue ;
100136 } else {
101- buffer [buf_len ++ ] = c ;
137+ if (buf_len < sizeof (buffer ) - 1 ) {
138+ buffer [buf_len ++ ] = c ;
139+ field_started = 1 ;
140+ p ++ ;
141+ continue ;
142+ } else {
143+ error = 1 ;
144+ break ;
145+ }
102146 }
103147 }
104-
105- if (buf_len >= sizeof (buffer ) - 1 ) goto fail ; /* Field too long */
106148 }
107149
108- /* Final field/row if not empty */
109- if (buf_len > 0 || current_row .field_count > 0 ) {
150+ if (!error && (buf_len > 0 || field_started || current_row .field_count > 0 )) {
110151 buffer [buf_len ] = '\0' ;
111- if (csv_row_add_field (& current_row , buffer ) < 0 ) goto fail ;
112- fossil_media_csv_row_t * new_rows = realloc (doc -> rows , (doc -> row_count + 1 ) * sizeof (* doc -> rows ));
113- if (!new_rows ) goto fail ;
114- doc -> rows = new_rows ;
115- doc -> rows [doc -> row_count ++ ] = current_row ;
152+ if (csv_row_add_field (& current_row , buffer ) < 0 ) {
153+ error = 1 ;
154+ } else {
155+ fossil_media_csv_row_t * new_rows = realloc (doc -> rows , (doc -> row_count + 1 ) * sizeof (* doc -> rows ));
156+ if (!new_rows ) {
157+ error = 1 ;
158+ } else {
159+ doc -> rows = new_rows ;
160+ doc -> rows [doc -> row_count ++ ] = current_row ;
161+ }
162+ }
116163 }
117164
118- return doc ;
165+ if (!error && doc -> row_count == 0 && * csv_text ) {
166+ const char * q = csv_text ;
167+ while (* q ) {
168+ if (* q == '\n' || * q == '\r' ) {
169+ fossil_media_csv_row_t empty_row = {NULL , 0 };
170+ fossil_media_csv_row_t * new_rows = realloc (doc -> rows , (doc -> row_count + 1 ) * sizeof (* doc -> rows ));
171+ if (!new_rows ) {
172+ error = 1 ;
173+ break ;
174+ }
175+ doc -> rows = new_rows ;
176+ doc -> rows [doc -> row_count ++ ] = empty_row ;
177+ if (* q == '\r' && * (q + 1 ) == '\n' ) q ++ ;
178+ }
179+ q ++ ;
180+ }
181+ }
182+
183+ if (!error && doc -> row_count == 1 && doc -> rows [0 ].field_count == 1 && doc -> rows [0 ].fields [0 ][0 ] == '\0' && csv_text [0 ] == '\0' ) {
184+ free (doc -> rows [0 ].fields [0 ]);
185+ free (doc -> rows [0 ].fields );
186+ free (doc -> rows );
187+ doc -> rows = NULL ;
188+ doc -> row_count = 0 ;
189+ }
190+
191+ if (error ) {
192+ if (err_out ) * err_out = FOSSIL_MEDIA_CSV_ERR_MEMORY ;
193+ fossil_media_csv_free (doc );
194+ for (size_t i = 0 ; i < current_row .field_count ; i ++ ) free (current_row .fields [i ]);
195+ free (current_row .fields );
196+ return NULL ;
197+ }
119198
120- fail :
121- if (err_out ) * err_out = FOSSIL_MEDIA_CSV_ERR_MEMORY ;
122- fossil_media_csv_free (doc );
123- for (size_t i = 0 ; i < current_row .field_count ; i ++ ) free (current_row .fields [i ]);
124- free (current_row .fields );
125- return NULL ;
199+ return doc ;
126200}
127201
128202/* Free CSV doc */
0 commit comments