Skip to content

Commit 6b059ab

Browse files
Merge pull request #17 from dreamer-coding/main
Parser patch update
2 parents 23d6506 + 143e147 commit 6b059ab

27 files changed

+2401
-1568
lines changed

code/logic/csv.c

Lines changed: 108 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,8 @@ static int csv_row_add_field(fossil_media_csv_row_t *row, const char *field) {
3939
return 0;
4040
}
4141

42-
/* Parse CSV text */
43-
fossil_media_csv_doc_t *
44-
fossil_media_csv_parse(const char *csv_text, char delimiter, fossil_media_csv_error_t *err_out) {
42+
/* Enhanced CSV parser: handles quoted fields, embedded newlines, whitespace, empty fields, trailing newlines, custom delimiter */
43+
fossil_media_csv_doc_t *fossil_media_csv_parse(const char *csv_text, char delimiter, fossil_media_csv_error_t *err_out) {
4544
if (err_out) *err_out = FOSSIL_MEDIA_CSV_OK;
4645
if (!csv_text) {
4746
if (err_out) *err_out = FOSSIL_MEDIA_CSV_ERR_INVALID_ARG;
@@ -59,70 +58,145 @@ fossil_media_csv_parse(const char *csv_text, char delimiter, fossil_media_csv_er
5958
char buffer[4096];
6059
size_t buf_len = 0;
6160
int in_quotes = 0;
61+
int field_started = 0;
62+
int error = 0;
6263

63-
while (*p) {
64-
char c = *p++;
64+
while (*p && !error) {
65+
char c = *p;
6566

6667
if (in_quotes) {
6768
if (c == '"') {
68-
if (*p == '"') { /* Escaped quote */
69-
buffer[buf_len++] = '"';
70-
p++;
69+
if (*(p + 1) == '"') { /* Escaped quote */
70+
if (buf_len < sizeof(buffer) - 1) {
71+
buffer[buf_len++] = '"';
72+
p += 2;
73+
continue;
74+
} else {
75+
error = 1;
76+
break;
77+
}
7178
} else {
72-
in_quotes = 0; /* End quote */
79+
in_quotes = 0;
80+
p++;
81+
continue;
7382
}
7483
} else {
75-
buffer[buf_len++] = c;
84+
if (buf_len < sizeof(buffer) - 1) {
85+
buffer[buf_len++] = c;
86+
p++;
87+
continue;
88+
} else {
89+
error = 1;
90+
break;
91+
}
7692
}
7793
} else {
7894
if (c == '"') {
7995
in_quotes = 1;
96+
field_started = 1;
97+
p++;
98+
continue;
8099
} else if (c == delimiter) {
81100
buffer[buf_len] = '\0';
82-
if (csv_row_add_field(&current_row, buffer) < 0) goto fail;
101+
if (csv_row_add_field(&current_row, buffer) < 0) {
102+
error = 1;
103+
break;
104+
}
83105
buf_len = 0;
106+
field_started = 0;
107+
p++;
108+
continue;
84109
} else if (c == '\n' || c == '\r') {
85-
/* End of row */
86110
buffer[buf_len] = '\0';
87-
if (csv_row_add_field(&current_row, buffer) < 0) goto fail;
111+
if (field_started || buf_len > 0 || current_row.field_count > 0 || c == '\n' || c == '\r') {
112+
if (csv_row_add_field(&current_row, buffer) < 0) {
113+
error = 1;
114+
break;
115+
}
116+
}
88117
buf_len = 0;
118+
field_started = 0;
89119

90-
/* Append row */
91120
fossil_media_csv_row_t *new_rows = realloc(doc->rows, (doc->row_count + 1) * sizeof(*doc->rows));
92-
if (!new_rows) goto fail;
121+
if (!new_rows) {
122+
error = 1;
123+
break;
124+
}
93125
doc->rows = new_rows;
94126
doc->rows[doc->row_count++] = current_row;
95127
current_row.fields = NULL;
96128
current_row.field_count = 0;
97129

98-
/* Skip CRLF pairs */
99-
if (c == '\r' && *p == '\n') p++;
130+
if (c == '\r' && *(p + 1) == '\n') p++;
131+
p++;
132+
continue;
133+
} else if (isspace((unsigned char)c) && !field_started) {
134+
p++;
135+
continue;
100136
} else {
101-
buffer[buf_len++] = c;
137+
if (buf_len < sizeof(buffer) - 1) {
138+
buffer[buf_len++] = c;
139+
field_started = 1;
140+
p++;
141+
continue;
142+
} else {
143+
error = 1;
144+
break;
145+
}
102146
}
103147
}
104-
105-
if (buf_len >= sizeof(buffer) - 1) goto fail; /* Field too long */
106148
}
107149

108-
/* Final field/row if not empty */
109-
if (buf_len > 0 || current_row.field_count > 0) {
150+
if (!error && (buf_len > 0 || field_started || current_row.field_count > 0)) {
110151
buffer[buf_len] = '\0';
111-
if (csv_row_add_field(&current_row, buffer) < 0) goto fail;
112-
fossil_media_csv_row_t *new_rows = realloc(doc->rows, (doc->row_count + 1) * sizeof(*doc->rows));
113-
if (!new_rows) goto fail;
114-
doc->rows = new_rows;
115-
doc->rows[doc->row_count++] = current_row;
152+
if (csv_row_add_field(&current_row, buffer) < 0) {
153+
error = 1;
154+
} else {
155+
fossil_media_csv_row_t *new_rows = realloc(doc->rows, (doc->row_count + 1) * sizeof(*doc->rows));
156+
if (!new_rows) {
157+
error = 1;
158+
} else {
159+
doc->rows = new_rows;
160+
doc->rows[doc->row_count++] = current_row;
161+
}
162+
}
116163
}
117164

118-
return doc;
165+
if (!error && doc->row_count == 0 && *csv_text) {
166+
const char *q = csv_text;
167+
while (*q) {
168+
if (*q == '\n' || *q == '\r') {
169+
fossil_media_csv_row_t empty_row = {NULL, 0};
170+
fossil_media_csv_row_t *new_rows = realloc(doc->rows, (doc->row_count + 1) * sizeof(*doc->rows));
171+
if (!new_rows) {
172+
error = 1;
173+
break;
174+
}
175+
doc->rows = new_rows;
176+
doc->rows[doc->row_count++] = empty_row;
177+
if (*q == '\r' && *(q + 1) == '\n') q++;
178+
}
179+
q++;
180+
}
181+
}
182+
183+
if (!error && doc->row_count == 1 && doc->rows[0].field_count == 1 && doc->rows[0].fields[0][0] == '\0' && csv_text[0] == '\0') {
184+
free(doc->rows[0].fields[0]);
185+
free(doc->rows[0].fields);
186+
free(doc->rows);
187+
doc->rows = NULL;
188+
doc->row_count = 0;
189+
}
190+
191+
if (error) {
192+
if (err_out) *err_out = FOSSIL_MEDIA_CSV_ERR_MEMORY;
193+
fossil_media_csv_free(doc);
194+
for (size_t i = 0; i < current_row.field_count; i++) free(current_row.fields[i]);
195+
free(current_row.fields);
196+
return NULL;
197+
}
119198

120-
fail:
121-
if (err_out) *err_out = FOSSIL_MEDIA_CSV_ERR_MEMORY;
122-
fossil_media_csv_free(doc);
123-
for (size_t i = 0; i < current_row.field_count; i++) free(current_row.fields[i]);
124-
free(current_row.fields);
125-
return NULL;
199+
return doc;
126200
}
127201

128202
/* Free CSV doc */

0 commit comments

Comments
 (0)