Skip to content

Commit ecfcf57

Browse files
committed
Delay literal unescape mechanism after preprocessing
1 parent 54e9271 commit ecfcf57

File tree

4 files changed

+180
-214
lines changed

4 files changed

+180
-214
lines changed

src/defs.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,19 @@
1010

1111
/* definitions */
1212

13+
/* Common character-classification macros.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (e.g. "x & 0xff" or "cond ? a : b") is classified as a whole instead of
 * being split by operator precedence.
 *
 * NOTE: the argument is still evaluated more than once; do not pass an
 * expression with side effects such as "*p++".
 *
 * is_whitespace: space or horizontal tab
 * is_newline:    carriage return or line feed
 * is_alnum:      ASCII letter, decimal digit, or underscore
 * is_digit:      decimal digit
 * is_hex:        hexadecimal digit (either letter case)
 */
#define is_whitespace(c) ((c) == ' ' || (c) == '\t')
#define is_newline(c) ((c) == '\r' || (c) == '\n')
#define is_alnum(c)                                              \
    (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || \
     ((c) >= '0' && (c) <= '9') || ((c) == '_'))
#define is_digit(c) ((c) >= '0' && (c) <= '9')
#define is_hex(c) \
    (is_digit(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
21+
1322
/* Limitations: fixed compile-time limits. The code that consumes these
 * constants is outside this excerpt, so the notes below are hedged.
 */
1423
/* NOTE(review): presumably the capacity of a token text buffer - confirm. */
#define MAX_TOKEN_LEN 256
1524
/* NOTE(review): presumably the capacity of an identifier buffer - confirm. */
#define MAX_ID_LEN 64
25+
/* NOTE(review): presumably sized for the longest single escape spelling
 * (backslash, 'x', two hex digits; or backslash and three octal digits)
 * plus a terminator - confirm against its users.
 */
#define MAX_ESCAPED_CHAR_LEN 5
1626
#define MAX_LINE_LEN 256
1727
#define MAX_VAR_LEN 32
1828
#define MAX_TYPE_LEN 32

src/globals.c

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,137 @@ char *intern_string(char *str)
667667
return interned;
668668
}
669669

670+
/* Convert one hexadecimal digit character to its numeric value.
 *
 * @c: character to convert
 *
 * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 when @c is not a
 * hexadecimal digit.
 */
int hex_digit_value(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/* Decode the backslash escape sequences in a NUL-terminated string.
 *
 * @input:       text that may contain escape sequences
 * @output:      destination buffer; always NUL-terminated once the argument
 *               check has passed, including on the error paths
 * @output_size: capacity of @output in bytes, terminator included
 *
 * Recognized escapes: \a \b \f \e \n \r \t \v \\ \' \" \?, \x followed by
 * one or more hex digits, and \ followed by one to three octal digits.
 * Any other escaped character is copied through unchanged. Numeric escapes
 * keep only the low 8 bits of their value.
 *
 * Returns the number of bytes written (terminator excluded; a decoded \0
 * byte is counted), or -1 when:
 *   - @input or @output is NULL, or @output_size is not positive;
 *   - \x is not followed by a hex digit;
 *   - @input ends with a lone backslash;
 *   - @output is too small for the decoded text (the truncated prefix is
 *     left in @output).
 */
int unescape_string(const char *input, char *output, int output_size)
{
    /* A non-positive size leaves no room even for the terminator. */
    if (!input || !output || output_size <= 0)
        return -1;

    int i = 0, j = 0;

    /* Each iteration emits exactly one byte, so checking the room for one
     * byte plus the terminator here is enough to keep every write in bounds.
     */
    while (input[i] != '\0' && j < output_size - 1) {
        if (input[i] == '\\') {
            i++; /* Skip the backslash */

            switch (input[i]) {
            case '\0':
                /* Lone backslash at the end of the input: stop here rather
                 * than stepping over the terminator and reading past it.
                 */
                output[j] = '\0';
                return -1;
            case 'a':
                output[j++] = '\a';
                i++;
                break;
            case 'b':
                output[j++] = '\b';
                i++;
                break;
            case 'f':
                output[j++] = '\f';
                i++;
                break;
            case 'e':
                /* ASCII ESC is 27 (0x1B); there is no portable C literal */
                output[j++] = 27;
                i++;
                break;
            case 'n':
                output[j++] = '\n';
                i++;
                break;
            case 'r':
                output[j++] = '\r';
                i++;
                break;
            case 't':
                output[j++] = '\t';
                i++;
                break;
            case 'v':
                output[j++] = '\v';
                i++;
                break;
            case '\\':
                output[j++] = '\\';
                i++;
                break;
            case '\'':
                output[j++] = '\'';
                i++;
                break;
            case '"':
                output[j++] = '"';
                i++;
                break;
            case '?':
                output[j++] = '\?';
                i++;
                break;
            case 'x': {
                /* Hexadecimal escape sequence: \xhh... */
                i++; /* Skip 'x' */

                if (hex_digit_value(input[i]) < 0) {
                    output[j] = '\0';
                    return -1;
                }

                int value = 0;

                while (hex_digit_value(input[i]) >= 0) {
                    /* Only the low byte survives the conversion to char, so
                     * masking each step gives the same result while keeping
                     * the accumulator from overflowing on long digit runs.
                     */
                    value = (value * 16 + hex_digit_value(input[i])) & 0xFF;
                    i++;
                }

                output[j++] = (char) value;
                break;
            }

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7': {
                /* Octal escape sequence: \ooo (up to 3 digits) */
                int value = 0;
                int digit_count = 0;

                while (input[i] >= '0' && input[i] <= '7' && digit_count < 3) {
                    value = value * 8 + (input[i] - '0');
                    i++;
                    digit_count++;
                }

                output[j++] = (char) value;
                break;
            }
            default:
                /* Unknown escape sequence - treat as literal character */
                output[j++] = input[i];
                i++;
                break;
            }
        } else {
            /* Regular characters */
            output[j++] = input[i++];
        }
    }

    output[j] = '\0';

    /* Input left over means the output buffer filled up first. */
    if (input[i] != '\0')
        return -1;

    return j;
}
800+
670801
type_t *add_type(void)
671802
{
672803
if (types_idx >= MAX_TYPES) {

0 commit comments

Comments
 (0)