@@ -74,6 +74,11 @@ typedef struct {
7474 int code ;
7575} TokenMap ;
7676
/* Return 1 when c can be part of a BASIC identifier (an ASCII letter,
 * a digit, or the '$' string-type suffix), otherwise 0.
 */
static int is_ident_char(int c)
{
    if (c == '$') {
        return 1;
    }
    /* Narrow to unsigned char first: <ctype.h> is undefined for negative values. */
    return isalnum((unsigned char)c) != 0;
}
81+
7782static const TokenMap token_map [] = {
7883 {"WHITE" , 5 },
7984 {"RED" , 28 },
@@ -330,6 +335,128 @@ static char *transform_basic_line(const char *input)
330335 return out .buf ;
331336}
332337
338+ /* Normalize certain keywords in a BASIC source line to restore
339+ * CBM-style whitespace that may have been stripped, e.g.:
340+ * IFB3<1THENIFE>10ORD(7)=0THEN GOTO 890
341+ * becomes:
342+ * IF B3<1 THEN IF E>10 OR D(7)=0 THEN GOTO 890
343+ * The transformation is applied only outside of quoted strings.
344+ */
345+ static char * normalize_keywords_line (const char * input )
346+ {
347+ StrBuf out ;
348+ int in_string = 0 ;
349+ size_t i = 0 ;
350+
351+ sb_init (& out );
352+
353+ while (input [i ] != '\0' ) {
354+ char c = input [i ];
355+
356+ if (c == '\"' ) {
357+ in_string = !in_string ;
358+ sb_append_char (& out , c );
359+ i ++ ;
360+ continue ;
361+ }
362+
363+ if (!in_string ) {
364+ char c1 = (char )toupper ((unsigned char )c );
365+ char c2 = (char )toupper ((unsigned char )input [i + 1 ]);
366+ char c3 = (char )toupper ((unsigned char )input [i + 2 ]);
367+ char c4 = (char )toupper ((unsigned char )input [i + 3 ]);
368+
369+ /* IF followed immediately by identifier/digit without space */
370+ if (c1 == 'I' && c2 == 'F' ) {
371+ char next = input [i + 2 ];
372+ if (next != '\0' && !isspace ((unsigned char )next ) && next != ':' ) {
373+ /* Insert space before IF if needed */
374+ if (out .len > 0 ) {
375+ char prev = out .buf [out .len - 1 ];
376+ if (!isspace ((unsigned char )prev ) && prev != ':' && prev != '(' ) {
377+ sb_append_char (& out , ' ' );
378+ }
379+ }
380+ sb_append_str (& out , "IF" );
381+ i += 2 ;
382+ /* Ensure space after IF */
383+ sb_append_char (& out , ' ' );
384+ continue ;
385+ }
386+ }
387+
388+ /* THEN */
389+ if (c1 == 'T' && c2 == 'H' && c3 == 'E' && c4 == 'N' ) {
390+ /* Insert space before THEN if needed */
391+ if (out .len > 0 ) {
392+ char prev = out .buf [out .len - 1 ];
393+ if (!isspace ((unsigned char )prev ) && prev != ':' && prev != '(' ) {
394+ sb_append_char (& out , ' ' );
395+ }
396+ }
397+ sb_append_str (& out , "THEN" );
398+ i += 4 ;
399+ /* Skip any existing spaces after THEN */
400+ while (isspace ((unsigned char )input [i ])) {
401+ i ++ ;
402+ }
403+ /* Ensure one space after THEN if next char is non-separator */
404+ if (input [i ] != '\0' && input [i ] != ':' && !isspace ((unsigned char )input [i ])) {
405+ sb_append_char (& out , ' ' );
406+ }
407+ continue ;
408+ }
409+
410+ /* AND / OR infix operators without spaces.
411+ * Only treat as operators when they are not embedded in identifiers
412+ * (e.g., avoid splitting FOR into F OR, or ORD into OR D).
413+ */
414+ if (c1 == 'A' && c2 == 'N' && c3 == 'D' ) {
415+ char prev_in = (i > 0 ) ? input [i - 1 ] : ' ' ;
416+ char next_in = input [i + 3 ];
417+ if (!is_ident_char (prev_in ) && !is_ident_char (next_in )) {
418+ /* Surround AND with spaces */
419+ if (out .len > 0 ) {
420+ char prev = out .buf [out .len - 1 ];
421+ if (!isspace ((unsigned char )prev ) && prev != '(' ) {
422+ sb_append_char (& out , ' ' );
423+ }
424+ }
425+ sb_append_str (& out , "AND" );
426+ i += 3 ;
427+ if (input [i ] != '\0' && !isspace ((unsigned char )input [i ]) && input [i ] != ')' ) {
428+ sb_append_char (& out , ' ' );
429+ }
430+ continue ;
431+ }
432+ }
433+ if (c1 == 'O' && c2 == 'R' ) {
434+ char prev_in = (i > 0 ) ? input [i - 1 ] : ' ' ;
435+ char next_in = input [i + 2 ];
436+ if (!is_ident_char (prev_in ) && !is_ident_char (next_in )) {
437+ if (out .len > 0 ) {
438+ char prev = out .buf [out .len - 1 ];
439+ if (!isspace ((unsigned char )prev ) && prev != '(' ) {
440+ sb_append_char (& out , ' ' );
441+ }
442+ }
443+ sb_append_str (& out , "OR" );
444+ i += 2 ;
445+ if (input [i ] != '\0' && !isspace ((unsigned char )input [i ]) && input [i ] != ')' ) {
446+ sb_append_char (& out , ' ' );
447+ }
448+ continue ;
449+ }
450+ }
451+ }
452+
453+ sb_append_char (& out , c );
454+ i ++ ;
455+ }
456+
457+ return out .buf ;
458+ }
459+
333460/* Platform-specific handling for ANSI escape sequences.
334461 * On Unix-like systems (macOS/Linux), standard ANSI escapes work in most terminals.
335462 * On Windows, we enable virtual terminal processing where available so that
@@ -663,7 +790,7 @@ static int starts_with_kw(char *p, const char *kw)
663790 return 0 ;
664791 }
665792 }
666- if (p [i ] == '\0' || p [i ] == ' ' || p [i ] == '\t' || p [i ] == ':' || p [i ] == '(' || p [i ] == '$' ) {
793+ if (p [i ] == '\0' || p [i ] == ' ' || p [i ] == '\t' || p [i ] == ':' || p [i ] == '(' || p [i ] == '$' || p [ i ] == '\"' ) {
667794 return 1 ;
668795 }
669796 return 0 ;
@@ -1364,8 +1491,11 @@ static void statement_get(char **p)
13641491 if (ch == EOF ) {
13651492 * vp = make_str ("" );
13661493 } else if (ch == '\n' || ch == '\r' ) {
1367- /* Treat pure newline as "no key" */
1368- * vp = make_str ("" );
1494+ /* Map Enter/Return to PETSCII-style CHR$(13) so ASC(Y$)=13 works. */
1495+ char buf [2 ];
1496+ buf [0 ] = 13 ;
1497+ buf [1 ] = '\0' ;
1498+ * vp = make_str (buf );
13691499 } else {
13701500 char buf [2 ];
13711501 buf [0 ] = (char )ch ;
@@ -1868,8 +1998,10 @@ static int read_identifier(char **p, char *buf, int buf_size)
18681998{
18691999 int i ;
18702000 i = 0 ;
1871- while ((isalpha ((unsigned char )(* * p )) || isdigit ((unsigned char )(* * p )) || * * p == '$' ) && i < buf_size - 1 ) {
1872- buf [i ++ ] = * * p ;
2001+ while (isalpha ((unsigned char )(* * p )) || isdigit ((unsigned char )(* * p )) || * * p == '$' ) {
2002+ if (i < buf_size - 1 ) {
2003+ buf [i ++ ] = * * p ;
2004+ }
18732005 (* p )++ ;
18742006 }
18752007 buf [i ] = '\0' ;
@@ -3055,7 +3187,11 @@ static void load_program(const char *path)
30553187 p ++ ;
30563188 }
30573189 transformed = transform_basic_line (p );
3058- add_or_replace_line (number , transformed );
3190+ {
3191+ char * normalized = normalize_keywords_line (transformed );
3192+ add_or_replace_line (number , normalized );
3193+ free (normalized );
3194+ }
30593195 free (transformed );
30603196 } else {
30613197 /* Numberless mode: reject later lines that suddenly introduce
@@ -3073,7 +3209,9 @@ static void load_program(const char *path)
30733209 auto_line_no += 10 ;
30743210 {
30753211 char * transformed = transform_basic_line (p );
3076- add_or_replace_line (number , transformed );
3212+ char * normalized = normalize_keywords_line (transformed );
3213+ add_or_replace_line (number , normalized );
3214+ free (normalized );
30773215 free (transformed );
30783216 }
30793217 }
0 commit comments