Skip to content
This repository was archived by the owner on Sep 26, 2025. It is now read-only.

Commit e204c94

Browse files
committed
Fix parsing of UTF-8 source files that start with a BOM (issue #58).
1 parent a02e8ae commit e204c94

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

compiler/sc1.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
* License for the specific language governing permissions and limitations
2424
* under the License.
2525
*
26-
* Version: $Id: sc1.c 6932 2023-04-03 13:56:19Z thiadmer $
26+
* Version: $Id: sc1.c 6965 2023-07-20 15:44:35Z thiadmer $
2727
*/
2828
#include <assert.h>
2929
#include <ctype.h>
@@ -592,6 +592,7 @@ int pc_compile(int argc, char *argv[])
592592
if (inpf_org==NULL)
593593
error(100,inpfname);
594594
freading=TRUE;
595+
sc_is_utf8=(short)scan_utf8(inpf_org,inpfname);
595596
outf=(FILE*)pc_openasm(outfname); /* first write to assembler file (may be temporary) */
596597
if (outf==NULL)
597598
error(101,outfname);

compiler/sci18n.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* is allocated statically, so loading SBCS tables cannot fail (if the tables
1313
* themselves are valid, of course).
1414
*
15-
* Copyright (c) CompuPhase, 2004-2016
15+
* Copyright (c) CompuPhase, 2004-2023
1616
*
1717
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
1818
* use this file except in compliance with the License. You may obtain a copy
@@ -26,7 +26,7 @@
2626
* License for the specific language governing permissions and limitations
2727
* under the License.
2828
*
29-
* Version: $Id: sci18n.c 6932 2023-04-03 13:56:19Z thiadmer $
29+
* Version: $Id: sci18n.c 6965 2023-07-20 15:44:35Z thiadmer $
3030
*/
3131
#include <assert.h>
3232
#include <stdio.h>
@@ -413,8 +413,8 @@ SC_FUNC int scan_utf8(FILE *fp,const char *filename)
413413
} /* while */
414414
pc_resetsrc(fp,resetpos);
415415
if (bom_found) {
416-
unsigned char bom[3];
417-
pc_readsrc(fp,bom,3); /* read the BOM again to strip it from the file */
416+
unsigned char bom[4];
417+
pc_readsrc(fp,bom,4); /* read the BOM again to strip it from the file */
418418
assert(bom[0]==0xef && bom[1]==0xbb && bom[2]==0xbf);
419419
if (!utf8)
420420
error(77,filename); /* malformed UTF-8 encoding */

0 commit comments

Comments
 (0)