@@ -4,77 +4,85 @@ use regex::Regex;
44
55use std:: fs:: File ;
66use std:: io:: prelude:: * ;
7- use std:: io:: { BufRead , BufWriter } ;
7+ use std:: io:: BufWriter ;
88
99// Generate character mapping tables directly from the specification.
1010fn main ( ) {
11- // Input from the RFC.
12- let reader = include_bytes ! ( "rfc3454.txt" ) ;
13-
14- // Output to a Rust source file.
1511 let out_file = File :: create ( "../src/rfc3454.rs" ) . unwrap ( ) ;
1612 let mut writer = BufWriter :: new ( out_file) ;
1713
18- // Generate tables.
19- include_table ( & mut writer, & mut & reader[ ..] , "A.1" ) ;
20- include_table ( & mut writer, & mut & reader[ ..] , "B.2" ) ;
14+ write ! ( writer, "// AUTOGENERATED CODE - DO NOT EDIT\n \n " ) . unwrap ( ) ;
15+
16+ lookup_table ( "A.1" , & mut writer) ;
17+ mapping_table ( "B.2" , & mut writer) ;
2118}
2219
23- // Generate code for the named mapping table.
24- fn include_table < R : BufRead , W : Write > ( writer : & mut W , reader : & mut R , tablename : & str ) {
25- // Scan to start of table.
26- loop {
27- let mut line = String :: new ( ) ;
28- reader. read_line ( & mut line) . unwrap ( ) ;
29- if line. contains ( "Start Table" ) && line. contains ( tablename) {
30- break ;
20+ fn lookup_table < W : Write > ( table : & str , writer : & mut W ) {
21+ write ! (
22+ writer,
23+ "pub const {}: &'static [(char, char)] = &[\n " ,
24+ table. replace( "." , "_" )
25+ ) . unwrap ( ) ;
26+
27+ let regex = Regex :: new ( "^([0-9A-F]+)(?:-([0-9A-F]+))?$" ) . unwrap ( ) ;
28+ table_lines ( table, |line| {
29+ let captures = regex. captures ( line) . unwrap ( ) ;
30+ let start = captures. get ( 1 ) . unwrap ( ) . as_str ( ) ;
31+ let end = captures. get ( 2 ) . map_or ( start, |c| c. as_str ( ) ) ;
32+ write ! ( writer, " ('\\ u{{{}}}', '\\ u{{{}}}'),\n " , start, end) . unwrap ( ) ;
33+ } ) ;
34+
35+ write ! ( writer, "];\n \n " ) . unwrap ( ) ;
36+ }
37+
38+ fn mapping_table < W : Write > ( table : & str , writer : & mut W ) {
39+ write ! (
40+ writer,
41+ "pub const {}: &'static [(char, &'static str)] = &[\n " ,
42+ table. replace( "." , "_" ) ,
43+ ) . unwrap ( ) ;
44+
45+ let regex = Regex :: new (
46+ "^([0-9A-F]+); ([0-9A-F]+)(?: ([0-9A-F]+))?(?: ([0-9A-F]+))?(?: ([0-9A-F]+))?;" ,
47+ ) . unwrap ( ) ;
48+ table_lines ( table, |line| {
49+ let captures = regex. captures ( line) . unwrap ( ) ;
50+ let mut it = captures. iter ( ) . filter_map ( |i| i) ;
51+ it. next ( ) ; // skip whole match
52+
53+ let input = it. next ( ) . unwrap ( ) . as_str ( ) ;
54+ write ! ( writer, " ('\\ u{{{}}}', \" " , input) . unwrap ( ) ;
55+
56+ for output in it {
57+ write ! ( writer, "\\ u{{{}}}" , output. as_str( ) ) . unwrap ( ) ;
3158 }
32- }
3359
34- // Output table declaration.
35- write ! ( writer, "pub const {}: &[(char, char, &str)] = &[\n " , tablename. replace( "." , "_" ) ) . unwrap ( ) ;
60+ write ! ( writer, "\" ),\n " ) . unwrap ( ) ;
61+ } ) ;
62+
63+ write ! ( writer, "];\n \n " ) . unwrap ( ) ;
64+ }
3665
37- // For each line:
38- let target_re = Regex :: new ( r"([0-9A-F]+)(-([0-9A-F]+))?(; ([0-9A-F]+)( ([0-9A-F]+))?( ([0-9A-F]+))?( ([0-9A-F]+))?;)?" ) . unwrap ( ) ;
39- loop {
40- let mut line = String :: new ( ) ;
41- reader . read_line ( & mut line ) . unwrap ( ) ;
66+ fn table_lines < F > ( table : & str , mut f : F )
67+ where
68+ F : FnMut ( & str ) ,
69+ {
70+ let mut lines = include_str ! ( "rfc3454.txt" ) . split ( '\n' ) ;
4271
43- // Done when reach the end of the table.
72+ // fast forward to the start of the table
73+ lines. find ( |line| line. contains ( "Start Table" ) && line. contains ( table) ) ;
74+
75+ for line in lines {
76+ let line = line. trim ( ) ;
4477 if line. contains ( "End Table" ) {
4578 break ;
4679 }
4780
48- // Skip RFC metadata.
49- if line. contains ( "Hoffman & Blanchet" ) || line. contains ( "RFC 3454" ) {
81+ // Skip page headers/footers
82+ if line. is_empty ( ) || line . contains ( "Hoffman & Blanchet" ) || line. contains ( "RFC 3454" ) {
5083 continue ;
5184 }
5285
53- // Generate an entry for each data line.
54- if let Some ( captures) = target_re. captures ( & line) {
55- // start char
56- let start = captures. get ( 1 ) . unwrap ( ) . as_str ( ) ;
57-
58- // end char (inclusive)
59- let end = captures. get ( 3 ) . map_or ( start, |m| m. as_str ( ) ) ;
60-
61- // 0-4 character replacement string
62- let mut replace = String :: new ( ) ;
63- for & i in [ 5 , 7 , 9 , 11 ] . iter ( ) {
64- match captures. get ( i) {
65- None => break ,
66- Some ( c) => {
67- replace. push_str ( "\\ u{" ) ;
68- replace. push_str ( c. as_str ( ) ) ;
69- replace. push_str ( "}" ) ;
70- }
71- }
72- }
73-
74- write ! ( writer, " ('\\ u{{{}}}', '\\ u{{{}}}', \" {}\" ),\n " , start, end, replace) . unwrap ( )
75- }
86+ f ( line) ;
7687 }
77-
78- // End table definition.
79- write ! ( writer, "];\n \n " ) . unwrap ( ) ;
8088}
0 commit comments