-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtree.scm
More file actions
176 lines (151 loc) · 6.07 KB
/
tree.scm
File metadata and controls
176 lines (151 loc) · 6.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
;; ============================================================
;; CST Representation
;; ============================================================
;;
;; Nonterminal node: (symbol child ...)
;; e.g. (expr (num "43") (*token* plus "+") (num "2"))
;;
;; Token node: (*token* symbol string)
;; e.g. (*token* plus "+")
;;
;; Anonymous leaf: "string"
;; e.g. " " or "; comment"
;;
;; Invariant: concatenating all leaves left-to-right reconstructs
;; the original source text ("every byte accounted for").
;; ============================================================
;; Token nodes
;; ============================================================
(define *token-tag* '*token*)
(define (make-token name text)
(list *token-tag* name text))
(define (token? x)
(and (pair? x)
(eq? (car x) *token-tag*)))
(define (token-name x) (cadr x))
(define (token-text x) (caddr x))
;; ============================================================
;; CST node predicates and accessors
;; ============================================================
(define (cst-node? x)
(and (pair? x)
(symbol? (car x))
(not (token? x))))
(define (cst-node-name x) (car x))
(define (cst-node-children x) (cdr x))
(define (cst-leaf? x) (string? x))
;; ============================================================
;; CST -> source text reconstruction
;; ============================================================
;; Flatten all leaves left-to-right to reconstruct original source.
(define (cst->string cst)
(let ((port (open-output-string)))
(let walk ((node cst))
(cond
((string? node)
(display node port))
((token? node)
(display (token-text node) port))
((cst-node? node)
(for-each walk (cst-node-children node)))
(else
(error 'cst->string "unexpected CST node" node))))
(get-output-string port)))
;; ============================================================
;; Child-type extraction
;; ============================================================
;; Given a nonterminal node instance, return the list of child
;; type symbols. Anonymous string leaves become the symbol
;; *anonymous*.
;;
;; Example:
;; (cst-child-types '(expr (num "43") (*token* plus "+") (num "2")))
;; => (num plus num)
(define *anonymous-sym* '*anonymous*)
(define (cst-child-type child)
(cond
((token? child) (token-name child))
((cst-node? child) (cst-node-name child))
((string? child) *anonymous-sym*)
(else (error 'cst-child-type "unexpected child" child))))
(define (cst-child-types node)
(map cst-child-type (cst-node-children node)))
;; ============================================================
;; CST traversal (walk)
;; ============================================================
;; Walk a CST corpus, calling visitor at each nonterminal node.
;; visitor: (node parent-type position grandparent-type left-sibling-type) -> void
;;
;; - node: the current nonterminal node
;; - parent-type: symbol name of the parent, or #f at the root
;; - position: integer index of this node within parent's children, or 0 at root
;; - grandparent-type: symbol name of the grandparent, or #f
;; - left-sibling-type: type symbol of the previous sibling, or #f if first child/root
(define (cst-walk-node node parent-type position grandparent-type left-sibling-type visitor)
(when (cst-node? node)
(visitor node parent-type position grandparent-type left-sibling-type)
(let loop ((children (cst-node-children node))
(i 0)
(prev-type #f))
(unless (null? children)
(let ((child-type (cst-child-type (car children))))
(cst-walk-node (car children)
(cst-node-name node)
i
parent-type
prev-type
visitor)
(loop (cdr children) (+ i 1) child-type))))))
(define (cst-walk corpus visitor)
(for-each
(lambda (tree)
(cst-walk-node tree #f 0 #f #f visitor))
corpus))
;; ============================================================
;; Corpus utilities
;; ============================================================
;; Collect all distinct nonterminal type names in a corpus.
(define (cst-collect-types corpus)
(let ((types '()))
(cst-walk corpus
(lambda (node parent-type position grandparent-type left-sibling-type)
(let ((name (cst-node-name node)))
(unless (memq name types)
(set! types (cons name types))))))
(reverse types)))
;; Collect all instances of a given nonterminal type from a corpus.
(define (cst-collect-instances corpus type-name)
(let ((instances '()))
(cst-walk corpus
(lambda (node parent-type position grandparent-type left-sibling-type)
(when (eq? (cst-node-name node) type-name)
(set! instances (cons node instances)))))
(reverse instances)))
;; Collect all token types and their texts from a corpus.
;; Returns an alist: ((token-name . (text1 text2 ...)) ...)
(define (cst-collect-token-types corpus)
(let ((tokens '()))
(cst-walk corpus
(lambda (node parent-type position grandparent-type left-sibling-type)
(for-each
(lambda (child)
(when (token? child)
(let* ((name (token-name child))
(text (token-text child))
(existing (assq name tokens)))
(if existing
(unless (member text (cdr existing))
(set-cdr! existing (cons text (cdr existing))))
(set! tokens (cons (cons name (list text)) tokens))))))
(cst-node-children node))))
(reverse tokens)))
;; Collect all distinct child-type patterns for a given nonterminal.
(define (cst-collect-child-patterns corpus type-name)
(let ((patterns '()))
(cst-walk corpus
(lambda (node parent-type position grandparent-type left-sibling-type)
(when (eq? (cst-node-name node) type-name)
(let ((pat (cst-child-types node)))
(unless (member pat patterns)
(set! patterns (cons pat patterns)))))))
(reverse patterns)))