forked from Evref-BL/Pharo-Tree-Sitter
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTSFASTImporter.class.st
More file actions
182 lines (132 loc) · 6.38 KB
/
TSFASTImporter.class.st
File metadata and controls
182 lines (132 loc) · 6.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
"
## Description
I am a generic importer for a FAST model.
I will create all the nodes and relations of the FAST model taking a root node as parameter.
I will do an exact match to the Tree Sitter AST but I have a subclass that can allow to tweak the model to generate.
# Implementation details
## Context
- The context contains the stack of all elements ""parent"" to the node that is currently being visited.
- The #currentFMProperty can either be nil or a FMProperty. If it is a property, it means that the nodes being visited are part of a field of their parent that has the same name as a contained entities property of the FAST entity. Thus we keep it in order to store the children in this property instead of the generic one.
- #containedEntitiesPropertiesMap will save for each kind of FAST class the possible children properties for perf reasons.
### Source positions management
TreeSitter is providing the positions of the nodes in the parsed string in number of bytes but the current implementation of FAST requires the positions in number of characters.
In the original implementation we were computing, for each node, the number of characters from the start and end positions given in number of bytes.
Now we are taking a different direction. We know that we provide the source code to Tree Sitter encoded in UTF-8.
With this information we build a map cached in #bytesToCharactersMap that will associate to the index of each leading byte, the index of the corresponding character.
This allows to build once the index map and to just use it to convert bytes positions into characters positions which is speeding up a lot the import.
"
Class {
#name : 'TSFASTImporter',
#superclass : 'TSVisitor',
#instVars : [
'classesPrefix',
'model',
'originString',
'containedEntitiesPropertiesMap',
'context',
'currentFMProperty',
'bytesToCharactersMap'
],
#category : 'TreeSitter-FAST-Utils',
#package : 'TreeSitter-FAST-Utils'
}
{ #category : 'private' }
TSFASTImporter >> bytesToCharacterMap [
	"Answer the map used to convert byte positions into character positions, building and caching it on first access.
	The FAST importer considers that the origin string is UTF8 encoded. A file in UTF16 or another encoding should be decoded and re-encoded in UTF8 first.
	Famix cannot do that because its source code lives in files, but FAST keeps the source code in a Pharo string, which makes it possible."

	bytesToCharactersMap ifNil: [
		bytesToCharactersMap := ZnUTF8Encoder default mapBytesToCharactersFor: self originString ].
	^ bytesToCharactersMap
]
{ #category : 'private' }
TSFASTImporter >> characterPositionAtByte: aNumber [
	"Answer the entry stored at aNumber in the bytes to characters map.
	Signal SubscriptOutOfBounds when the map has no entry for aNumber."

	| positions |
	positions := self bytesToCharacterMap.
	^ positions at: aNumber ifAbsent: [ SubscriptOutOfBounds signalFor: aNumber ]
]
{ #category : 'accessing' }
TSFASTImporter >> classesPrefix [
	"Answer the prefix prepended to a name to build the name of the class to instantiate. It is nil until #languageName: has been sent."

	^ classesPrefix
]
{ #category : 'private' }
TSFASTImporter >> containedEntitiesPropertiesFor: aClass [
	"Answer the properties of aClass that answer true to #isChildrenProperty.
	The result is computed from the moose description of aClass on the first request and cached per class afterwards."

	^ containedEntitiesPropertiesMap
		  at: aClass
		  ifAbsentPut: [
			  | properties |
			  properties := aClass mooseDescription allProperties.
			  properties select: [ :each | each isChildrenProperty ] ]
]
{ #category : 'actions' }
TSFASTImporter >> import: aTSNode [
	"Create a new model, whose class is named by the classes prefix followed by Model, visit aTSNode to populate it and answer the model."

	| modelClassName |
	modelClassName := self classesPrefix , 'Model'.
	model := self newInstanceOfClassNamed: modelClassName.
	aTSNode accept: self.
	^ model
]
{ #category : 'initialization' }
TSFASTImporter >> initialize [
	"Start with an empty context stack and an empty cache of children properties."

	super initialize.
	context := Stack new.
	containedEntitiesPropertiesMap := IdentityDictionary new
]
{ #category : 'actions' }
TSFASTImporter >> instantiateFastEntityFrom: aTSNode [
	"Create the entity whose class name is the classes prefix followed by the pascalized type of aTSNode, add it to the model, set its source positions and answer it.
	The positions are obtained by converting the start and end bytes of aTSNode through #characterPositionAtByte:. The start position is shifted by one."

	| entity entityClassName |
	entityClassName := self classesPrefix , aTSNode type pascalized.
	entity := self newInstanceOfClassNamed: entityClassName.
	model add: entity.
	entity
		startPos: (self characterPositionAtByte: aTSNode startByte) + 1;
		endPos: (self characterPositionAtByte: aTSNode endByte).
	^ entity
]
{ #category : 'accessing' }
TSFASTImporter >> languageName: aString [
	"Derive the classes prefix from the language name by prepending FAST to aString."

	| prefix |
	prefix := 'FAST' , aString.
	classesPrefix := prefix
]
{ #category : 'accessing' }
TSFASTImporter >> newInstanceOfClassNamed: aString [
	"Look up the class named aString in the environment of my class and answer a new instance of it."

	| resolvedClass |
	resolvedClass := aString asClassInEnvironment: self class environment.
	^ resolvedClass new
]
{ #category : 'accessing' }
TSFASTImporter >> originString [
	"Answer the source code string set through #originString:."

	^ originString
]
{ #category : 'accessing' }
TSFASTImporter >> originString: aString [
	"Set the source code string the positions of the imported nodes refer to.
	The bytes to characters map is lazily computed from the origin string and cached, so it is dropped here. Otherwise an importer reused with another string would keep converting positions with the map of the previous one."

	originString := aString.
	bytesToCharactersMap := nil
]
{ #category : 'accessing' }
TSFASTImporter >> tsLanguage: anObject [
	"Deprecated setter: anObject is ignored and only a deprecation is raised.
	The message text must not be followed by any other token, or it would be parsed as a unary message sent to the string."

	self deprecated: 'This is not used and can be removed.'
]
{ #category : 'visiting' }
TSFASTImporter >> visitChildren: aTSNode in: fastEntity [
	"Visit the named children of aTSNode, grouped by field name, with fastEntity pushed on top of the context so that the children can find their parent.
	For each field, the current property is set to the children property of the class of fastEntity whose name equals the field name, or to nil when there is none. It is set explicitly for every field, including the first one, so that the children never reuse the property under which fastEntity itself was stored in its own parent."

	| previousProperty |
	context push: fastEntity.

	"Visiting the children overwrites the current property, so we save it to restore it once they are all visited."
	previousProperty := currentFMProperty.

	aTSNode collectFieldNameOfNamedChild keysAndValuesDo: [ :field :nodes |
		"If the field has the name of a property, the children will use it to set themselves in the right variable. Else they go to the generic children."
		currentFMProperty := (self containedEntitiesPropertiesFor: fastEntity class)
			                     detect: [ :property | property name = field ]
			                     ifNone: [ nil ].

		"Nodes can be a simple node or a collection of nodes."
		nodes isCollection
			ifTrue: [ nodes do: [ :node | node accept: self ] ]
			ifFalse: [ nodes accept: self ] ].

	currentFMProperty := previousProperty.
	context pop
]
{ #category : 'visiting' }
TSFASTImporter >> visitNode: aTSNode [
	"Create the FAST entity for aTSNode, attach it to its parent (or give it the origin string when it is the root), visit its children if any and answer the entity."

	| newEntity |
	newEntity := self instantiateFastEntityFrom: aTSNode.

	context isEmpty
		ifTrue: [ "An empty context means this is the root node: it holds the source."
			newEntity source: self originString ]
		ifFalse: [
			| parent |
			parent := context top.
			"Two ways to store the entity in its parent:
			- when a current property is set, the parent has a children property named like the field the node belongs to, and the entity goes there
			- else the entity is added to the generic children."
			currentFMProperty
				ifNil: [ parent addGenericChildren: newEntity ]
				ifNotNil: [ :property |
					| selector |
					selector := property isMultivalued
						            ifTrue: [ ('add' , property implementingSelector asSingular capitalized , ':') asSymbol ]
						            ifFalse: [ property implementingSelector asMutator ].
					parent perform: selector with: newEntity ] ].

	aTSNode hasChildren ifTrue: [ self visitChildren: aTSNode in: newEntity ].
	^ newEntity
]