-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTokens.hs
More file actions
199 lines (178 loc) · 6.42 KB
/
Tokens.hs
File metadata and controls
199 lines (178 loc) · 6.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
{-# LANGUAGE OverloadedStrings #-}
module Tokens where
import Prelude hiding (length,exp)
import SourceSpan
import GHC
import Control.Monad
import Data.Aeson hiding (encode)
import qualified Data.Map.Lazy as M
import Data.Text (Text)
import Data.Sequence (Seq,(><))
import qualified Data.Sequence as S
import Data.Foldable (toList)
import Data.Ord
import Data.Generics
encodeTokens :: Document -> ParsedModule -> [Value]
encodeTokens doc parsedModule =
let (keywords,comments) = pm_annotations parsedModule
tokenList = S.fromList [ encodeToken sp (keywordCategory annKey)
| ((_,annKey),srcSpans) <- M.toList keywords
, sp <- srcSpans
]
commentList = encodeComment <$> joinLists (M.elems comments)
in toList $ fmap snd $ S.sortBy (comparing fst) $ tokenList >< commentList >< topLevel
where
joinLists :: [[a]] -> Seq a
joinLists = join . S.fromList . fmap S.fromList
encodeToken :: SrcSpan -> Text -> (SourceSpan,Value)
encodeToken srcSpan category =
let sp = lineColumnToOffsetLength doc srcSpan
in case sp of
Just s ->
(s,
object
[ "offset" .= offset s
, "length" .= length s
, "category" .= category
])
Nothing ->
error "cannot encode token due to unhelpful source span"
keywordCategory :: AnnKeywordId -> Text
keywordCategory k =
case k of
AnnAs -> keyword
AnnAt -> operator
AnnBang -> operator
AnnBackquote -> delimiter
AnnBy -> keyword
AnnCase -> conditional
AnnClass -> structure
AnnClose -> meta
AnnCloseC -> parenthesis
AnnCloseP -> parenthesis
AnnCloseS -> parenthesis
AnnColon -> operator
AnnComma -> delimiter
AnnCommaTuple -> delimiter
AnnDarrow -> typ
AnnData -> structure
AnnDcolon -> typ
AnnDefault -> keyword
AnnDeriving -> keyword
AnnDo -> statement
AnnDot -> operator
AnnDotdot -> operator
AnnElse -> conditional
AnnEqual -> operator
AnnExport -> statement
AnnFamily -> structure
AnnForall -> typ
AnnForeign -> typ
AnnFunId -> identifier
AnnGroup -> keyword
AnnHeader -> meta
AnnHiding -> keyword
AnnIf -> conditional
AnnImport -> keyword
AnnIn -> keyword
AnnInfix -> keyword
AnnInstance -> typ
AnnLam -> operator
AnnLarrow -> operator
AnnLet -> keyword
AnnMdo -> keyword
AnnMinus -> operator
AnnModule -> keyword
AnnNewtype -> typ
AnnName -> identifier
AnnOf -> keyword
AnnOpen -> meta
AnnOpenC -> parenthesis
AnnOpenP -> parenthesis
AnnOpenPE -> parenthesis
AnnOpenPTE -> parenthesis
AnnOpenS -> parenthesis
AnnPackageName -> identifier
AnnPattern -> keyword
AnnProc -> keyword
AnnQualified -> keyword
AnnRarrow -> operator
AnnRec -> keyword
AnnRole -> structure
AnnSafe -> keyword
AnnSemi -> delimiter
AnnSimpleQuote -> character
AnnStatic -> keyword
AnnThen -> conditional
AnnThIdSplice -> meta
AnnThIdTySplice -> meta
AnnThTyQuote -> meta
AnnTilde -> operator
AnnTildehsh -> operator
AnnType -> typ
AnnUnit -> typ
AnnUsing -> keyword
AnnVal -> constant
AnnValStr -> string
AnnVbar -> operator
AnnWhere -> keyword
Annlarrowtail -> operator
Annrarrowtail -> operator
AnnLarrowtail -> operator
AnnRarrowtail -> operator
AnnEofPos -> whitespace
encodeComment :: Located AnnotationComment -> (SourceSpan,Value)
encodeComment com =
encodeToken (getLoc com) comment
topLevel :: Seq (SourceSpan,Value)
topLevel = extractTokensFromAST (parsedSource parsedModule)
extractTokensFromAST :: Data a => a -> Seq (SourceSpan,Value)
extractTokensFromAST = gmapQl mappend mempty extractTokensFromAST
`extQ` encodeModuleName
`extQ` encodeType
`extQ` encodeTypeDecl
`extQ` encodeSignature
`extQ` encodeExpr
extractTokensFromSubTerms :: Data a => a -> Seq (SourceSpan,Value)
extractTokensFromSubTerms = gmapQl mappend mempty extractTokensFromAST
encode :: Text -> Located a -> Seq (SourceSpan,Value)
encode cat name = S.singleton $ encodeToken (getLoc name) cat
encodeModuleName :: Located ModuleName -> Seq (SourceSpan,Value)
encodeModuleName = encode identifier
encodeTypeDecl :: TyClDecl RdrName -> Seq (SourceSpan,Value)
encodeTypeDecl decl = encode typ $ case decl of
FamDecl famDecl -> fdLName famDecl
SynDecl name _ _ _ -> name
DataDecl name _ _ _ -> name
ClassDecl _ name _ _ _ _ _ _ _ _ -> name
encodeSignature :: Sig RdrName -> Seq (SourceSpan,Value)
encodeSignature sig = case sig of
TypeSig name t1 t2 -> (encode identifier =<< S.fromList name) >< extractTokensFromAST (t1,t2)
PatSynSig name t1 t2 t3 t4 -> encode identifier name >< extractTokensFromAST (t1,t2,t3,t4)
GenericSig name t -> (encode identifier =<< S.fromList name) >< extractTokensFromAST t
InlineSig name t -> encode identifier name >< extractTokensFromAST t
SpecSig name t1 t2 -> encode typ name >< extractTokensFromAST (t1,t2)
_ -> extractTokensFromSubTerms sig
encodeType :: Located (HsType RdrName) -> Seq (SourceSpan,Value)
encodeType = encode typ
encodeExpr :: Located (HsExpr RdrName) -> Seq (SourceSpan,Value)
encodeExpr exp = case unLoc exp of
HsVar {} -> encode identifier exp
HsLit {} -> encode constant exp
HsOverLit {} -> encode constant exp
_ -> extractTokensFromSubTerms exp
comment = "comment"
parenthesis = "parenthesis"
conditional = "conditional"
typ = "type"
delimiter = "delimiter"
string = "string"
structure = "structure"
identifier = "identifier"
constant = "constant"
meta = "meta"
statement = "statement"
keyword = "keyword"
operator = "operator"
character = "character"
whitespace = "whitespace"