66module Nixfmt.Lexer (lexeme , pushTrivia , takeTrivia , whole ) where
77
88import Control.Monad.State.Strict (MonadState , evalStateT , get , modify , put )
9- import Data.Char (isSpace )
9+ import Data.Char (isAlphaNum , isSpace )
1010import Data.List (dropWhileEnd )
1111import Data.Maybe (fromMaybe )
1212import Data.Text as Text (
1313 Text ,
14+ all ,
1415 isPrefixOf ,
1516 length ,
1617 lines ,
@@ -29,6 +30,7 @@ import Data.Void (Void)
2930import Nixfmt.Types (
3031 Ann (.. ),
3132 Parser ,
33+ Token (TDoubleQuote , TDoubleSingleQuote ),
3234 TrailingComment (.. ),
3335 Trivia ,
3436 Trivium (.. ),
@@ -43,9 +45,11 @@ import Text.Megaparsec (
4345 chunk ,
4446 getSourcePos ,
4547 hidden ,
48+ lookAhead ,
4649 many ,
4750 manyTill ,
4851 notFollowedBy ,
52+ optional ,
4953 some ,
5054 try ,
5155 unPos ,
@@ -59,6 +63,8 @@ data ParseTrivium
5963 PTLineComment Text Pos
6064 | -- Track whether it is a doc comment
6165 PTBlockComment Bool [Text ]
66+ | -- | Language annotation like /* lua */ (single line, non-doc)
67+ PTLanguageAnnotation Text
6268 deriving (Show )
6369
6470preLexeme :: Parser a -> Parser a
@@ -148,6 +154,7 @@ convertLeading =
148154 PTBlockComment _ [] -> []
149155 PTBlockComment False [c] -> [LineComment $ " " <> strip c]
150156 PTBlockComment isDoc cs -> [BlockComment isDoc cs]
157+ PTLanguageAnnotation c -> [LanguageAnnotation c]
151158 )
152159
153160isTrailing :: ParseTrivium -> Bool
@@ -156,17 +163,93 @@ isTrailing (PTBlockComment False []) = True
156163isTrailing (PTBlockComment False [_]) = True
157164isTrailing _ = False
158165
159- convertTrivia :: [ParseTrivium ] -> Pos -> (Maybe TrailingComment , Trivia )
160- convertTrivia pts nextCol =
-- | Decide whether the text of a comment looks like a language name
-- (e.g. @lua@, @c++@, @${lang}@).
--
-- Surrounding whitespace is ignored. The remaining text must be non-empty,
-- at most 30 characters long, and consist only of alphanumeric characters
-- or one of @- + . _ $ { }@.
isLanguageIdentifier :: Text -> Bool
isLanguageIdentifier content
  | Text.null name = False
  -- TODO: make configurable or remove limit
  | Text.length name > 30 = False
  | otherwise = Text.all allowedChar name
  where
    name = strip content
    allowedChar c = isAlphaNum c || c `elem` ['-', '+', '.', '_', '$', '{', '}']
173+
-- | 'True' exactly when a token is present and it is one of the two
-- string-opening tokens, 'TDoubleQuote' or 'TDoubleSingleQuote'.
isStringToken :: Maybe Token -> Bool
isStringToken = \case
  Just TDoubleQuote -> True
  Just TDoubleSingleQuote -> True
  _ -> False
179+
-- | Turn the text of a single block comment into a 'PTLanguageAnnotation'.
--
-- The conversion only happens when the upcoming token opens a string
-- (see 'isStringToken') and the comment text is a language identifier
-- (see 'isLanguageIdentifier'). The stored text has its surrounding
-- whitespace stripped. In every other case the result is 'Nothing'.
toLangAnnotation :: Text -> Maybe Token -> Maybe ParseTrivium
toLangAnnotation content nextToken =
  if isStringToken nextToken && isLanguageIdentifier content
    then Just (PTLanguageAnnotation (strip content))
    else Nothing
186+
-- | Split the trivia parsed after a token into an optional trailing comment
-- for that token and the leading trivia of the next token.
--
-- * @pts@ is the raw trivia in source order.
-- * @nextCol@ is the column of the next lexeme.
-- * @nextToken@ is the peeked kind of the next token, if it was recognised.
--   It is only used to decide whether a block comment should become a
--   language annotation (see 'toLangAnnotation').
--
-- The trivia is first split with @span isTrailing@. Language-annotation
-- candidates are then looked for in the trailing part and, unless a lone
-- trailing comment was already converted, in the leading part.
convertTrivia :: [ParseTrivium] -> Pos -> Maybe Token -> (Maybe TrailingComment, Trivia)
convertTrivia pts nextCol nextToken =
  let (trailing, leading) = span isTrailing pts
      (trailing', leading') = processTrailing trailing leading
      leading'' = case trailing' of
        -- The whole trailing part was turned into an annotation: leave leading alone.
        [] | not (Prelude.null trailing) -> leading'
        -- Otherwise look for an annotation candidate in the leading part.
        _ -> processLeading leading'
      -- Everything as leading trivia, *including* any annotation conversion done above.
      -- Rebuilding this from the raw @pts@ would silently drop that conversion.
      allAsLeading = convertLeading (trailing' ++ leading'')
   in case (trailing', leading'') of
        -- Special case: if the trailing comment visually forms a block with the start of the following line,
        -- then treat it like part of those comments instead of a distinct trailing comment.
        -- This happens especially often after `{` or `[` tokens, where the comment of the first item
        -- starts on the same line as the opening token.
        ([PTLineComment _ pos], (PTNewlines 1) : (PTLineComment _ pos') : _)
          | pos == pos' -> (Nothing, allAsLeading)
        ([PTLineComment _ pos], [PTNewlines 1])
          | pos == nextCol -> (Nothing, allAsLeading)
        _ -> (convertTrailing trailing', convertLeading leading'')
  where
    hasLineComment = Prelude.any (\case PTLineComment{} -> True; _ -> False)

    -- Find the rightmost single-line, non-doc block comment that qualifies as a
    -- language annotation. On success, return the list with that comment removed
    -- (order of the rest preserved) together with the annotation.
    convertLastBlockToLang triviaList = go (reverse triviaList) []
      where
        go [] _ = Nothing
        go (PTBlockComment False [content] : rest) processed
          | Just langAnnotation <- toLangAnnotation content nextToken =
              Just (reverse rest ++ processed, langAnnotation)
        go (t : rest) processed = go rest (t : processed)

    -- Process trailing trivia for language annotations.
    -- A converted annotation is moved to the front of the leading trivia.
    processTrailing trailingPart leadingPart
      -- Single trailing block comment before string
      | [PTBlockComment False [content]] <- trailingPart
      , Just langAnnotation <- toLangAnnotation content nextToken =
          ([], langAnnotation : leadingPart)
      -- Multiple trailing comments before string (no line comments).
      -- 'convertLastBlockToLang' only succeeds when the next token is a string
      -- and some comment is a language identifier, so no extra pre-check is needed.
      | not (hasLineComment trailingPart)
      , Just (newTrailing, langAnnotation) <- convertLastBlockToLang trailingPart =
          (newTrailing, langAnnotation : leadingPart)
      -- No conversion needed
      | otherwise = (trailingPart, leadingPart)

    -- Process leading trivia for language annotations.
    processLeading leadingPart
      -- First item is convertible block comment: convert it in place
      | PTBlockComment False [content] : rest <- leadingPart
      , Just langAnnotation <- toLangAnnotation content nextToken =
          langAnnotation : rest
      -- Convertible comment deeper in the list: it is moved to the very end
      | Just (newLeading, langAnnotation) <- convertLastBlockToLang leadingPart =
          newLeading ++ [langAnnotation]
      -- No conversion needed
      | otherwise = leadingPart
241+
-- | Recognise which kind of string delimiter comes next.
--
-- Skips any comments and newlines (the same items 'trivia' accepts), then any
-- remaining whitespace other than @\\n@ and @\\r@, and finally expects either
-- @"@ ('TDoubleQuote') or @''@ ('TDoubleSingleQuote'). Fails on anything else.
--
-- This parser consumes what it matches; a caller that only wants to peek
-- has to wrap it in 'lookAhead'.
parseNextTokenType :: Parser Token
parseNextTokenType = trivia *> inlineSpace *> stringOpener
  where
    inlineSpace = manyP (\ch -> isSpace ch && ch /= '\n' && ch /= '\r')
    stringOpener =
      (TDoubleQuote <$ chunk "\"")
        <|> (TDoubleSingleQuote <$ chunk "''")
170253
-- | Parse zero or more trivia items. Each item is whatever 'lineComment',
-- 'blockComment' or 'newlines' accepts, tried in that order and wrapped in 'hidden'.
trivia :: Parser [ParseTrivium]
trivia = many (hidden triviaItem)
  where
    triviaItem = lineComment <|> blockComment <|> newlines
@@ -188,7 +271,11 @@ lexeme p = do
188271 parsedTrivia <- trivia
189272 -- This is the position of the next lexeme after the currently parsed one
190273 SourcePos {sourceColumn = col} <- getSourcePos
191- let (trailing, nextLeading) = convertTrivia parsedTrivia col
274+
275+ -- Add lookahead for next token
276+ nextToken <- optional (try $ lookAhead $ preLexeme parseNextTokenType)
277+
278+ let (trailing, nextLeading) = convertTrivia parsedTrivia col nextToken
192279 pushTrivia nextLeading
193280 return $
194281 Ann
0 commit comments