Skip to content

Commit 461ff10

Browse files
committed
Starting out language annotation support implementation
Initial support for language annotations like `/* lua */`, which should remain block comments when directly preceding string literals, while other block comments get converted to line comments.
- Detect language annotations: single-line, non-doc comments with valid language identifiers
- Preserve them as `/* lang */` block comments instead of converting them to `# lang` line comments
- Works with both regular strings `"..."` and indented strings `''...''`
1 parent 149c060 commit 461ff10

File tree

5 files changed

+104
-12
lines changed

5 files changed

+104
-12
lines changed

src/Nixfmt/Lexer.hs

Lines changed: 95 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
module Nixfmt.Lexer (lexeme, pushTrivia, takeTrivia, whole) where
77

88
import Control.Monad.State.Strict (MonadState, evalStateT, get, modify, put)
9-
import Data.Char (isSpace)
9+
import Data.Char (isAlphaNum, isSpace)
1010
import Data.List (dropWhileEnd)
1111
import Data.Maybe (fromMaybe)
1212
import Data.Text as Text (
1313
Text,
14+
all,
1415
isPrefixOf,
1516
length,
1617
lines,
@@ -29,6 +30,7 @@ import Data.Void (Void)
2930
import Nixfmt.Types (
3031
Ann (..),
3132
Parser,
33+
Token (TDoubleQuote, TDoubleSingleQuote),
3234
TrailingComment (..),
3335
Trivia,
3436
Trivium (..),
@@ -43,9 +45,11 @@ import Text.Megaparsec (
4345
chunk,
4446
getSourcePos,
4547
hidden,
48+
lookAhead,
4649
many,
4750
manyTill,
4851
notFollowedBy,
52+
optional,
4953
some,
5054
try,
5155
unPos,
@@ -59,6 +63,8 @@ data ParseTrivium
5963
PTLineComment Text Pos
6064
| -- Track whether it is a doc comment
6165
PTBlockComment Bool [Text]
66+
| -- | Language annotation like /* lua */ (single line, non-doc)
67+
PTLanguageAnnotation Text
6268
deriving (Show)
6369

6470
preLexeme :: Parser a -> Parser a
@@ -148,6 +154,7 @@ convertLeading =
148154
PTBlockComment _ [] -> []
149155
PTBlockComment False [c] -> [LineComment $ " " <> strip c]
150156
PTBlockComment isDoc cs -> [BlockComment isDoc cs]
157+
PTLanguageAnnotation c -> [LanguageAnnotation c]
151158
)
152159

153160
isTrailing :: ParseTrivium -> Bool
@@ -156,17 +163,93 @@ isTrailing (PTBlockComment False []) = True
156163
isTrailing (PTBlockComment False [_]) = True
157164
isTrailing _ = False
158165

159-
convertTrivia :: [ParseTrivium] -> Pos -> (Maybe TrailingComment, Trivia)
160-
convertTrivia pts nextCol =
166+
-- Check whether a comment's text qualifies as a language identifier for a
-- language annotation. After `strip` is applied, the text must be non-empty,
-- at most 30 characters long, and consist only of alphanumeric characters or
-- one of `-`, `+`, `.`, `_`, `$`, `{`, `}`.
167+
isLanguageIdentifier :: Text -> Bool
168+
isLanguageIdentifier content =
169+
let stripped = strip content
170+
in not (Text.null stripped)
171+
&& Text.length stripped <= 30 -- TODO: make configurable or remove limit
172+
&& Text.all (\c -> isAlphaNum c || c `elem` ['-', '+', '.', '_', '$', '{', '}']) stripped
173+
174+
-- Check if next token is a string literal
175+
isStringToken :: Maybe Token -> Bool
176+
isStringToken (Just TDoubleQuote) = True
177+
isStringToken (Just TDoubleSingleQuote) = True
178+
isStringToken _ = False
179+
180+
-- Turn the content of a single block comment into a `PTLanguageAnnotation`
-- (carrying the `strip`ped content) when both conditions hold: the next token
-- is a string token (`isStringToken`) and the content passes
-- `isLanguageIdentifier`. Returns Nothing otherwise.
181+
toLangAnnotation :: Text -> Maybe Token -> Maybe ParseTrivium
182+
toLangAnnotation content nextToken
183+
| isStringToken nextToken && isLanguageIdentifier content =
184+
Just (PTLanguageAnnotation (strip content))
185+
| otherwise = Nothing
186+
187+
convertTrivia :: [ParseTrivium] -> Pos -> Maybe Token -> (Maybe TrailingComment, Trivia)
188+
convertTrivia pts nextCol nextToken =
161189
let (trailing, leading) = span isTrailing pts
162-
in case (trailing, leading) of
190+
(trailing', leading') = processTrailing trailing leading
191+
leading'' = case trailing' of
192+
[] | not (Prelude.null trailing) -> leading' -- trailing was converted, don't process leading
193+
_ -> processLeading leading' -- process leading normally
194+
in case (trailing', leading'') of
163195
-- Special case: if the trailing comment visually forms a block with the start of the following line,
164196
-- then treat it like part of those comments instead of a distinct trailing comment.
165197
-- This happens especially often after `{` or `[` tokens, where the comment of the first item
166198
-- starts on the same line as the opening token.
167-
([PTLineComment _ pos], (PTNewlines 1) : (PTLineComment _ pos') : _) | pos == pos' -> (Nothing, convertLeading pts)
168-
([PTLineComment _ pos], [PTNewlines 1]) | pos == nextCol -> (Nothing, convertLeading pts)
169-
_ -> (convertTrailing trailing, convertLeading leading)
199+
([PTLineComment _ pos], (PTNewlines 1) : (PTLineComment _ pos') : _)
200+
| pos == pos' -> (Nothing, convertLeading pts)
201+
([PTLineComment _ pos], [PTNewlines 1])
202+
| pos == nextCol -> (Nothing, convertLeading pts)
203+
_ -> (convertTrailing trailing', convertLeading leading'')
204+
where
205+
hasLineComment = Prelude.any (\case PTLineComment{} -> True; _ -> False)
206+
hasLangCandidate = Prelude.any (\case PTBlockComment False [c] -> isLanguageIdentifier c; _ -> False)
207+
208+
-- Convert the rightmost qualifying block comment to language annotation
209+
convertLastBlockToLang triviaList = go (reverse triviaList) []
210+
where
211+
go [] _ = Nothing
212+
go (PTBlockComment False [content] : rest) processed
213+
| Just langAnnotation <- toLangAnnotation content nextToken =
214+
Just (reverse rest ++ processed, langAnnotation)
215+
go (t : rest) processed = go rest (t : processed)
216+
217+
-- Process trailing trivia for language annotations
218+
processTrailing trailing leading
219+
-- Single trailing block comment before string
220+
| [PTBlockComment False [content]] <- trailing,
221+
Just langAnnotation <- toLangAnnotation content nextToken =
222+
([], langAnnotation : leading)
223+
-- Multiple trailing comments before string (no line comments)
224+
| isStringToken nextToken && not (hasLineComment trailing) && hasLangCandidate trailing,
225+
Just (newTrailing, langAnnotation) <- convertLastBlockToLang trailing =
226+
(newTrailing, langAnnotation : leading)
227+
-- No conversion needed
228+
| otherwise = (trailing, leading)
229+
230+
-- Process leading trivia for language annotations
231+
processLeading leading
232+
-- First item is convertible block comment
233+
| PTBlockComment False [content] : rest <- leading,
234+
Just langAnnotation <- toLangAnnotation content nextToken =
235+
langAnnotation : rest
236+
-- Find convertible comment deeper in the list
237+
| Just (newLeading, langAnnotation) <- convertLastBlockToLang leading =
238+
newLeading ++ [langAnnotation]
239+
-- No conversion needed
240+
| otherwise = leading
241+
242+
-- Parser that recognizes the opening delimiter of the next string literal:
-- `"` yields TDoubleQuote and `''` yields TDoubleSingleQuote; anything else
-- makes the parser fail. NOTE: this parser consumes input on its own; it only
-- acts as a non-consuming peek when the caller wraps it in `lookAhead`.
243+
parseNextTokenType :: Parser Token
244+
parseNextTokenType = do
245+
-- Skip any trivia (line comments, block comments, newlines) that might appear before the next token
246+
_ <- many (hidden $ lineComment <|> blockComment <|> newlines)
247+
-- Skip any remaining whitespace other than '\n' and '\r'
248+
_ <- manyP (\x -> isSpace x && x /= '\n' && x /= '\r')
249+
TDoubleQuote
250+
<$ chunk "\""
251+
<|> TDoubleSingleQuote
252+
<$ chunk "''"
170253

171254
trivia :: Parser [ParseTrivium]
172255
trivia = many $ hidden $ lineComment <|> blockComment <|> newlines
@@ -188,7 +271,11 @@ lexeme p = do
188271
parsedTrivia <- trivia
189272
-- This is the position of the next lexeme after the currently parsed one
190273
SourcePos{sourceColumn = col} <- getSourcePos
191-
let (trailing, nextLeading) = convertTrivia parsedTrivia col
274+
275+
-- Add lookahead for next token
276+
nextToken <- optional (try $ lookAhead $ preLexeme parseNextTokenType)
277+
278+
let (trailing, nextLeading) = convertTrivia parsedTrivia col nextToken
192279
pushTrivia nextLeading
193280
return $
194281
Ann

src/Nixfmt/Pretty.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ instance Pretty TrailingComment where
8686
instance Pretty Trivium where
8787
pretty EmptyLine = emptyline
8888
pretty (LineComment c) = comment ("#" <> c) <> hardline
89+
pretty (LanguageAnnotation lang) = comment ("/* " <> lang <> " */") <> hardspace
8990
pretty (BlockComment isDoc c) =
9091
comment (if isDoc then "/**" else "/*")
9192
<> hardline
@@ -109,6 +110,8 @@ prettyItems (Items items) = sepBy hardline items
109110

110111
instance Pretty [Trivium] where
111112
pretty [] = mempty
113+
-- Special case: if trivia consists only of a single language annotation, render it inline without a preceding hardline
114+
pretty [langAnnotation@(LanguageAnnotation _)] = pretty langAnnotation
112115
pretty trivia = hardline <> hcat trivia
113116

114117
instance (Pretty a) => Pretty (Ann a) where

src/Nixfmt/Types.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ data Trivium
7272
| -- Multi-line comments with /* or /**. Multiple # comments are treated as a list of `LineComment`.
7373
-- The bool indicates a doc comment (/**)
7474
BlockComment Bool [Text]
75+
| -- | Language annotation comments like /* lua */ that should remain as block comments before strings
76+
LanguageAnnotation Text
7577
deriving (Eq, Show)
7678

7779
type Trivia = [Trivium]

test/diff/language-annotation/out-pure.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@
4242
";
4343

4444
# Multiple block comments in sequence
45-
sequentialComments = # first second
46-
''
45+
sequentialComments = # first
46+
/* second */ ''
4747
some content
4848
'';
4949

test/diff/language-annotation/out.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@
4242
";
4343

4444
# Multiple block comments in sequence
45-
sequentialComments = # first second
46-
''
45+
sequentialComments = # first
46+
/* second */ ''
4747
some content
4848
'';
4949

0 commit comments

Comments
 (0)