@@ -168,6 +168,11 @@ fn value(self: *Parser, gpa: Allocator) ParseError!Node.OptionalIndex {
168168 self .token_it .seekBy (-1 );
169169 return self .listBracketed (gpa );
170170 },
171+ .block_literal , .block_folded = > {
172+ // block scalar
173+ self .token_it .seekBy (-1 );
174+ return self .blockScalar (gpa );
175+ },
171176 else = > return .none ,
172177 }
173178}
@@ -574,6 +579,197 @@ fn leafValue(self: *Parser, gpa: Allocator) ParseError!Node.OptionalIndex {
574579 return error .MalformedYaml ;
575580}
576581
/// Parses a block scalar introduced by a `|` (literal) or `>` (folded)
/// indicator and records it as a `string_value` node.
///
/// On entry the token iterator must be positioned on the indicator token.
/// The processed text is appended to `self.string_bytes`; the node's scope
/// runs from the indicator token to the last token this function consumed.
///
/// Content handling (deliberately simplified):
/// - The column of the first content token is the base indentation; up to
///   that many leading spaces/tabs are removed from every line.
/// - Literal style joins lines with '\n' and appends exactly one final '\n'
///   when the result is non-empty. Trailing blank lines are dropped.
/// - Folded style joins non-empty lines with a single space.
/// - A token immediately following the indicator (e.g. a chomping or
///   indentation indicator) is skipped, not interpreted.
///
/// The scalar ends at the first non-whitespace token whose column is less
/// than or equal to the column of the first token on the indicator's line,
/// or at a document marker / end of input.
///
/// Returns `error.UnexpectedEof` if there is no indicator token to read, and
/// propagates allocation failures from `gpa`.
fn blockScalar(self: *Parser, gpa: Allocator) ParseError!Node.OptionalIndex {
    const node_index: Node.Index = @enumFromInt(try self.nodes.addOne(gpa));
    const node_start = self.token_it.pos;

    // Get the block indicator (| or >).
    const indicator_tok = self.token_it.next() orelse return error.UnexpectedEof;
    const is_literal = indicator_tok.id == .block_literal;

    log.debug("(block_scalar) begin {s}@{d}", .{ @tagName(indicator_tok.id), node_start });

    // Parent column: column of the first non-blank token on the line that
    // holds the indicator. Found by walking back to the previous new_line and
    // then scanning forward. Stays 0 when no earlier new_line exists.
    var parent_col: usize = 0;
    const indicator_pos: usize = @intFromEnum(node_start);
    var check_pos: usize = indicator_pos;
    while (check_pos > 0) {
        check_pos -= 1;
        if (self.tokens.items(.token)[check_pos].id != .new_line) continue;

        var line_pos = check_pos + 1;
        while (line_pos < indicator_pos) : (line_pos += 1) {
            const id = self.tokens.items(.token)[line_pos].id;
            if (id != .space and id != .tab) {
                parent_col = self.getCol(@enumFromInt(line_pos));
                break;
            }
        }
        break;
    }

    log.debug("(block_scalar) parent_col = {d}", .{parent_col});

    // Skip an optional chomping/indentation indicator: any literal that
    // immediately follows the block indicator is discarded.
    self.eatCommentsAndSpace(&.{.new_line});
    _ = self.eatToken(.literal, &.{ .new_line, .comment });

    // Move past the rest of the indicator line. A non-newline token here is
    // only reported in the debug log; it is not treated as an error.
    self.eatCommentsAndSpace(&.{});
    if (self.token_it.peek()) |tok| {
        if (tok.id != .new_line) {
            log.debug("(block_scalar) expected newline but got {s}", .{@tagName(tok.id)});
        }
    }
    _ = self.eatToken(.new_line, &.{});

    var base_indent: u32 = 0;
    var content_start: ?Token.Index = null;
    // Last token that is actual content. Trailing whitespace/newlines are
    // excluded so they do not produce extra line breaks in the result.
    var content_end: Token.Index = node_start;
    // Last token consumed by this function; used for the node scope.
    var scope_end: Token.Index = node_start;

    // Collect all lines that belong to the scalar.
    while (self.token_it.peek()) |tok| {
        switch (tok.id) {
            // Indentation, separators and blank lines: consume only.
            .space, .tab, .new_line => {},
            // Document markers and end of input terminate the scalar.
            .doc_start, .doc_end, .eof => break,
            // Everything else, comment tokens included, is content as long
            // as it is indented deeper than the parent.
            else => {
                const line_col = self.getCol(self.token_it.pos);

                // Check for the dedent BEFORE recording the token as content,
                // so an empty scalar followed by a sibling key leaves
                // `content_start` unset instead of pointing past `content_end`.
                if (line_col <= parent_col) {
                    log.debug("(block_scalar) ending: line_col={d} <= parent_col={d}", .{ line_col, parent_col });
                    break;
                }

                // The first content token establishes the base indentation.
                if (content_start == null) {
                    base_indent = @intCast(line_col);
                    content_start = self.token_it.pos;
                    log.debug("(block_scalar) base_indent = {d}", .{base_indent});
                }
                content_end = self.token_it.pos;
            },
        }
        scope_end = self.token_it.pos;
        _ = self.token_it.next();
    }

    // No content at all: the value is the empty string.
    const first_content = content_start orelse {
        self.nodes.set(@intFromEnum(node_index), .{
            .tag = .string_value,
            .scope = .{
                .start = node_start,
                .end = scope_end,
            },
            .data = .{ .string = .{ .index = @enumFromInt(0), .len = 0 } },
        });
        return node_index.toOptional();
    };

    // Raw source text from the first to the last content token.
    const raw = self.rawString(first_content, content_end);

    // Build the processed text in a scratch buffer so `string_bytes` is only
    // touched once the whole value has been assembled.
    var result_bytes: std.ArrayListUnmanaged(u8) = .empty;
    defer result_bytes.deinit(gpa);

    var lines = std.mem.splitScalar(u8, raw, '\n');
    var first_line = true;
    while (lines.next()) |line| {
        // Strip at most `base_indent` leading spaces/tabs.
        var skip: usize = 0;
        while (skip < line.len and skip < base_indent and
            (line[skip] == ' ' or line[skip] == '\t')) : (skip += 1)
        {}
        const stripped = line[skip..];

        if (!first_line) {
            if (is_literal) {
                // Literal style: preserve line breaks.
                try result_bytes.append(gpa, '\n');
            } else if (result_bytes.items.len > 0 and stripped.len > 0) {
                // Folded style (simplified): join non-empty lines with a space.
                try result_bytes.append(gpa, ' ');
            }
        }
        first_line = false;

        try result_bytes.appendSlice(gpa, stripped);
    }

    // Literal style ends with a single line break.
    if (is_literal and result_bytes.items.len > 0) {
        try result_bytes.append(gpa, '\n');
    }

    // Store the string.
    const string_index: u32 = @intCast(self.string_bytes.items.len);
    try self.string_bytes.appendSlice(gpa, result_bytes.items);

    log.debug("(block_scalar) end content: {s}", .{result_bytes.items});

    self.nodes.set(@intFromEnum(node_index), .{
        .tag = .string_value,
        .scope = .{
            .start = node_start,
            .end = scope_end,
        },
        .data = .{ .string = .{
            .index = @enumFromInt(string_index),
            .len = @intCast(result_bytes.items.len),
        } },
    });

    return node_index.toOptional();
}
772+
577773fn eatCommentsAndSpace (self : * Parser , comptime exclusions : []const Token.Id ) void {
578774 log .debug ("eatCommentsAndSpace" , .{});
579775 outer : while (self .token_it .next ()) | tok | {
0 commit comments