Skip to content

Commit 9afa8f2

Browse files
committed
Add Java API for getting correct markdown parts from descriptions
1 parent a760c77 commit 9afa8f2

File tree

11 files changed

+496
-19
lines changed

11 files changed

+496
-19
lines changed

core/esmf-aspect-meta-model-interface/src/main/java/org/eclipse/esmf/metamodel/HasDescription.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import java.util.List;
1818
import java.util.Locale;
1919
import java.util.Set;
20+
import java.util.stream.Collectors;
2021

2122
import org.eclipse.esmf.metamodel.datatype.LangString;
2223

@@ -78,4 +79,11 @@ default String getDescription( final Locale locale ) {
7879
return getDescription( Locale.ENGLISH );
7980
} );
8081
}
82+
83+
default Set<String> getDescriptions( final Locale locale ) {
84+
return getDescriptions().stream()
85+
.filter( description -> description.getLanguageTag().equals( locale ) )
86+
.map( LangString::getValue )
87+
.collect( Collectors.toSet() );
88+
}
8189
}

core/esmf-aspect-meta-model-java/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@
6363
<artifactId>record-builder-processor</artifactId>
6464
<scope>provided</scope>
6565
</dependency>
66+
<dependency>
67+
<groupId>org.commonmark</groupId>
68+
<artifactId>commonmark</artifactId>
69+
<version>0.24.0</version>
70+
</dependency>
6671

6772
<!-- Test dependencies -->
6873
<dependency>
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
* Copyright (c) 2025 Robert Bosch Manufacturing Solutions GmbH
3+
*
4+
* See the AUTHORS file(s) distributed with this work for additional
5+
* information regarding authorship.
6+
*
7+
* This Source Code Form is subject to the terms of the Mozilla Public
8+
* License, v. 2.0. If a copy of the MPL was not distributed with this
9+
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
10+
*
11+
* SPDX-License-Identifier: MPL-2.0
12+
*/
13+
14+
package org.eclipse.esmf.aspectmodel.utils;
15+
16+
import java.util.ArrayList;
17+
import java.util.List;
18+
import java.util.Set;
19+
import java.util.regex.Matcher;
20+
import java.util.regex.Pattern;
21+
22+
/**
23+
* Utility class for extracting and rendering structured content blocks (such as NOTE, EXAMPLE, SOURCE)
24+
* from SAMM-compliant Markdown descriptions.
25+
* <p>
26+
* This class supports parsing multi-line Markdown-style input and extracting semantically significant
27+
* sections such as {@code > NOTE: ...}, {@code > EXAMPLE: ...}, and {@code > SOURCE: ...}.
28+
* These blocks can be retrieved as plain text or rendered into HTML using {@link MarkdownHtmlRenderer}.
29+
*/
30+
public class DescriptionsUtils {
31+
32+
private DescriptionsUtils() {
33+
}
34+
35+
/**
36+
* A regex pattern used to identify special SAMM-style Markdown blocks.
37+
* Matches lines beginning with {@code > NOTE:}, {@code > EXAMPLE:}, or {@code > SOURCE:},
38+
* optionally followed by a number (e.g., {@code > EXAMPLE 2: ...}).
39+
*/
40+
private static final Pattern BLOCK_PATTERN = Pattern.compile(
41+
"^>\\s*(NOTE|EXAMPLE|SOURCE)(\\s+\\d+)?:\\s*(.*)",
42+
Pattern.CASE_INSENSITIVE
43+
);
44+
45+
/**
46+
* Extracts all {@code NOTE} blocks from the given set of Markdown description strings.
47+
*
48+
* @param descriptions A set of multi-line Markdown descriptions.
49+
* @return A list of extracted NOTE block contents.
50+
*/
51+
public static List<String> notes( final Set<String> descriptions ) {
52+
return extractBlock( descriptions, "NOTE" );
53+
}
54+
55+
/**
56+
* Extracts all {@code EXAMPLE} blocks from the given set of Markdown description strings.
57+
*
58+
* @param descriptions A set of multi-line Markdown descriptions.
59+
* @return A list of extracted EXAMPLE block contents.
60+
*/
61+
public static List<String> examples( final Set<String> descriptions ) {
62+
return extractBlock( descriptions, "EXAMPLE" );
63+
}
64+
65+
/**
66+
* Extracts all {@code SOURCE} blocks from the given set of Markdown description strings.
67+
*
68+
* @param descriptions A set of multi-line Markdown descriptions.
69+
* @return A list of extracted SOURCE block contents.
70+
*/
71+
public static List<String> sources( final Set<String> descriptions ) {
72+
return extractBlock( descriptions, "SOURCE" );
73+
}
74+
75+
/**
76+
* Renders the given set of Markdown description strings into semantic HTML.
77+
* Uses {@link MarkdownHtmlRenderer} to process both special blocks and general Markdown syntax.
78+
*
79+
* @param descriptions A set of Markdown description strings.
80+
* @return The HTML representation of the combined input.
81+
*/
82+
public static String toHtml( final Set<String> descriptions ) {
83+
return MarkdownHtmlRenderer.renderHtmlFromDescriptions( descriptions );
84+
}
85+
86+
/**
87+
* Extracts all blocks of a specified type (e.g., NOTE, EXAMPLE, SOURCE) from a set of Markdown strings.
88+
* <p>
89+
* Each block is expected to begin with a {@code > TYPE:} line and may span multiple lines,
90+
* each of which begins with {@code >}.
91+
*
92+
* @param descriptions A set of multi-line Markdown description strings.
93+
* @param type The type of block to extract ("NOTE", "EXAMPLE", or "SOURCE").
94+
* @return A list of extracted block contents for the specified type.
95+
*/
96+
private static List<String> extractBlock( final Set<String> descriptions, final String type ) {
97+
List<String> result = new ArrayList<>();
98+
for ( String desc : descriptions ) {
99+
extractFromDescription( desc, type, result );
100+
}
101+
return result;
102+
}
103+
104+
private static void extractFromDescription( final String desc, final String type, final List<String> result ) {
105+
String[] lines = desc.split( "\\R" );
106+
boolean[] insideBlock = { false };
107+
StringBuilder blockContent = new StringBuilder();
108+
109+
for ( String line : lines ) {
110+
handleLine( line, type, insideBlock, blockContent, result );
111+
}
112+
113+
if ( insideBlock[0] && !blockContent.isEmpty() ) {
114+
result.add( blockContent.toString().strip() );
115+
}
116+
}
117+
118+
private static void handleLine( final String line, final String type, boolean[] insideBlock,
119+
StringBuilder blockContent, List<String> result ) {
120+
Matcher matcher = BLOCK_PATTERN.matcher( line );
121+
if ( matcher.find() ) {
122+
String currentType = matcher.group( 1 ).toUpperCase();
123+
String content = matcher.group( 3 ); // Corrected: group(3) is the actual content
124+
125+
flushBlock( insideBlock, blockContent, result );
126+
127+
if ( currentType.equals( type.toUpperCase() ) ) {
128+
blockContent.append( content ).append( "\n" );
129+
insideBlock[0] = true;
130+
} else {
131+
insideBlock[0] = false;
132+
}
133+
} else if ( insideBlock[0] && line.startsWith( ">" ) ) {
134+
blockContent.append( line.substring( 1 ).stripLeading() ).append( "\n" );
135+
} else if ( insideBlock[0] ) {
136+
flushBlock( insideBlock, blockContent, result );
137+
}
138+
}
139+
140+
private static void flushBlock( boolean[] insideBlock, StringBuilder blockContent, List<String> result ) {
141+
if ( insideBlock[0] && !blockContent.isEmpty() ) {
142+
result.add( blockContent.toString().strip() );
143+
blockContent.setLength( 0 );
144+
insideBlock[0] = false;
145+
}
146+
}
147+
}
148+
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
/*
2+
* Copyright (c) 2025 Robert Bosch Manufacturing Solutions GmbH
3+
*
4+
* See the AUTHORS file(s) distributed with this work for additional
5+
* information regarding authorship.
6+
*
7+
* This Source Code Form is subject to the terms of the Mozilla Public
8+
* License, v. 2.0. If a copy of the MPL was not distributed with this
9+
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
10+
*
11+
* SPDX-License-Identifier: MPL-2.0
12+
*/
13+
14+
package org.eclipse.esmf.aspectmodel.utils;
15+
16+
import java.util.ArrayList;
17+
import java.util.LinkedHashMap;
18+
import java.util.List;
19+
import java.util.Map;
20+
import java.util.Set;
21+
import java.util.regex.Matcher;
22+
import java.util.regex.Pattern;
23+
import java.util.stream.Collectors;
24+
25+
import org.commonmark.node.Node;
26+
import org.commonmark.parser.Parser;
27+
import org.commonmark.renderer.html.HtmlRenderer;
28+
29+
/**
30+
* A utility class for converting SAMM-flavored Markdown descriptions into HTML.
31+
* <p>
32+
* This renderer supports a limited subset of Markdown syntax and introduces
33+
* custom processing for specific annotated blocks commonly used in SAMM descriptions,
34+
* such as {@code > NOTE: ...}, {@code > EXAMPLE: ...}, and {@code > SOURCE: ...}.
35+
* These blocks are extracted and rendered into semantically meaningful HTML
36+
* structures (e.g., {@code <div class="note">}, {@code <ul class="example-list">}, etc.).
37+
* Remaining content is rendered using the CommonMark parser.
38+
*/
39+
public class MarkdownHtmlRenderer {
40+
41+
private static final String CLOSE_DIV_TAG = "</div>";
42+
43+
/**
44+
* A reusable CommonMark parser instance for processing standard Markdown syntax.
45+
*/
46+
private static final Parser PARSER = Parser.builder().build();
47+
48+
/**
49+
* A reusable CommonMark HTML renderer instance.
50+
*/
51+
private static final HtmlRenderer RENDERER = HtmlRenderer.builder().build();
52+
53+
/**
54+
* Private constructor to prevent instantiation. This class is intended to be used statically.
55+
*/
56+
private MarkdownHtmlRenderer() {
57+
}
58+
59+
/**
60+
* Converts a set of multi-line Markdown descriptions into a single HTML string.
61+
* Each entry in the set is processed independently and merged in the resulting output.
62+
*
63+
* @param descriptions A set of Markdown description blocks to render.
64+
* @return Combined HTML output representing all given descriptions.
65+
*/
66+
public static String renderHtmlFromDescriptions( final Set<String> descriptions ) {
67+
StringBuilder result = new StringBuilder();
68+
for ( String desc : descriptions ) {
69+
result.append( processSpecialBlocks( desc ) ).append( "\n" );
70+
}
71+
return result.toString();
72+
}
73+
74+
/**
75+
* Parses a single Markdown block:
76+
* <ul>
77+
* <li>Identifies and extracts special block types: NOTE, EXAMPLE, and SOURCE</li>
78+
* <li>Renders those blocks using custom HTML wrappers</li>
79+
* <li>Processes the remaining Markdown using the CommonMark renderer</li>
80+
* </ul>
81+
*
82+
* @param rawMarkdown The full Markdown string to process.
83+
* @return The rendered HTML output.
84+
*/
85+
private static String processSpecialBlocks( final String rawMarkdown ) {
86+
String[] lines = stripLines( rawMarkdown );
87+
StringBuilder markdownBuffer = new StringBuilder();
88+
Map<String, List<String>> specialBlocks = collectSpecialBlocks( lines, markdownBuffer );
89+
90+
StringBuilder html = new StringBuilder();
91+
specialBlocks.forEach( ( type, items ) -> html.append( renderSpecialBlock( type, items ) ) );
92+
93+
Node parsed = PARSER.parse( markdownBuffer.toString() );
94+
html.append( RENDERER.render( parsed ) );
95+
return html.toString();
96+
}
97+
98+
/**
99+
* Renders a list of extracted special blocks into HTML.
100+
* <p>
101+
* - For {@code NOTE} and {@code SOURCE}, each entry is rendered in a {@code <div>} with a matching class.<br>
102+
* - For {@code EXAMPLE}, a single example is rendered as a {@code <div>}; multiple examples as a {@code <ul>}.
103+
*
104+
* @param type The type of the special block (e.g., "NOTE", "EXAMPLE", "SOURCE").
105+
* @param items The list of block contents for that type.
106+
* @return The rendered HTML string for the block.
107+
*/
108+
private static String renderSpecialBlock( final String type, final List<String> items ) {
109+
if ( items.isEmpty() ) {
110+
return "";
111+
}
112+
113+
return switch ( type ) {
114+
case "NOTE", "SOURCE" -> items.stream()
115+
.map( text -> "<div class=\"" + type.toLowerCase() + "\">"
116+
+ renderMarkdownInline( text.strip() ) + CLOSE_DIV_TAG + "\n" )
117+
.collect( Collectors.joining() );
118+
119+
case "EXAMPLE" -> {
120+
if ( items.size() == 1 ) {
121+
yield "<div class=\"example\">" + renderMarkdownInline( items.get( 0 ).strip() ) + CLOSE_DIV_TAG + "\n";
122+
} else {
123+
StringBuilder sb = new StringBuilder( "<ul class=\"example-list\">\n" );
124+
for ( String item : items ) {
125+
sb.append( "<li>" ).append( renderMarkdownInline( item.strip() ) ).append( "</li>\n" );
126+
}
127+
sb.append( "</ul>\n" );
128+
yield sb.toString();
129+
}
130+
}
131+
132+
default -> items.stream()
133+
.map( text -> "<div class=\"block\">" + renderMarkdownInline( text.strip() ) + CLOSE_DIV_TAG + "\n" )
134+
.collect( Collectors.joining() );
135+
};
136+
}
137+
138+
/**
139+
* Collects all special block entries (NOTE, EXAMPLE, SOURCE) from the input lines.
140+
* Lines not belonging to special blocks are appended to the {@code markdownBuffer}.
141+
*
142+
* @param lines Stripped lines from the raw markdown block.
143+
* @param markdownBuffer Buffer to store non-special markdown content.
144+
* @return A map of special block types to their associated content.
145+
*/
146+
private static Map<String, List<String>> collectSpecialBlocks( final String[] lines, final StringBuilder markdownBuffer ) {
147+
Pattern pattern = Pattern.compile( "^>\\s*(NOTE|EXAMPLE|SOURCE)(\\s+\\d+)?:\\s*(.*)", Pattern.CASE_INSENSITIVE );
148+
Map<String, List<String>> specialBlocks = new LinkedHashMap<>();
149+
150+
String currentType = null;
151+
StringBuilder block = new StringBuilder();
152+
153+
for ( String line : lines ) {
154+
Matcher matcher = pattern.matcher( line );
155+
if ( matcher.find() ) {
156+
flushBlock( currentType, block, specialBlocks );
157+
currentType = matcher.group( 1 ).toUpperCase();
158+
block.append( matcher.group( 3 ) ).append( "\n" );
159+
} else if ( currentType != null && line.startsWith( ">" ) ) {
160+
block.append( line.substring( 1 ).stripLeading() ).append( "\n" );
161+
} else {
162+
flushBlock( currentType, block, specialBlocks );
163+
currentType = null;
164+
markdownBuffer.append( line ).append( "\n" );
165+
}
166+
}
167+
168+
flushBlock( currentType, block, specialBlocks );
169+
return specialBlocks;
170+
}
171+
172+
/**
173+
* Flushes the current block to the target map if non-empty.
174+
*
175+
* @param currentType The type of block being collected.
176+
* @param block The current content buffer for the block.
177+
* @param target The target map of blocks.
178+
*/
179+
private static void flushBlock( final String currentType, final StringBuilder block, final Map<String, List<String>> target ) {
180+
if ( currentType != null && !block.isEmpty() ) {
181+
target.computeIfAbsent( currentType, k -> new ArrayList<>() ).add( block.toString().strip() );
182+
block.setLength( 0 );
183+
}
184+
}
185+
186+
/**
187+
* Splits the raw markdown string into lines and strips leading whitespace from each line.
188+
*
189+
* @param rawMarkdown The original multi-line markdown string.
190+
* @return An array of trimmed lines.
191+
*/
192+
private static String[] stripLines( final String rawMarkdown ) {
193+
String[] rawLines = rawMarkdown.split( "\\R", -1 );
194+
String[] lines = new String[rawLines.length];
195+
for ( int i = 0; i < rawLines.length; i++ ) {
196+
lines[i] = rawLines[i].stripLeading();
197+
}
198+
return lines;
199+
}
200+
201+
/**
202+
* Renders a single markdown line (inline) to HTML using CommonMark.
203+
* This is used for special blocks (e.g., NOTE/EXAMPLE/SOURCE) where
204+
* markdown is allowed but not block-level structure.
205+
*
206+
* @param text Markdown content.
207+
* @return HTML output as string.
208+
*/
209+
private static String renderMarkdownInline( final String text ) {
210+
Node node = PARSER.parse( text );
211+
return RENDERER.render( node ).trim();
212+
}
213+
}
214+
215+

0 commit comments

Comments
 (0)