Skip to content

Commit 53dca56

Browse files
committed
Merge remote-tracking branch 'upstream/master' into gpu-sampling
2 parents 0f17ccd + b1846f1 commit 53dca56

File tree

6 files changed

+213
-5
lines changed

6 files changed

+213
-5
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2501,9 +2501,11 @@ static void ggml_vk_wait_events(vk_context& ctx, std::vector<vk::Event>&& events
25012501
static constexpr uint32_t flash_attention_num_small_rows = 32;
25022502
static constexpr uint32_t scalar_flash_attention_num_small_rows = 1;
25032503

2504-
static uint32_t get_fa_scalar_num_large_rows(uint32_t hsv) {
2504+
static uint32_t get_fa_scalar_num_large_rows(uint32_t hsk, uint32_t hsv) {
25052505
if (hsv >= 192) {
25062506
return 2;
2507+
} else if ((hsv | hsk) & 8) {
2508+
return 4;
25072509
} else {
25082510
return 8;
25092511
}
@@ -2535,9 +2537,9 @@ static std::array<uint32_t, 2> fa_rows_cols(FaCodePath path, uint32_t hsk, uint3
25352537
if ((hsv | hsk) & 8) {
25362538
// HSV/HSK not being a multiple of 16 makes D_split smaller, which makes cols_per_iter
25372539
// larger, and Bc needs to be >= cols_per_thread. 64 is large enough, 32 is not.
2538-
return {get_fa_scalar_num_large_rows(hsv), 64};
2540+
return {get_fa_scalar_num_large_rows(hsk, hsv), 64};
25392541
} else {
2540-
return {get_fa_scalar_num_large_rows(hsv), 32};
2542+
return {get_fa_scalar_num_large_rows(hsk, hsv), 32};
25412543
}
25422544
}
25432545
}
@@ -7740,7 +7742,7 @@ static bool ggml_vk_flash_attn_scalar_shmem_support(const vk_device& device, con
77407742
// Needs to be kept up to date on shader changes
77417743
GGML_UNUSED(hsv);
77427744
const uint32_t wg_size = scalar_flash_attention_workgroup_size;
7743-
const uint32_t Br = get_fa_scalar_num_large_rows(hsv);
7745+
const uint32_t Br = get_fa_scalar_num_large_rows(hsk, hsv);
77447746
const uint32_t Bc = scalar_flash_attention_Bc;
77457747

77467748
const uint32_t tmpsh = wg_size * sizeof(float);
@@ -7871,7 +7873,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
78717873
case FA_SCALAR:
78727874
case FA_COOPMAT1:
78737875
// We may switch from coopmat1 to scalar, so use the scalar limit for both
7874-
max_gqa = get_fa_scalar_num_large_rows(HSV);
7876+
max_gqa = get_fa_scalar_num_large_rows(HSK, HSV);
78757877
break;
78767878
case FA_COOPMAT2:
78777879
max_gqa = get_fa_num_small_rows(FA_COOPMAT2);

tests/test-backend-ops.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7859,6 +7859,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
78597859
}
78607860
}
78617861

7862+
// Qwen3-VL-8B https://github.com/ggml-org/llama.cpp/issues/17012
7863+
test_cases.emplace_back(new test_flash_attn_ext(72, 72, 16, {1, 1}, 5776, 5776, false, false, 0, 0, GGML_PREC_F32, GGML_TYPE_F16));
7864+
78627865
for (int kv : { 4096, 8192, 16384, }) {
78637866
for (int hs : { 64, 128, }) {
78647867
for (int nr : { 1, 4, }) {

tools/server/public/index.html.gz

-2.81 KB
Binary file not shown.

tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import rehypeKatex from 'rehype-katex';
99
import rehypeStringify from 'rehype-stringify';
1010
import { copyCodeToClipboard } from '$lib/utils/copy';
11+
import { rehypeRestoreTableHtml } from '$lib/markdown/table-html-restorer';
1112
import { preprocessLaTeX } from '$lib/utils/latex-protection';
1213
import { browser } from '$app/environment';
1314
import '$styles/katex-custom.scss';
@@ -60,6 +61,7 @@
6061
.use(remarkRehype) // Convert Markdown AST to rehype
6162
.use(rehypeKatex) // Render math using KaTeX
6263
.use(rehypeHighlight) // Add syntax highlighting
64+
.use(rehypeRestoreTableHtml) // Restore limited HTML (e.g., <br>, <ul>) inside Markdown tables
6365
.use(rehypeStringify); // Convert to HTML string
6466
});
6567
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/**
2+
* Matches <br>, <br/>, <br /> tags (case-insensitive).
3+
* Used to detect line breaks in table cell text content.
4+
*/
5+
// Also tolerates whitespace on either side of the optional slash (e.g. `<br / >`).
// Carries the `g` flag: expandBrTags consumes it through `String.prototype.matchAll`
// to iterate over every occurrence in a string.
export const BR_PATTERN = /<br\s*\/?\s*>/gi;
6+
7+
/**
8+
* Matches a complete <ul>...</ul> block.
9+
* Captures the inner content (group 1) for further <li> extraction.
10+
* Case-insensitive, allows multiline content.
11+
*/
12+
// Anchored at both ends (`^` ... `$`): the entire input must be one bare `<ul>` block
// (no attributes on the tag). parseList trims the input before matching. The capture
// is greedy, so it runs up to the last `</ul>` in the input.
export const LIST_PATTERN = /^<ul>([\s\S]*)<\/ul>$/i;
13+
14+
/**
15+
* Matches individual <li>...</li> elements within a list.
16+
* Captures the inner content (group 1) of each list item.
17+
* Non-greedy to handle multiple consecutive items.
18+
* Case-insensitive, allows multiline content.
19+
*/
20+
// Only bare `<li>` tags (no attributes) match. Carries the `g` flag: parseList walks
// the matches with `matchAll` and rejects any non-whitespace found between them.
export const LI_PATTERN = /<li>([\s\S]*?)<\/li>/gi;
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
/**
2+
* Rehype plugin to restore limited HTML elements inside Markdown table cells.
3+
*
4+
* ## Problem
5+
* The remark/rehype pipeline neutralizes inline HTML as literal text
6+
* (remarkLiteralHtml) so that XML/HTML snippets in LLM responses display
7+
* as-is instead of being rendered. This causes <br> and <ul> markup in
8+
* table cells to show as plain text.
9+
*
10+
* ## Solution
11+
* This plugin traverses the HAST post-conversion, parses whitelisted HTML
12+
* patterns from text nodes, and replaces them with actual HAST element nodes
13+
* that will be rendered as real HTML.
14+
*
15+
* ## Supported HTML
16+
* - `<br>` / `<br/>` / `<br />` - Line breaks (inline)
17+
* - `<ul><li>...</li></ul>` - Unordered lists (block)
18+
*
19+
* ## Key Implementation Details
20+
*
21+
* ### 1. Sibling Combination (Critical)
22+
* The Markdown pipeline may fragment content across multiple text nodes and `<br>`
23+
* elements. For example, `<ul><li>a</li></ul>` might arrive as:
24+
* - Text: `"<ul>"`
25+
* - Element: `<br>`
26+
* - Text: `"<li>a</li></ul>"`
27+
*
28+
* We must combine consecutive text nodes and `<br>` elements into a single string
29+
* before attempting to parse list markup. Without this, list detection fails.
30+
*
31+
* ### 2. visitParents for Deep Traversal
32+
* Table cell content may be wrapped in intermediate elements (e.g., `<p>` tags).
33+
* Using `visitParents` instead of direct child iteration ensures we find text
34+
* nodes at any depth within the cell.
35+
*
36+
* ### 3. Reference Comparison for No-Op Detection
37+
* When checking if `<br>` expansion changed anything, we compare:
38+
* `expanded.length !== 1 || expanded[0] !== textNode`
39+
*
40+
* This catches both cases:
41+
* - Multiple nodes created (text was split)
42+
* - Single NEW node created (original had only `<br>`, now it's an element)
43+
*
44+
* A simple `length > 1` check would miss the single `<br>` case.
45+
*
46+
* ### 4. Strict List Validation
47+
* `parseList()` rejects malformed markup by checking for garbage text between
48+
* `<li>` elements. This prevents creating broken DOM from partial matches like
49+
* `<ul>garbage<li>a</li></ul>`.
50+
*
51+
* ### 5. Newline Substitution for `<br>` in Combined String
52+
* When combining siblings, existing `<br>` elements become `\n` in the combined
53+
* string. This allows list content to span visual lines while still being parsed
54+
* as a single unit.
55+
*
56+
* @example
57+
* // Input Markdown:
58+
* // | Feature | Notes |
59+
* // |---------|-------|
60+
* // | Multi-line | First<br>Second |
61+
* // | List | <ul><li>A</li><li>B</li></ul> |
62+
* //
63+
* // Without this plugin: <br> and <ul> render as literal text
64+
* // With this plugin: <br> becomes line break, <ul> becomes actual list
65+
*/
66+
67+
import type { Plugin } from 'unified';
68+
import type { Element, ElementContent, Root, Text } from 'hast';
69+
import { visit } from 'unist-util-visit';
70+
import { visitParents } from 'unist-util-visit-parents';
71+
import { BR_PATTERN, LIST_PATTERN, LI_PATTERN } from '$lib/constants/table-html-restorer';
72+
73+
/**
 * Splits a string on `<br>` tags (every form matched by `BR_PATTERN`) into HAST nodes.
 *
 * Each tag becomes an empty `br` element and the text between tags becomes a
 * text node; empty stretches of text produce no node. When the string contains
 * no tag at all, the result is a single text node holding the whole value.
 *
 * @param value - Raw text that may contain literal `<br>` markup.
 * @returns The text and `br` nodes in document order.
 */
function expandBrTags(value: string): ElementContent[] {
	const nodes: ElementContent[] = [];
	let consumed = 0;

	for (const hit of value.matchAll(BR_PATTERN)) {
		const tagStart = hit.index!;

		// Emit the text that precedes this tag, if any.
		if (tagStart > consumed) {
			const before: Text = { type: 'text', value: value.slice(consumed, tagStart) };
			nodes.push(before);
		}

		const lineBreak: Element = { type: 'element', tagName: 'br', properties: {}, children: [] };
		nodes.push(lineBreak);

		consumed = tagStart + hit[0].length;
	}

	// Nothing was pushed, so no tag matched: hand back the value as one text node.
	if (nodes.length === 0) {
		const whole: Text = { type: 'text', value };
		return [whole];
	}

	// Emit whatever text follows the last tag.
	if (consumed < value.length) {
		const rest: Text = { type: 'text', value: value.slice(consumed) };
		nodes.push(rest);
	}

	return nodes;
}
97+
98+
/**
 * Converts a `<ul><li>...</li></ul>` string into a HAST `ul` element.
 *
 * The input is trimmed and must match `LIST_PATTERN` in full. Inside the list,
 * only whitespace may appear before, between and after the `<li>` items, and at
 * least one item is required. The content of each item is run through
 * `expandBrTags`.
 *
 * @param value - Candidate list markup.
 * @returns The `ul` element, or `null` when the markup does not qualify.
 */
function parseList(value: string): Element | null {
	const inner = LIST_PATTERN.exec(value.trim())?.[1];
	if (inner === undefined) return null;

	const listItems: ElementContent[] = [];
	let scanned = 0;

	for (const item of inner.matchAll(LI_PATTERN)) {
		const itemStart = item.index!;

		// Anything other than whitespace ahead of this item means malformed markup.
		const gap = inner.slice(scanned, itemStart);
		if (gap.trim()) return null;

		const li: Element = {
			type: 'element',
			tagName: 'li',
			properties: {},
			children: expandBrTags(item[1] ?? '')
		};
		listItems.push(li);

		scanned = itemStart + item[0].length;
	}

	// A list needs at least one item...
	if (listItems.length === 0) return null;

	// ...and nothing but whitespace after the last one.
	const tail = inner.slice(scanned);
	if (tail.trim()) return null;

	const list: Element = { type: 'element', tagName: 'ul', properties: {}, children: listItems };
	return list;
}
129+
130+
/**
 * Restores whitelisted HTML inside one table cell, mutating the cell in place.
 *
 * For every text node found under `cell` (at any depth) whose direct parent is
 * an element:
 *  1. The node and the run of text / `br` siblings that follows it are joined
 *     into one string (`br` elements contribute `\n`). If that string is a
 *     valid list according to `parseList`, the whole run is replaced by the
 *     resulting `ul` element.
 *  2. Otherwise only this text node is considered: literal `<br>` tags in its
 *     value are expanded into real `br` elements via `expandBrTags`.
 *
 * A text node that contains no `<br>` tag is left untouched. `expandBrTags`
 * always allocates fresh nodes, so comparing its result to `textNode` by
 * reference can never detect the "nothing changed" case; the check below looks
 * at the shape of the result instead.
 *
 * @param cell - The `td` / `th` element to process.
 */
function processCell(cell: Element): void {
	visitParents(cell, 'text', (textNode: Text, ancestors) => {
		const parent = ancestors[ancestors.length - 1];
		if (!parent || parent.type !== 'element') return;

		const siblings = (parent as Element).children as ElementContent[];
		const startIndex = siblings.indexOf(textNode as ElementContent);
		if (startIndex === -1) return;

		// Join this node with the consecutive text / <br> siblings after it,
		// stopping at the first sibling of any other kind.
		let combined = '';
		let endIndex = startIndex;

		for (let i = startIndex; i < siblings.length; i++) {
			const sib = siblings[i];
			if (sib.type === 'text') {
				combined += (sib as Text).value;
			} else if (sib.type === 'element' && (sib as Element).tagName === 'br') {
				combined += '\n';
			} else {
				break;
			}
			endIndex = i;
		}

		// A list takes precedence and replaces the entire combined run.
		const list = parseList(combined);
		if (list) {
			siblings.splice(startIndex, endIndex - startIndex + 1, list);
			return;
		}

		// Otherwise expand <br> tags found in this text node only.
		const expanded = expandBrTags(textNode.value);

		// A lone text node means no <br> was found: keep the original node
		// instead of swapping it for an identical copy.
		const unchanged = expanded.length === 1 && expanded[0]?.type === 'text';
		if (unchanged) return;

		// NOTE(review): no index is returned, so continuation is left to
		// visitParents' default. Text inserted here after a <br> can thus still be
		// picked up as a list on a later visit; confirm before returning an index.
		siblings.splice(startIndex, 1, ...expanded);
	});
}
174+
175+
/**
 * Rehype plugin that applies `processCell` to every table cell (`td` and `th`
 * elements) found in the tree, restoring `<br>` and `<ul>` markup that reached
 * the HAST as literal text.
 */
export const rehypeRestoreTableHtml: Plugin<[], Root> = () => {
	const cellTags = new Set(['td', 'th']);

	return (tree) => {
		visit(tree, 'element', (node: Element) => {
			if (!cellTags.has(node.tagName)) return;
			processCell(node);
		});
	};
};

0 commit comments

Comments
 (0)