diff --git a/README.md b/README.md
index 69556690..74848b91 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
[](https://www.npmjs.com/package/hyperparam)
[](https://github.com/hyparam/hyperparam-cli/actions)
[](https://opensource.org/licenses/MIT)
-
+
This is the hyperparam cli tool.
diff --git a/bin/serve.js b/bin/serve.js
index 2c7e947d..8b477d4d 100644
--- a/bin/serve.js
+++ b/bin/serve.js
@@ -327,9 +327,9 @@ function gzip(req, content) {
*/
function openUrl(url) {
+ // NOTE(review): url is interpolated unquoted into the command string handed to exec
+ // (presumably child_process.exec, which runs it through a shell — confirm).
+ // Verify callers never pass untrusted input; '&' or spaces in url would also split the command.
switch (process.platform) {
- case 'darwin': exec(`open ${url}`); return
- case 'win32': exec(`start ${url}`); return
- case 'linux': exec(`xdg-open ${url}`); return
- default: throw new Error(`unsupported platform ${process.platform}`)
+ case 'darwin': exec(`open ${url}`); return
+ case 'win32': exec(`start ${url}`); return
+ case 'linux': exec(`xdg-open ${url}`); return
+ default: throw new Error(`unsupported platform ${process.platform}`)
}
}
diff --git a/eslint.config.js b/eslint.config.js
index 90cc3d25..5c2fe82b 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -46,7 +46,7 @@ export default typescript.config(
'eol-last': 'error',
eqeqeq: 'error',
'func-style': ['error', 'declaration'],
- indent: ['error', 2],
+ indent: ['error', 2, { SwitchCase: 1 }],
'no-constant-condition': 'off',
'no-extra-parens': 'error',
'no-multi-spaces': 'error',
diff --git a/package.json b/package.json
index 2afd16b1..8e2e6987 100644
--- a/package.json
+++ b/package.json
@@ -29,7 +29,7 @@
"scripts": {
"build:types": "tsc -b",
"build:lib": "vite build -c vite.lib.config.js",
- "build:app": "vite build -c vite.app.config.js",
+ "build:app": "vite build",
"build": "run-s build:lib build:types build:app",
"coverage": "vitest run -c vite.lib.config.js --coverage --coverage.include=src --coverage.include=bin",
"dev": "run-p -l watch:ts watch:vite watch:serve",
@@ -38,7 +38,7 @@
"prepublishOnly": "npm run build",
"serve": "node bin/cli.js",
"preserve": "npm run build",
- "test": "vitest run -c vite.lib.config.js",
+ "test": "vitest run",
"typecheck": "tsc --noEmit",
"url": "run-p -l watch:ts watch:vite watch:url",
"watch:ts": "tsc --watch",
diff --git a/src/components/Markdown.tsx b/src/components/Markdown.tsx
index 64a11a28..94dec57d 100644
--- a/src/components/Markdown.tsx
+++ b/src/components/Markdown.tsx
@@ -1,252 +1,434 @@
-import type { ReactNode } from 'react'
+import { ReactNode, createElement } from 'react'
interface MarkdownProps {
text: string
className?: string
}
-export default function Markdown({ text, className }: MarkdownProps) {
- // Inline parsing: parse bold, italic, underline, links, images, inline code
- function parseInline(str: string): ReactNode[] {
- const nodes: ReactNode[] = []
-
- // A helper function to safely parse inline and return an array of react nodes
- function renderTextSegments(text: string): ReactNode[] {
- let result: ReactNode[] = []
-
- // Process in order: image links, images, regular links, and then formatting
- const imageInsideLinkRegex = /\[!\[([^\]]*)\]\(([^)]+)\)\]\(([^)]+)\)/g
- // Handle mixed content within links: [text  more text](link_url)
- const mixedContentLinkRegex = /\[([^\]]*?!\[[^\]]*?\]\([^)]+?\)[^\]]*?)\]\(([^)]+)\)/g
- const imageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g
- const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g
- const codeRegex = /`([^`]+)`/g
- const boldRegex = /\*\*([^*]+)\*\*/g
- const italicRegex = /\*(?!\s)([^*]+?)(?!\s)\*/g
- const underlineRegex = /__(.+?)__/g
-
- function applyRegex(
- currentText: ReactNode[],
- regex: RegExp,
- renderFn: (match: RegExpExecArray) => ReactNode
- ) {
- const newResult: ReactNode[] = []
- for (const segment of currentText) {
- if (typeof segment === 'string') {
- const str = segment
- let lastIndex = 0
- let match: RegExpExecArray | null
- regex.lastIndex = 0 // Reset regex for safety
- while ((match = regex.exec(str)) !== null) {
- // Add text before match
- if (match.index > lastIndex) {
- newResult.push(str.slice(lastIndex, match.index))
- }
- // Add replaced node
- newResult.push(renderFn(match))
- lastIndex = match.index + match[0].length
- }
- if (lastIndex < str.length) {
- newResult.push(str.slice(lastIndex))
- }
- } else {
- // If it's already a ReactNode (not a string), just push it
- newResult.push(segment)
- }
- }
- return newResult
- }
+// Markdown AST node, discriminated by `type`.
+// Inline variants: text, bold, italic, code, link, image.
+// Block variants: paragraph, heading, list, blockquote, codeblock.
+type Token =
+ | { type: 'text', content: string }
+ | { type: 'bold', children: Token[] }
+ | { type: 'italic', children: Token[] }
+ | { type: 'code', content: string }
+ | { type: 'link', href: string, children: Token[] }
+ | { type: 'image', alt: string, src: string }
+ | { type: 'paragraph', children: Token[] }
+ // level is the number of leading '#' characters (1-6)
+ | { type: 'heading', level: number, children: Token[] }
+ // items holds one Token[] per list item
+ | { type: 'list', ordered: boolean, items: Token[][] }
+ | { type: 'blockquote', children: Token[] }
+ // language is the text after the opening fence, when present
+ | { type: 'codeblock', language?: string, content: string }
+
+/**
+ * Parse markdown source into block-level tokens.
+ *
+ * Lines are scanned top to bottom. Each non-blank line is tried as a fenced
+ * code block, heading, list, or blockquote, in that order; anything else
+ * starts a paragraph.
+ *
+ * @param text markdown source, split on '\n'
+ * @returns block tokens in document order
+ */
+function parseMarkdown(text: string): Token[] {
+ const tokens: Token[] = []
+ const lines = text.split('\n')
+ let i = 0
- // Start with entire text as a single segment
- result = [text]
+ while (i < lines.length) {
+ const line = lines[i]
- // Apply in a specific order to handle nested elements:
- // First handle image-inside-link pattern
- result = applyRegex(result, imageInsideLinkRegex, (m) =>
-
- )
+ // Skip blank lines
+ if (line.trim() === '') {
+ i++
+ continue
+ }
- // Then handle mixed content links (with images and text)
- result = applyRegex(result, mixedContentLinkRegex, (m) => {parseInline(m[1])})
+ // Code fence at top-level
+ // An unterminated fence consumes every remaining line as code.
+ if (line.startsWith('```')) {
+ const language = line.slice(3).trim() || undefined
+ i++
+ const codeLines = []
+ while (i < lines.length && !lines[i].startsWith('```')) {
+ codeLines.push(lines[i])
+ i++
+ }
+ i++ // skip the closing ```
+ tokens.push({ type: 'codeblock', language, content: codeLines.join('\n') })
+ continue
+ }
- // Then handle regular images and links
- result = applyRegex(result, imageRegex, (m) =>
)
- result = applyRegex(result, linkRegex, (m) => {parseInline(m[1])})
+ // Heading
+ // One to six '#' characters followed by whitespace; the rest of the line is inline content.
+ const headingMatch = /^(#{1,6})\s+(.*)/.exec(line)
+ if (headingMatch) {
+ const level = headingMatch[1].length
+ tokens.push({
+ type: 'heading',
+ level,
+ children: parseInline(headingMatch[2]),
+ })
+ i++
+ continue
+ }
- // Finally handle text formatting
- result = applyRegex(result, codeRegex, (m) =>
{m[1]})
- result = applyRegex(result, boldRegex, (m) => {m[1]})
- result = applyRegex(result, italicRegex, (m) => {m[1]})
- result = applyRegex(result, underlineRegex, (m) => {m[1]})
+ // List (ordered or unordered)
+ // The ordered flag comes from the first item's marker only.
+ // parseList returns the index of the first line it did not consume.
+ const listMatch = /^(\s*)([-*+]|\d+\.)\s+(.*)/.exec(line)
+ if (listMatch) {
+ const baseIndent = listMatch[1].length
+ const ordered = /^\d+\./.test(listMatch[2])
+ const [items, newIndex] = parseList(lines, i, baseIndent)
+ tokens.push({ type: 'list', ordered, items })
+ i = newIndex
+ continue
+ }
- return result
+ // Blockquote
+ // Strip the leading '>' and one optional whitespace character from each
+ // consecutive quoted line, then parse the quoted text recursively.
+ if (line.startsWith('>')) {
+ const quoteLines = []
+ while (i < lines.length && lines[i].startsWith('>')) {
+ quoteLines.push(lines[i].replace(/^>\s?/, ''))
+ i++
+ }
+ tokens.push({
+ type: 'blockquote',
+ children: parseMarkdown(quoteLines.join('\n')),
+ })
+ continue
}
- nodes.push(...renderTextSegments(str))
- return nodes
+ // Paragraph
+ // Runs until the next blank line; lines are joined with a single space.
+ // NOTE(review): a heading, list item, or code fence that directly follows
+ // paragraph text without a blank line is folded into the paragraph.
+ const paraLines = []
+ while (i < lines.length && lines[i].trim() !== '') {
+ paraLines.push(lines[i])
+ i++
+ }
+ tokens.push({
+ type: 'paragraph',
+ children: parseInline(paraLines.join(' ')),
+ })
}
- // Block-level parsing: paragraphs, headers, lists, code blocks
- type NodeType =
- | { type: 'paragraph', content: string }
- | { type: 'header', level: number, content: string }
- | { type: 'codeblock', content: string }
- | { type: 'list', ordered: boolean, items: ListItemType[] }
-
- interface ListItemType {
- content: string
- children: NodeType[]
- }
+ return tokens
+}
- function parseBlocks(lines: string[]): NodeType[] {
- let i = 0
- const nodes: NodeType[] = []
+/**
+ * Parse consecutive list items that sit at exactly `baseIndent` columns.
+ *
+ * @param lines all lines of the document
+ * @param start index of the first line of the list
+ * @param baseIndent number of leading whitespace characters of this list's markers
+ * @returns the tokens of each item, and the index of the first line not consumed
+ */
+function parseList(lines: string[], start: number, baseIndent: number): [Token[][], number] {
+ const items: Token[][] = []
+ let i = start
- function parseList(startIndent: number, ordered: boolean): { node: NodeType, endIndex: number } {
- const items: ListItemType[] = []
- while (i < lines.length) {
- const line = lines[i]
- const indent = /^(\s*)/.exec(line)?.[1].length ?? 0
+ while (i < lines.length) {
+ // Blank lines do not end the list: skip them and keep looking for items
+ if (lines[i].trim() === '') {
+ i++
+ continue
+ }
- // Check if line is a list item at or deeper than startIndent
- const liMatch = ordered
- ? /^\s*\d+\.\s+(.*)/.exec(line)
- : /^\s*-\s+(.*)/.exec(line)
+ // This matches a new top-level bullet/number for the list
+ // (the marker kind is not compared, so bullets and numbers at the same indent share one list)
+ const match = /^(\s*)([-*+]|\d+\.)\s+(.*)/.exec(lines[i])
+ // If we don't find a bullet/number at the same indent, break out
+ if (!match || match[1].length !== baseIndent) {
+ break
+ }
- if (!liMatch || indent < startIndent) {
- break
- }
+ // Begin a new list item: an array of block tokens
+ const itemTokens: Token[] = []
+ // Add the first line content directly without paragraph wrapper
+ const content = match[3]
+ if (content.trim()) {
+ // Use inline tokens directly without paragraph wrapper
+ itemTokens.push(...parseInline(content))
+ }
+ i++
- const content = liMatch[1]
+ // Now parse subsequent indented lines as sub-items or sub-blocks
+ while (i < lines.length) {
+ const subline = lines[i]
+ if (subline.trim() === '') {
i++
-
- // Check if next lines form sub-lists or paragraphs under this item
- const children: NodeType[] = []
- while (i < lines.length) {
- const subline = lines[i]
- const subIndent = /^(\s*)/.exec(subline)?.[1].length ?? 0
- // Check for sub-list
- const subOlMatch = /^\s*\d+\.\s+(.*)/.exec(subline)
- const subUlMatch = /^\s*-\s+(.*)/.exec(subline)
- if ((subOlMatch || subUlMatch) && subIndent > startIndent) {
- const { node: sublist, endIndex } = parseList(subIndent, !!subOlMatch)
- children.push(sublist)
- i = endIndex
- } else if (subline.trim().length === 0 || subIndent > startIndent) {
- if (subline.trim().length !== 0) {
- // paragraph under item
- children.push({ type: 'paragraph', content: subline.trim() })
- }
+ continue
+ }
+ // Index of the first non-whitespace character; only deeper-indented lines belong to this item
+ const subIndent = subline.search(/\S/)
+ if (subIndent > baseIndent) {
+ const trimmed = subline.trimStart()
+ if (trimmed.startsWith('```')) {
+ // If it’s a fenced code block, parse until closing fence
+ // (code lines are pushed unmodified, so they keep their list indentation)
+ const language = trimmed.slice(3).trim() || undefined
+ i++
+ const codeLines = []
+ while (i < lines.length && !lines[i].trimStart().startsWith('```')) {
+ codeLines.push(lines[i])
i++
- } else {
- break
}
+ i++ // skip the closing ```
+ itemTokens.push({
+ type: 'codeblock',
+ language,
+ content: codeLines.join('\n'),
+ })
+ continue
}
- items.push({ content, children })
+ // Check for nested list
+ // The recursive call consumes every item at the nested indent and
+ // returns the index where this item should resume.
+ const sublistMatch = /^(\s*)([-*+]|\d+\.)\s+(.*)/.exec(subline)
+ if (sublistMatch && sublistMatch[1].length > baseIndent) {
+ const newBaseIndent = sublistMatch[1].length
+ const ordered = /^\d+\./.test(sublistMatch[2])
+ const [subItems, newIndex] = parseList(lines, i, newBaseIndent)
+ itemTokens.push({ type: 'list', ordered, items: subItems })
+ i = newIndex
+ } else {
+ // Otherwise, additional paragraph in the same list item
+ itemTokens.push({
+ type: 'paragraph',
+ children: parseInline(subline.trim()),
+ })
+ i++
+ }
+ } else {
+ // Not further-indented => break sub-block parsing for this item
+ break
}
-
- return { node: { type: 'list', ordered, items }, endIndex: i }
}
- while (i < lines.length) {
- const line = lines[i]
+ items.push(itemTokens)
+ }
+
+ return [items, i]
+}
+
+/**
+ * Flatten inline tokens into plain text, e.g. for an image's alt attribute.
+ *
+ * Text and inline-code content is kept verbatim, bold/italic/link tokens
+ * contribute the text of their children, and a nested image contributes its
+ * own alt text. Every other token type contributes nothing.
+ *
+ * @param tokens inline tokens to flatten
+ * @returns the concatenated plain text
+ */
+function tokensToString(tokens: Token[]): string {
+  return tokens
+    .map(token => {
+      switch (token.type) {
+        case 'text':
+        case 'code':
+          // inline code is still readable text; dropping it would leave gaps in alt text
+          return token.content
+        case 'image':
+          // a nested image is represented by its own alt text
+          return token.alt
+        case 'bold':
+        case 'italic':
+        case 'link':
+          return tokensToString(token.children)
+        default:
+          return ''
+      }
+    })
+    .join('')
+}
+
+/**
+ * Parse a complete inline string into tokens.
+ *
+ * Runs parseInlineRecursive without a stop delimiter and discards the
+ * end index it returns.
+ *
+ * @param text inline markdown source
+ * @returns the parsed inline tokens
+ */
+function parseInline(text: string): Token[] {
+  return parseInlineRecursive(text)[0]
+}
+
+/**
+ * Parse inline markdown (images, links, inline code, bold, italic, plain text)
+ * starting at the beginning of `text`.
+ *
+ * @param text inline source to parse
+ * @param stop optional delimiter; parsing returns as soon as `text` has it at the current position
+ * @returns the tokens parsed so far and the index in `text` where parsing stopped:
+ *   the index of `stop` when it was found, otherwise a value of at least `text.length`
+ */
+function parseInlineRecursive(text: string, stop?: string): [Token[], number] {
+ const tokens: Token[] = []
+ let i = 0
- // Skip blank lines
- if (line.trim() === '') {
+ while (i < text.length) {
+ if (stop && text.startsWith(stop, i)) {
+ return [tokens, i]
+ }
+
+ // Image: ![alt](src)
+ if (text[i] === '!' && i + 1 < text.length && text[i + 1] === '[') {
+ const start = i
+ i += 2
+ const [altTokens, consumedAlt] = parseInlineRecursive(text.slice(i), ']')
+ i += consumedAlt
+ if (i >= text.length || text[i] !== ']') {
+ // For incomplete image syntax without closing bracket, preserve the whole text
+ tokens.push({ type: 'text', content: text.slice(start, start + 2 + consumedAlt) })
+ continue
+ }
+ i++
+ if (i < text.length && text[i] === '(') {
i++
+ const endParen = text.indexOf(')', i)
+ if (endParen === -1) {
+ tokens.push({ type: 'text', content: text.slice(start, i) })
+ continue
+ }
+ const src = text.slice(i, endParen).trim()
+ const alt = tokensToString(altTokens)
+ i = endParen + 1
+ tokens.push({ type: 'image', alt, src })
+ continue
+ } else {
+ tokens.push({ type: 'text', content: '![' })
+ tokens.push(...altTokens)
+ tokens.push({ type: 'text', content: ']' })
continue
}
+ }
- // Check for code block
- if (line.trim().startsWith('```')) {
- i++
- const codeLines: string[] = []
- while (i < lines.length && !lines[i].trim().startsWith('```')) {
- codeLines.push(lines[i])
- i++
+ // Link: [text](url)
+ if (text[i] === '[') {
+ const start = i
+ i++
+ const [linkTextTokens, consumed] = parseInlineRecursive(text.slice(i), ']')
+ i += consumed
+ if (i >= text.length || text[i] !== ']') {
+ // Unclosed '[': keep the bracket AND the text already parsed after it.
+ // The nested parse consumed the rest of the input, so emitting only '['
+ // would silently drop everything that followed.
+ tokens.push({ type: 'text', content: text[start] })
+ tokens.push(...linkTextTokens)
+ continue
+ }
+ i++ // skip ']'
+ if (i < text.length && text[i] === '(') {
+ i++ // skip '('
+ const endParen = text.indexOf(')', i)
+ if (endParen === -1) {
+ tokens.push({ type: 'text', content: text.slice(start, i) })
+ continue
}
- i++ // skip the ending ```
- nodes.push({ type: 'codeblock', content: codeLines.join('\n') })
+ const href = text.slice(i, endParen).trim()
+ i = endParen + 1
+ tokens.push({
+ type: 'link',
+ href,
+ children: linkTextTokens,
+ })
+ continue
+ } else {
+ tokens.push({ type: 'text', content: '[' })
+ tokens.push(...linkTextTokens)
+ tokens.push({ type: 'text', content: ']' })
continue
}
+ }
- // Check for headers
- const headerMatch = /^(#{1,3})\s+(.*)/.exec(line)
- if (headerMatch) {
- const level = headerMatch[1].length
- const content = headerMatch[2]
- nodes.push({ type: 'header', level, content })
+ // Inline code
+ // Content is taken verbatim up to the next backtick; without a closing
+ // backtick the rest of the text becomes the code content.
+ if (text[i] === '`') {
+ i++
+ let code = ''
+ while (i < text.length && text[i] !== '`') {
+ code += text[i]
i++
- continue
}
+ i++
+ tokens.push({ type: 'code', content: code })
+ continue
+ }
- // Check for list item
- const olMatch = /^\s*\d+\.\s+(.*)/.exec(line)
- const ulMatch = /^\s*-\s+(.*)/.exec(line)
+ // Bold (** or __)
+ // The closing delimiter is skipped unconditionally, so when it is missing
+ // the rest of the text becomes bold and i ends past text.length.
+ if (text.startsWith('**', i) || text.startsWith('__', i)) {
+ const delimiter = text.slice(i, i + 2)
+ i += 2
+ const [innerTokens, consumed] = parseInlineRecursive(text.slice(i), delimiter)
+ i += consumed
+ i += 2
+ tokens.push({ type: 'bold', children: innerTokens })
+ continue
+ }
- if (olMatch || ulMatch) {
- const indent = /^(\s*)/.exec(line)?.[1].length ?? 0
- const ordered = !!olMatch
- const { node, endIndex } = parseList(indent, ordered)
- nodes.push(node)
- i = endIndex
- continue
+ // Italic (* or _)
+ if (text[i] === '*' || text[i] === '_') {
+ const delimiter = text[i]
+ // For '*' only: if surrounding non-space chars are digits, treat as literal
+ if (delimiter === '*') {
+ let j = i - 1
+ while (j >= 0 && text[j] === ' ') j--
+ const prevIsDigit = j >= 0 && /\d/.test(text[j])
+ let k = i + 1
+ while (k < text.length && text[k] === ' ') k++
+ const nextIsDigit = k < text.length && /\d/.test(text[k])
+ if (prevIsDigit && nextIsDigit) {
+ tokens.push({ type: 'text', content: delimiter })
+ i++
+ continue
+ }
}
-
- // If not code block, header, or list, treat as paragraph
- nodes.push({ type: 'paragraph', content: line })
i++
+ const [innerTokens, consumed] = parseInlineRecursive(text.slice(i), delimiter)
+ i += consumed
+ i++ // skip closing delimiter
+ tokens.push({ type: 'italic', children: innerTokens })
+ continue
}
- return nodes
+ // Otherwise, consume plain text until next special character or end
+ // Every special character was handled by a branch above, so text[i] is
+ // plain here and j always advances by at least one.
+ let j = i
+ while (
+ j < text.length
+ && text[j] !== '`'
+ && !(text.startsWith('**', j) || text.startsWith('__', j))
+ && text[j] !== '*'
+ && text[j] !== '_'
+ && text[j] !== '['
+ // && text[j] !== '!'
+ && !(stop && text.startsWith(stop, j))
+ // stop at '![' (start of an image) but not at a bare '!' as in `text!`
+ && !(text[j] === '!' && j + 1 < text.length && text[j + 1] === '[')
+ ) {
+ j++
+ }
+ tokens.push({ type: 'text', content: text.slice(i, j) })
+ i = j
}
- const lines = text.split('\n')
- const ast = parseBlocks(lines)
+ return [tokens, i]
+}
- // Convert AST to React elements
- function renderNodes(nodes: NodeType[]): ReactNode {
- return nodes.map((node, idx) => {
- switch (node.type) {
+function renderTokens(tokens: Token[], keyPrefix = ''): ReactNode[] {
+ return tokens.map((token, index) => {
+ const key = `${keyPrefix}${index}`
+ switch (token.type) {
+ case 'text':
+ return token.content
+ case 'code':
+ return createElement('code', { key }, token.content)
+ case 'bold':
+ return createElement(
+ 'strong',
+ { key },
+ renderTokens(token.children, key + '-')
+ )
+ case 'italic':
+ return createElement(
+ 'em',
+ { key },
+ renderTokens(token.children, key + '-')
+ )
+ case 'link':
+ return createElement(
+ 'a',
+ { key, href: token.href },
+ renderTokens(token.children, key + '-')
+ )
+ case 'image':
+ return createElement('img', {
+ key,
+ src: token.src,
+ alt: token.alt,
+ })
case 'paragraph':
- return
{...parseInline(node.content)}
- case 'header': - if (node.level === 1) return{...parseInline(node.content)}
- case 'codeblock': - return ( -
- {node.content}
-
+ return createElement(
+ 'p',
+ { key },
+ renderTokens(token.children, key + '-')
)
- case 'list':
- if (node.ordered) {
- return (
- tags for simple lists
+ expect(container.querySelectorAll('p').length).toBe(0)
})
- it('renders an list with bold', () => {
+ it('renders a list with bold', () => {
const text = '- **Item 1**\n- Item 2\n- Item 3\n\n'
const { getByText } = render(
tags for paragraphs
+ expect(container.querySelectorAll('p').length).toBe(2)
+ })
+
+ it('renders an unterminated list', () => {
const text = '- Item 1'
const { getByText } = render(