Skip to content

Commit 09afe0e

Browse files
committed
Minor QoL tweaks and improvements
- Better parsing of an item's textual component: tag stripping, ignoring text modifiers (strong, emphasis, ...), whitespace normalization - Ignore directories based on "ignore" directive in metadata file
1 parent 7b2f0b1 commit 09afe0e

File tree

16 files changed

+140
-130
lines changed

16 files changed

+140
-130
lines changed

src/parse.ts

Lines changed: 78 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
import type { Parent, Node, Yaml, ListItem, Text, Heading } from 'mdast';
3-
import type { TagMap, Task, Worklog, ParseContext, ParseFileContext, InternalTagMap, ParsedHeading } from './types.js';
3+
import type { TagMap, Task, Worklog, ParseContext, ParseFileContext } from './types.js';
44

55
import { readdir, readFile, stat } from 'node:fs/promises';
66
import { resolve, relative } from 'node:path';
@@ -15,116 +15,95 @@ import { gfmFromMarkdown } from 'mdast-util-gfm';
1515
import { frontmatterFromMarkdown } from 'mdast-util-frontmatter';
1616

1717
import { extractTagsFromText, extractTagsFromYaml } from './tags.js';
18-
import { FOLDER_META_FILE } from './utils.js';
18+
import { FOLDER_META_FILE, joinMergeWhitespace, normalizeWhitespace, SPACE } from './utils.js';
1919

2020
const WL_REGEXP = /^WL:(\d{1,2}(?:\.\d{1,2})?)[hH]\s/;
2121

22-
const isListNodeWorklog = (node: ListItem): boolean => {
23-
const paragraph = node.children[0];
24-
if (!paragraph || paragraph.type !== 'paragraph') {
25-
return false;
22+
const collectTextDepthFirst = (root: Node | undefined, acc: string = ''): string => {
23+
if (!root) {
24+
return acc;
2625
}
27-
const worklog = paragraph.children[0];
28-
if (!worklog || worklog.type !== 'text') {
29-
return false;
26+
if (root.type === 'text') {
27+
return joinMergeWhitespace(acc, normalizeWhitespace((root as Text).value));
28+
}
29+
if ('children' in root) {
30+
return joinMergeWhitespace(acc, (root as Parent).children.map(child => collectTextDepthFirst(child, acc)).join(SPACE));
3031
}
31-
return WL_REGEXP.test(worklog.value);
32+
return acc;
3233
};
3334

34-
const trimTextNodeText = (text: string) => {
35-
return text.trim()
36-
.replaceAll(/\r?\n/g, ' ')
37-
.replace(/\s+/, ' ');
38-
};
39-
40-
const parseTextNode = (node: Text, ctx: ParseFileContext, curr_task: Task | null, curr_wlog: Worklog | null) => {
41-
if (curr_wlog) {
42-
let match;
43-
if (!('text' in curr_wlog.internal_tags) && (match = node.value.match(WL_REGEXP))) {
44-
const [full, hours] = match;
45-
const text = trimTextNodeText(node.value.slice(full.length))
46-
curr_wlog.internal_tags.hours = hours;
47-
curr_wlog.internal_tags.text = text;
48-
extractTagsFromText(text, curr_wlog.tags);
49-
} else {
50-
extractTagsFromText(node.value, curr_wlog.tags);
51-
}
52-
}
53-
if (curr_task) {
54-
if (!('text' in curr_task.internal_tags)) {
55-
const text = trimTextNodeText(node.value);
56-
curr_task.internal_tags.text = text;
57-
extractTagsFromText(text, curr_task.tags);
58-
} else {
59-
extractTagsFromText(node.value, curr_task.tags);
35+
const parseListItemNode = (node: ListItem, ctx: ParseFileContext, item: Task | Worklog | null) => {
36+
if (!item) {
37+
const text = collectTextDepthFirst(node);
38+
if (typeof node.checked === 'boolean') {
39+
const tags: TagMap = {
40+
...ctx.tags,
41+
...ctx.heading?.tags,
42+
line: String(node.position!.start.line),
43+
checked: String(node.checked),
44+
};
45+
tags.text = extractTagsFromText(text, tags);
46+
Object.assign(tags, ctx.internal_tags);
47+
ctx.tasks.add({ type: 'task', tags, file: ctx.file, worklogs: [] });
48+
return;
49+
}
50+
const wl_match = text.match(WL_REGEXP);
51+
if (wl_match) {
52+
const [full, hours] = wl_match;
53+
const tags: TagMap = {
54+
...ctx.tags,
55+
...ctx.heading?.tags,
56+
hours,
57+
line: String(node.position!.start.line),
58+
};
59+
tags.text = extractTagsFromText(text.slice(full.length), tags);
60+
Object.assign(tags, ctx.internal_tags);
61+
ctx.worklogs.add({ type: 'wlog', tags, file: ctx.file, task: item });
62+
return;
6063
}
6164
}
65+
parseParentNode(node, ctx, item);
6266
};
6367

64-
65-
const parseListItemNode = (node: ListItem, ctx: ParseFileContext, curr_task: Task | null, curr_wlog: Worklog | null) => {
66-
if (!curr_task && typeof node.checked === 'boolean') {
67-
const tags: TagMap = { ...ctx.tags };
68-
const internal_tags: InternalTagMap = {
69-
...ctx.internal_tags,
70-
...ctx.curr_heading?.tags,
71-
line: String(node.position!.start.line),
72-
checked: String(node.checked),
73-
};
74-
const task: Task = { tags, internal_tags, file: ctx.file, worklogs: [] };
75-
parseParentNode(node as Parent, ctx, task, curr_wlog);
76-
Object.assign(tags, internal_tags);
77-
ctx.tasks.add(task);
78-
} else if (!curr_wlog && isListNodeWorklog(node)) {
79-
const tags: TagMap = { ...ctx.tags };
80-
const internal_tags: TagMap = {
81-
...ctx.internal_tags,
82-
...ctx.curr_heading?.tags,
83-
line: String(node.position!.start.line),
84-
};
85-
const worklog: Worklog = { tags, internal_tags, file: ctx.file, task: curr_task };
86-
parseParentNode(node as Parent, ctx, curr_task, worklog);
87-
Object.assign(tags, internal_tags);
88-
ctx.worklogs.add(worklog);
89-
} else {
90-
parseParentNode(node, ctx, curr_task, curr_wlog);
91-
}
92-
};
93-
94-
const parseParentNode = (node: Parent, ctx: ParseFileContext, curr_task: Task | null, curr_wlog: Worklog | null) => {
68+
const parseParentNode = (node: Parent, ctx: ParseFileContext, item: Task | Worklog | null) => {
9569
node.children.forEach((node) => {
96-
parseNode(node, ctx, curr_task, curr_wlog);
70+
parseNode(node, ctx, item);
9771
});
9872
};
9973

100-
const parseHeadingNode = (node: Heading, ctx: ParseFileContext, curr_task: Task | null, curr_wlog: Worklog | null) => {
101-
let parent = ctx.curr_heading;
74+
const parseHeadingNode = (node: Heading, ctx: ParseFileContext, item: Task | Worklog | null) => {
75+
let parent = ctx.heading;
10276
while (parent && parent.depth > node.depth) {
10377
parent = parent.parent;
10478
}
10579
const tags = parent ? { ...parent.tags } : {};
106-
const text = trimTextNodeText((node.children[0] as Text).value);
80+
const text = collectTextDepthFirst(node);
10781
extractTagsFromText(text, tags);
108-
ctx.curr_heading = { depth: node.depth, tags, parent };
82+
ctx.heading = { depth: node.depth, tags, parent };
10983
};
11084

111-
const parseNode = (node: Node, ctx: ParseFileContext, curr_task: Task | null, curr_wlog: Worklog | null) => {
85+
const parseYamlNode = (node: Yaml, ctx: ParseFileContext, item: Task | Worklog | null) => {
86+
try {
87+
extractTagsFromYaml((node as Yaml).value, ctx.tags);
88+
} catch (err) {
89+
throw new Error(`could not parse YAML front-matter in file ${ctx.file}: ${(err as Error).message}`);
90+
}
91+
};
92+
93+
const parseNode = (node: Node, ctx: ParseFileContext, item: Task | Worklog | null) => {
11294
switch (node.type) {
11395
case 'yaml':
114-
extractTagsFromYaml((node as Yaml).value, ctx.tags);
96+
parseYamlNode(node as Yaml, ctx, item);
11597
break;
11698
case 'listItem':
117-
parseListItemNode(node as ListItem, ctx, curr_task, curr_wlog);
118-
break;
119-
case 'text':
120-
parseTextNode(node as Text, ctx, curr_task, curr_wlog);
99+
parseListItemNode(node as ListItem, ctx, item);
121100
break;
122101
case 'heading':
123-
parseHeadingNode(node as Heading, ctx, curr_task, curr_wlog);
102+
parseHeadingNode(node as Heading, ctx, item);
124103
break;
125104
default:
126105
if ('children' in node) {
127-
parseParentNode(node as Parent, ctx, curr_task, curr_wlog);
106+
parseParentNode(node as Parent, ctx, item);
128107
}
129108
}
130109
};
@@ -154,27 +133,31 @@ export const parseFile = async (ctx: ParseFileContext) => {
154133
if (date_match) {
155134
ctx.tags['date'] = date_match[1].replaceAll('-', '');
156135
}
157-
parseNode(root_node, ctx, null, null);
136+
parseNode(root_node, ctx, null);
158137
} catch (err) {
159138
if ((err as any).code !== 'ENOENT') {
160139
throw err;
161140
}
162141
}
163142
};
164143

165-
const readFolderMetadata = async (ctx: ParseContext, dir_path: string): Promise<TagMap | undefined> => {
144+
const readFolderMetadata = async (ctx: ParseContext, dir_path: string): Promise<{ tags: TagMap, ignore: boolean }> => {
145+
const target_path = resolve(dir_path, FOLDER_META_FILE);
146+
const tags: TagMap = {};
166147
try {
167-
const target_path = resolve(dir_path, FOLDER_META_FILE);
168-
const data: any = load(await readFile(target_path, 'utf8'));
148+
const data: any = load(await readFile(target_path, 'utf8'));
169149
if (typeof data.tags === 'object' && data.tags !== null) {
170-
return Object.fromEntries(Object.entries(data.tags).map(([k, v]) => [k, String(v)]));
150+
Object.entries(data.tags).forEach(([k, v]) => {
151+
tags[k] = String(v);
152+
});
171153
}
154+
return { tags, ignore: !!data.ignore };
172155
} catch (err) {
173156
if ((err as any).code !== 'ENOENT') {
174-
throw err;
157+
throw new Error(`could not parse folder metadata file ${target_path}: ${err as Error}.message`);
175158
}
159+
return { tags, ignore: false };
176160
}
177-
return undefined;
178161
};
179162

180163
const parseFolderHelper = async (ctx: ParseContext, target_path: string) => {
@@ -190,20 +173,20 @@ const parseFolderHelper = async (ctx: ParseContext, target_path: string) => {
190173
},
191174
});
192175
} else if (target_stats.isDirectory()) {
193-
const folder_tags = await readFolderMetadata(ctx, target_path);
194-
if (folder_tags) {
176+
const { tags, ignore } = await readFolderMetadata(ctx, target_path);
177+
if (!ignore) {
195178
ctx = {
196179
...ctx,
197180
tags: {
198181
...ctx.tags,
199-
...folder_tags,
182+
...tags,
200183
},
201184
};
202-
}
203-
const child_names = await readdir(target_path);
204-
for (const child_name of child_names) {
205-
const child_path = resolve(target_path, child_name);
206-
await parseFolderHelper(ctx, child_path);
185+
const child_names = await readdir(target_path);
186+
for (const child_name of child_names) {
187+
const child_path = resolve(target_path, child_name);
188+
await parseFolderHelper(ctx, child_path);
189+
}
207190
}
208191
}
209192
};

src/tags.ts

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,21 @@ import type { TagMap, TagSortExpression, TagFilterExpression, Item } from './typ
44
import { isMatch } from 'matcher';
55
import { load } from 'js-yaml';
66

7-
import { isNullish } from './utils.js';
7+
import { isNullish, joinMergeWhitespace } from './utils.js';
88

9-
const TAG_SEARCH_REGEXP = /\#([a-z0-9]+)(?:\(([^),]+)\))/gi;
10-
const TAG_CHECK_REGEXP = /([a-z0-9]+)(?:\(([^),]+)\))/i;
9+
export const TAG_SEARCH_REGEXP = /\#([a-z0-9]+)(?:\(([^),]+)\))/gi;
10+
export const TAG_CHECK_REGEXP = /([a-z0-9]+)(?:\(([^),]+)\))/i;
1111

1212
export const extractTagsFromText = (raw: string, tags: TagMap) => {
13-
for (const [, key, value] of raw.matchAll(TAG_SEARCH_REGEXP)) {
14-
tags[key] = value ?? 'true';
13+
let offset = 0;
14+
let stripped = '';
15+
for (const match of raw.matchAll(TAG_SEARCH_REGEXP)) {
16+
tags[match[1]] = match[2] ?? 'true';
17+
stripped = joinMergeWhitespace(stripped, raw.slice(offset, match.index));
18+
offset += match.index + match[0].length;
1519
}
20+
stripped = joinMergeWhitespace(stripped, raw.slice(offset));
21+
return stripped;
1622
};
1723

1824
export const extractTagsFromYaml = (raw: string, tags: TagMap) => {
@@ -61,7 +67,7 @@ export const compileTagSortExpressions = (exprs: TagSortExpression[]): ItemCompa
6167
};
6268
};
6369

64-
const FILTER_REGEXP = /^([!*$^<>]?=|[<>]|is|not)\s*([a-z0-9*]+)$/i
70+
const FILTER_REGEXP = /^([!*$^<>]?=|[<>]|is|not)\s*([a-z0-9*_\-]+)$/i
6571

6672
export const parseTagFilterExpressions = (raw: string): TagFilterExpression[] => {
6773
return raw.split(',').map((raw_substr) => {

src/test-runner.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ for (const item of await readdir(tests_dir_path)) {
4444
const this_test_path = resolve(tests_dir_path, item);
4545
const test_info = JSON.parse(await readFile(resolve(this_test_path, 'test-info.json'), 'utf8'));
4646
const stdout = await readFile(resolve(this_test_path, 'test-stdout'), 'utf8');
47-
tests.push({ ...test_info, stdout, path: this_test_path });
47+
tests.push({ ...test_info, description: `${item}: ${test_info.description}`, stdout, path: this_test_path });
4848
}
4949

5050
describe('taskparser', () => {

src/types.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,18 @@ export type InternalTag = 'hours' | 'checked' | 'file' | 'line' | 'text';
66
export type InternalTagMap = Partial<Record<InternalTag, string>>;
77

88
export interface Item {
9+
type: string;
910
tags: TagMap;
10-
internal_tags: InternalTagMap;
1111
file: string;
1212
}
1313

1414
export interface Task extends Item {
15+
type: 'task';
1516
worklogs: Worklog[];
1617
}
1718

1819
export interface Worklog extends Item {
20+
type: 'wlog';
1921
task: Task | null;
2022
}
2123

@@ -62,6 +64,6 @@ export interface ParsedHeading {
6264
export interface ParseFileContext extends ParseContext {
6365
file: string;
6466
tags: TagMap;
65-
curr_heading?: ParsedHeading;
67+
heading?: ParsedHeading;
6668
internal_tags: InternalTagMap;
6769
}

src/utils.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,13 @@ export const isNullish = (v: any) => {
44
};
55

66
export const FOLDER_META_FILE = '.taskparser.yaml';
7+
8+
export const SPACE = ' ';
9+
10+
export const normalizeWhitespace = (text: string) => {
11+
return text.trim().replaceAll(/\s+/g, SPACE);
12+
};
13+
14+
export const joinMergeWhitespace = (a: string, b: string) => {
15+
return `${a.trim()}${SPACE}${b.trim()}`.trim();
16+
};

tests/003-inline-tags/test-stdout

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
text | date | foo
2-
---- | ---- | ---
3-
a pending task #foo(bar) | 20240101 | bar
4-
a completed task | 20240101 |
1+
text | date | foo
2+
---- | ---- | ---
3+
a pending task | 20240101 | bar
4+
a completed task | 20240101 |
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
text | date | foo
2-
---- | ---- | ---
3-
a pending task #foo(baz) | 20240101 | baz
4-
a completed task | 20240101 | bar
1+
text | date | foo
2+
---- | ---- | ---
3+
a pending task | 20240101 | baz
4+
a completed task | 20240101 | bar
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
text | file | line
2-
---- | ---- | ----
3-
a pending task #file(baz) | 20240101-overriding-internal-tags-not-allowed.md | 5
4-
a completed task #done(false) | 20240101-overriding-internal-tags-not-allowed.md | 6
1+
text | file | line
2+
---- | ---- | ----
3+
a pending task | 20240101-overriding-internal-tags-not-allowed.md | 5
4+
a completed task | 20240101-overriding-internal-tags-not-allowed.md | 6
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
text | h | file | foo
2-
---- | - | ---- | ---
3-
first worklog #foo(baz) | 2 | 20241024-worklog-with-tags.md | baz
4-
second worklog | 3 | 20241024-worklog-with-tags.md | bar
5-
third worklog | 1 | 20241024-worklog-with-tags.md | bin
1+
text | h | file | foo
2+
---- | - | ---- | ---
3+
first worklog | 2 | 20241024-worklog-with-tags.md | baz
4+
second worklog | 3 | 20241024-worklog-with-tags.md | bar
5+
third worklog | 1 | 20241024-worklog-with-tags.md | bin

tests/010-folder-tags/test-stdout

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
text | foo
2-
---- | ---
3-
a pending task | bar
4-
a completed task #foo(baz) | baz
1+
text | foo
2+
---- | ---
3+
a pending task | bar
4+
a completed task | baz

0 commit comments

Comments
 (0)