Skip to content

Commit 6fd3768

Browse files
committed
data-driven listing and abstract pages for research papers from arxiv, also includes github repos where available
1 parent 225824c commit 6fd3768

File tree

13 files changed

+1290
-20
lines changed

13 files changed

+1290
-20
lines changed

package-lock.json

Lines changed: 846 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,22 @@
77
"start": "astro dev",
88
"build": "astro check && astro build",
99
"preview": "astro preview",
10-
"astro": "astro"
10+
"astro": "astro",
11+
"test": "vitest"
1112
},
1213
"dependencies": {
1314
"@astrojs/check": "^0.9.3",
1415
"@astrojs/starlight": "^0.26.1",
1516
"@astrojs/starlight-tailwind": "^2.0.3",
1617
"@astrojs/tailwind": "^5.1.2",
18+
"@effect/platform-node": "^0.65.0",
1719
"astro": "^4.14.5",
20+
"date-fns": "^4.1.0",
21+
"effect": "^3.11.0",
1822
"sharp": "^0.33.5",
1923
"tailwindcss": "^3.4.15",
20-
"typescript": "^5.5.4"
24+
"typescript": "^5.5.4",
25+
"vitest": "^2.1.7",
26+
"xml-js": "^1.6.11"
2127
}
2228
}

src/components/GrArxivPage.astro

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
2+
---
3+
import { format } from 'date-fns';
4+
5+
import {extractArxivID, getPaper} from '../lib/arxiv'
6+
7+
import StarlightPage from '@astrojs/starlight/components/StarlightPage.astro';
8+
9+
const props = Astro.props;
10+
11+
const arxivEntry = props.arxiv
12+
13+
const arxivid = extractArxivID(arxivEntry.id)
14+
15+
const paper = getPaper(arxivid)
16+
17+
---
18+
<StarlightPage
19+
frontmatter={{ title: arxivEntry.title, editUrl:false, tableOfContents:false} }}
20+
>
21+
<p class="byline text-xs">
22+
<time itemprop="published" datetime={format(arxivEntry.published, 'yyyy-MM-dd')}>
23+
Published {format(arxivEntry.published, 'MMMM do, yyyy')}
24+
</time>
25+
<address class="author text-xs">By
26+
{arxivEntry.author.map( (author:any, i:number) => (
27+
<span>{(i ? ', ' : '')}<a rel="author" class="url fn n">{author.name}</a></span>
28+
))}
29+
</address>
30+
<cite class="arxivid text-xs">
31+
<a href={arxivEntry.id}>arXiv:{arxivid}</a>
32+
[ {arxivEntry.category.join(", ")}
33+
]
34+
</cite>
35+
<cite class="github text-xs">
36+
{paper?.github !== undefined && paper.github !== "" ?
37+
(<a href={"https://github.com/" + paper?.github}>github:{paper?.github}</a>)
38+
: ''
39+
}
40+
41+
</cite>
42+
</p>
43+
44+
<h2 id="quote">Abstract</h2>
45+
<blockquote><p>{arxivEntry.summary}</p></blockquote>
46+
47+
</StarlightPage>
48+
49+
50+
51+
<!-- <StarlightPage
52+
frontmatter={{ title: `${entry.title.text}`, editUrl:false, tableOfContents:false} }}
53+
>
54+
55+
<p class="byline text-xs">
56+
<time pubdate={format(pubDate, 'yyyy-MM-dd')} title="August 28th, 2011">Published {format(pubDate, 'MMMM do, yyyy')}</time>
57+
<address class="author text-xs">By
58+
{authors.map( (author:any, i:number) => (
59+
<span key={i}>{(i ? ', ' : '')}
60+
<a rel="author" class="url fn n">{author.name.text}</a>
61+
</span>
62+
))}
63+
</address>
64+
<cite class="arxivid text-xs">
65+
<a href={entry.id.text}>arXiv:{arxivid}</a>
66+
[ {categories.map( (category:any) => category._attributes.term).join(", ")}
67+
]
68+
</cite>
69+
<cite class="github text-xs">
70+
{paper?.github !== undefined && paper.github !== "" ?
71+
(<a href={"https://github.com/" + paper?.github}>github:{paper?.github}</a>)
72+
: ''
73+
}
74+
75+
</cite>
76+
</p>
77+
78+
79+
<h2 id="quote">Abstract</h2>
80+
<blockquote><p>{entry.summary.text}</p></blockquote>
81+
82+
</StarlightPage> -->

src/content/config.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
1-
import { defineCollection } from 'astro:content';
1+
import { z, defineCollection } from 'astro:content';
22
import { docsSchema } from '@astrojs/starlight/schema';
33

4+
const researchCollection = defineCollection({
5+
type: 'data',
6+
schema: z.object({
7+
title: z.string(),
8+
canonicalURL: z.string().url()
9+
})
10+
})
11+
412
export const collections = {
513
docs: defineCollection({ schema: docsSchema() }),
14+
research: researchCollection
615
};

src/content/docs/appendices/notation.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ The GraphRAG pattern catalog uses a data notation called `gram` to describe
1212
logical graph structures called patterns that are composed of nodes, relationships
1313
and subjects.
1414

15-
The Gram notation is intended to be self-descriptive and explicit, able to
16-
represent data and structures that are often implicit in a physical graph models.
17-
For example, paths are present in any connected graph, however storing path-level
18-
information isn't normally supported. You can find paths, even store paths, but
19-
there is no way to "say something" about a path.
15+
> The Gram notation is intended to be self-descriptive and explicit, able to
16+
> represent data and structures that are often implicit in physical graph models.
17+
> For example, paths are present in any connected graph; however, storing path-level
18+
> information isn't normally supported. You can find paths, even store paths, but
19+
> there is no way to "say something" about a path.
2020
2121
Gram starts with a notion of "subjects" as a self-describing data structure
2222
in two parts:

src/content/docs/appendices/research.mdx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
---
22
title: GraphRAG Papers
33
description: Foundational research papers about GraphRAG and Knowledge Graphs
4+
editUrl: false
5+
tableOfContents: false
46
---
57

68
- [Graph Retrieval-Augmented Generation: A Survey](https://arxiv.org/abs/2408.08921)

src/data/papers.json

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
[
2+
{
3+
"arxivid": "2408.08921",
4+
"github": "pengboci/GraphRAG-Survey"
5+
},
6+
{
7+
"arxivid": "2312.16890",
8+
"github": "HKUDS/DiffKG"
9+
},
10+
{
11+
"arxivid": "2306.08302",
12+
"github": ""
13+
},
14+
{
15+
"arxivid": "2310.04560",
16+
"github": "google-research/talk-like-a-graph"
17+
},
18+
{
19+
"arxivid": "2311.07509",
20+
"github": "datadotworld/cwd-benchmark-data"
21+
},
22+
{
23+
"arxivid": "2402.07630",
24+
"github": "XiaoxinHe/G-Retriever"
25+
},
26+
{
27+
"arxivid": "2404.12491",
28+
"github": "urchade/GraphER"
29+
},
30+
{
31+
"arxivid": "2404.16130",
32+
"github": "microsoft/graphrag"
33+
},
34+
{
35+
"arxivid": "2404.17723",
36+
"github": ""
37+
},
38+
{
39+
"arxivid": "2408.04948",
40+
"github": ""
41+
},
42+
{
43+
"arxivid": "2406.14550",
44+
"github": ""
45+
},
46+
{
47+
"arxivid": "2410.05779",
48+
"github": "HKUDS/LightRAG"
49+
},
50+
{
51+
"arxivid": "2410.08815",
52+
"github": "Li-Z-Q/StructRAG"
53+
},
54+
{
55+
"arxivid": "2307.07697",
56+
"github": "IDEA-FinAI/ToG"
57+
},
58+
{
59+
"arxivid": "2405.14831",
60+
"github": "OSU-NLP-Group/HippoRAG"
61+
},
62+
{
63+
"arxivid": "2408.04187",
64+
"github": "MedicineToken/Medical-Graph-RAG"
65+
},
66+
{
67+
"arxivid": "2405.18414",
68+
"github": ""
69+
},
70+
{
71+
"arxivid": "2405.16506",
72+
"github": "HuieL/GRAG"
73+
},
74+
{
75+
"arxivid": "2410.23875",
76+
"github": "liyichen-cly/PoG"
77+
},
78+
{
79+
"arxivid": "2410.18415",
80+
"github": ""
81+
},
82+
{
83+
"arxivid": "2404.07103",
84+
"github": "PeterGriffinJin/Graph-CoT"
85+
}
86+
]

src/lib/arxiv.spec.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import { assert, expect, test } from 'vitest';
2+
3+
import { Effect, Either } from "effect"
4+
import { FetchHttpClient } from "@effect/platform"
5+
6+
import { getArxivDetails } from './arxiv'
7+
import { type ArxivEntry } from './arxiv'
8+
9+
test('arxiv fetch well-known entry', async () => {
10+
const arxivid = '2402.07630';
11+
12+
const program = getArxivDetails(arxivid).pipe(
13+
Effect.scoped,
14+
Effect.provide(FetchHttpClient.layer) // provide a real implementation of fetch()
15+
);
16+
17+
const result = await Effect.runPromise(program)
18+
19+
expect(Either.isRight(result))
20+
21+
expect(Either.getOrThrow(result).title).toBe("G-Retriever: Retrieval-Augmented Generation for Textual Graph Understanding and Question Answering")
22+
23+
// DEBUG
24+
// Either.match(result,
25+
// {
26+
// onLeft: (e) => console.error(e),
27+
// onRight: (result) => console.log(result.feed.entry[0])
28+
// }
29+
// )
30+
31+
});
32+
33+
test('arxiv get many entries', async () => {
34+
const arxivids = ['2402.07630', '2311.07509', '2306.08302'];
35+
36+
const program = Effect.forEach(arxivids, (arxivid) =>
37+
getArxivDetails(arxivid)
38+
).pipe(
39+
Effect.map( Either.all ), // gather all the successes into one array
40+
Effect.scoped,
41+
Effect.provide(FetchHttpClient.layer)
42+
)
43+
44+
const result = await Effect.runPromise(program)
45+
46+
expect(Either.isRight(result))
47+
48+
})

0 commit comments

Comments
 (0)