Skip to content

Commit 7e1ae2c

Browse files
committed
Updates
1 parent 7945564 commit 7e1ae2c

File tree

5 files changed

30 additions and 30 deletions (+30 / -30 lines changed)

5 files changed

30 additions and 30 deletions (+30 / -30 lines changed)

.github/workflows/short.yml

Lines changed: 0 additions & 13 deletions
This file was deleted.

arxiv_utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ def fill_papers_with_arxiv(papers: list[Paper]) -> list[Paper]:
7070

7171
if paper.title and paper.title != result.title:
7272
print(f'[!] Title mismatch: "{paper.title}" vs "{result.title}"')
73-
continue
7473

7574
paper.title = result.title
7675
paper.url = result.entry_id

notion_utils.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
1+
import asyncio
12
import typing as t
23
from datetime import datetime
34

4-
from notion_client import Client
5-
from notion_client.helpers import collect_paginated_api
5+
from notion_client import AsyncClient
6+
from notion_client.helpers import async_collect_paginated_api
7+
from tqdm import tqdm # type: ignore
68

79
from _types import Paper, Focus
810

9-
NotionClient = Client
11+
NotionClient = AsyncClient
1012

1113

1214
def get_notion_client(token: str) -> NotionClient:
1315
return NotionClient(auth=token)
1416

1517

16-
def get_papers_from_notion(client: NotionClient, database_id: str) -> list[Paper]:
17-
results = collect_paginated_api(client.databases.query, database_id=database_id)
18+
async def get_papers_from_notion(client: NotionClient, database_id: str) -> list[Paper]:
19+
results = await async_collect_paginated_api(
20+
client.databases.query, database_id=database_id
21+
)
1822

1923
papers: list[Paper] = []
2024
for result in results:
@@ -53,10 +57,10 @@ def get_papers_from_notion(client: NotionClient, database_id: str) -> list[Paper
5357
return papers
5458

5559

56-
def write_papers_to_notion(
60+
async def write_papers_to_notion(
5761
client: NotionClient, database_id: str, papers: list[Paper]
5862
) -> None:
59-
for paper in papers:
63+
for paper in tqdm(papers):
6064
properties: dict[str, t.Any] = {}
6165
if paper.title:
6266
properties["Title"] = {"title": [{"text": {"content": paper.title}}]}
@@ -78,8 +82,10 @@ def write_papers_to_notion(
7882
properties["Explored"] = {"checkbox": paper.explored}
7983

8084
if paper.page_id:
81-
client.pages.update(paper.page_id, properties=properties)
85+
await client.pages.update(paper.page_id, properties=properties)
8286
else:
83-
client.pages.create(
87+
await client.pages.create(
8488
parent={"database_id": database_id}, properties=properties
8589
)
90+
91+
return None

paperstack.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import argparse
2+
import asyncio
23
import os
4+
from datetime import datetime
35

46
from arxiv_utils import fill_papers_with_arxiv, search_arxiv_as_paper
57
from notion_utils import (
@@ -20,7 +22,7 @@
2022
"""
2123

2224

23-
def main():
25+
async def main():
2426
parser = argparse.ArgumentParser()
2527

2628
parser.add_argument(
@@ -53,7 +55,12 @@ def main():
5355
openai_client = get_openai_client(args.openai_token)
5456

5557
print(f" |- Getting papers from Notion [{args.database_id}]")
56-
papers = get_papers_from_notion(notion_client, args.database_id)
58+
papers = await get_papers_from_notion(notion_client, args.database_id)
59+
print(f" |- {len(papers)} existing papers")
60+
61+
for p in papers:
62+
if p.published < datetime.fromisoformat("2024-07-01 00:00:00+00:00"):
63+
p.explored = True
5764

5865
if not all([p.has_arxiv_props() for p in papers]):
5966
print(" |- Filling in missing data from arXiv")
@@ -63,7 +70,7 @@ def main():
6370
print(" |- Searching arXiv for new papers")
6471
existing_titles = [paper.title for paper in papers]
6572
for searched_paper in search_arxiv_as_paper(
66-
args.arxiv_search_query, max_results=10
73+
args.arxiv_search_query, max_results=50
6774
):
6875
if searched_paper.title not in existing_titles:
6976
print(f" |- {searched_paper.title[:50]}...")
@@ -73,7 +80,7 @@ def main():
7380
to_explore = [p for p in papers if not p.explored]
7481
if to_explore:
7582
print(" |- Getting related papers from Semantic Scholar")
76-
recommended_papers = get_recommended_arxiv_ids_from_semantic_scholar(papers)
83+
recommended_papers = get_recommended_arxiv_ids_from_semantic_scholar(to_explore)
7784
papers.extend(fill_papers_with_arxiv(recommended_papers))
7885
print(f" |- {len(recommended_papers)} new papers")
7986
else:
@@ -96,10 +103,10 @@ def main():
96103
to_write = [p for p in papers if p.has_changed()]
97104
if to_write:
98105
print(f" |- Writing {len(to_write)} updates back to Notion")
99-
write_papers_to_notion(notion_client, args.database_id, to_write)
106+
await write_papers_to_notion(notion_client, args.database_id, to_write)
100107

101108
print("[+] Done!")
102109

103110

104111
if __name__ == "__main__":
105-
main()
112+
asyncio.run(main())

scholar_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from semanticscholar import SemanticScholar # type: ignore
2+
from tqdm import tqdm # type: ignore
23

34
from _types import Paper
45

@@ -9,7 +10,7 @@ def get_recommended_arxiv_ids_from_semantic_scholar(
910
papers: list[Paper], max_results: int = 10, min_year: int = 2018
1011
) -> list[Paper]:
1112
results: list[dict] = []
12-
for paper in papers:
13+
for paper in tqdm(papers):
1314
if not paper.url:
1415
continue
1516

0 commit comments

Comments
 (0)