Skip to content

Commit 358ab5d

Browse files
authored
Merge pull request #102 from lsst-sqre/tickets/DM-51015
DM-51015: Commit database while running nbstripout
2 parents 921c683 + 950e2db commit 358ab5d

File tree

3 files changed

+27
-5
lines changed

3 files changed

+27
-5
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ Collect fragments into this file with: scriv collect --version X.Y.Z
88

99
<!-- scriv-insert-here -->
1010

11+
<a id='changelog-0.20.1'></a>
12+
13+
## 0.20.1 (2025-05-22)
14+
15+
### Bug fixes
16+
17+
- In the `times-square nbstripout` command, commit the database session after each page is migrated. Previously, all page changes were committed at once, which caused the migration to fail when processing large numbers of notebooks (roughly 2,000 or more).
18+
1119
<a id='changelog-0.20.0'></a>
1220

1321
## 0.20.0 (2025-05-22)

src/timessquare/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ async def run_nbstripout(
220220
db_session=db_session,
221221
)
222222
count = await page_service.migrate_ipynb_with_nbstripout(
223-
dry_run=dry_run, for_page_id=page
223+
dry_run=dry_run, for_page_id=page, db_session=db_session
224224
)
225225
logger.info(
226226
"Finished running nbstripout",

src/timessquare/services/backgroundpage.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from collections.abc import Mapping
88
from typing import Any
99

10+
from sqlalchemy.ext.asyncio import async_scoped_session
11+
1012
from timessquare.domain.nbhtml import NbDisplaySettings, NbHtmlKey
1113
from timessquare.domain.page import PageInstanceModel
1214
from timessquare.storage.noteburst import NoteburstJobResponseModel
@@ -52,7 +54,11 @@ async def update_nbhtml(
5254
)
5355

5456
async def migrate_ipynb_with_nbstripout(
55-
self, *, dry_run: bool = True, for_page_id: str | None = None
57+
self,
58+
*,
59+
dry_run: bool = True,
60+
for_page_id: str | None = None,
61+
db_session: async_scoped_session,
5662
) -> int:
5763
"""Migrate the ipynb files with nbstripout to remove outputs
5864
and metadata.
@@ -68,19 +74,23 @@ async def migrate_ipynb_with_nbstripout(
6874
"""
6975
if for_page_id:
7076
return await self._run_nbstripout_on_page(
71-
dry_run=dry_run, page_id=for_page_id
77+
dry_run=dry_run, page_id=for_page_id, db_session=db_session
7278
)
7379

7480
page_count = 0
7581
for page_id in await self._page_store.list_page_names():
7682
page_count += await self._run_nbstripout_on_page(
77-
dry_run=dry_run, page_id=page_id
83+
dry_run=dry_run, page_id=page_id, db_session=db_session
7884
)
7985

8086
return page_count
8187

8288
async def _run_nbstripout_on_page(
83-
self, *, dry_run: bool = True, page_id: str
89+
self,
90+
*,
91+
dry_run: bool = True,
92+
page_id: str,
93+
db_session: async_scoped_session,
8494
) -> int:
8595
"""Run nbstripout on a page."""
8696
try:
@@ -106,6 +116,10 @@ async def _run_nbstripout_on_page(
106116
if not dry_run:
107117
page.strip_ipynb()
108118
await self.update_page_in_store(page, drop_html_cache=False)
119+
# Manually commit the session because this migration can
120+
# potentially change a lot of pages, and committing all pages
121+
# at once caused problems.
122+
await db_session.commit()
109123
return 1 if has_kernelspec else 0
110124

111125
async def migrate_html_cache_keys(

0 commit comments

Comments
 (0)