Skip to content

Commit 3affaa8

Browse files
committed
Avoid checking the whole list every time to reduce transfer cost to DB
1 parent f667aeb commit 3affaa8

File tree

3 files changed

+8
-3
lines changed

3 files changed

+8
-3
lines changed

.github/workflows/static.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ on:
1313
types: [ generate-gh-pages ]
1414

1515
schedule:
16-
- cron: "*/20 * * * *"
16+
- cron: "*/10 * * * *"
1717

1818
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
1919
permissions:

db/image.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# TODO: need a separate model?
22
import logging
33
import os
4+
import random
45
import time
56

67
from sqlalchemy import String, column, Values, select
@@ -21,7 +22,10 @@
2122
def expire():
2223
start = time.time()
2324
removed = 0
24-
for img_files in chunks(os.listdir(config.image_dir), 500):
25+
all_files = list(os.listdir(config.image_dir))
26+
random.shuffle(all_files) # avoid checking whole list everytime to reduce transfer cost to DB
27+
candidates = all_files[:1000]
28+
for img_files in chunks(candidates, 500):
2529
values = Values(column('name', String), name='v').data(list(map(lambda x: (x,), img_files)))
2630
stmt = select(values).join(Summary, Summary.image_name == values.c.name,
2731
isouter=True # Add this to implement left outer join
@@ -32,7 +36,7 @@ def expire():
3236
os.remove(os.path.join(config.image_dir, image_name[0]))
3337
removed += 1
3438
cost = (time.time() - start) * 1000
35-
logger.info(f'removed {removed} feature images, cost(ms): {cost:.2f}')
39+
logger.info(f'removed {removed}/{len(candidates)} feature images, cost(ms): {cost:.2f}')
3640

3741

3842
def chunks(lst, n):

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ openai==0.28.1
1717
torch==2.1.0
1818
bert-extractive-summarizer==0.10.1
1919
transformers==4.36.0
20+
numpy==1.26.4
2021
python-dotenv==1.0.0
2122
python_slugify==8.0.1
2223
sqlalchemy==2.0.21

0 commit comments

Comments
 (0)