Merge pull request #358 from rmuir/fix_build_pin_deps_dependabot

rmuir · web-flow · commit 27945a2aa30b · 2025-04-10T19:36:41.000-04:00
fix build, pin dependencies, enable dependabot
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,41 @@
+version: 2
+updates:
+  - package-ecosystem: github-actions
+    directory: /
+    schedule:
+      interval: weekly
+      day: tuesday
+    commit-message:
+      prefix: ci
+    labels: [dependencies]
+    open-pull-requests-limit: 1
+
+  - package-ecosystem: pip
+    directory: /
+    schedule:
+      interval: weekly
+      day: tuesday
+    commit-message:
+      prefix: build(deps)
+    labels:
+      - dependencies
+
+  - package-ecosystem: npm
+    directory: /src/javascript/2d
+    schedule:
+      interval: weekly
+      day: tuesday
+    commit-message:
+      prefix: build(deps)
+    labels:
+      - dependencies
+
+  - package-ecosystem: npm
+    directory: /src/javascript/3d
+    schedule:
+      interval: weekly
+      day: tuesday
+    commit-message:
+      prefix: build(deps)
+    labels:
+      - dependencies
diff --git a/pyproject.toml b/pyproject.toml
@@ -112,6 +112,7 @@ ignore = [
   "PLW0128", # Redeclared variable in assignment
   "PLW0602", # Using global for `VERBOSE` but no assignment is done
   "PLW0603", # Using the global statement to update `camelCase` is discouraged
+  "PLW1507", # Shallow copy of `os.environ` via `copy.copy(os.environ)`
   "PLW1509", # `preexec_fn` argument is unsafe when using threads
   "PLW2901", # `for` loop variable overwritten by assignment target
   "PT018",   # Assertion should be broken down into multiple parts
@@ -133,11 +134,13 @@ ignore = [
   "PTH204",  # `os.path.getmtime` should be replaced by `Path.stat().st_mtime`
   "PTH206",  # Replace `.split(os.sep)` with `Path.parts`
   "PTH207",  # Replace `glob` with `Path.glob` or `Path.rglob`
+  "PTH208",  # Use `pathlib.Path.iterdir()` instead.
   "PYI024",  # Use `typing.NamedTuple` instead of `collections.namedtuple`
   "RET504",  # Unnecessary assignment to variable before `return` statement
   "RET503",  # Missing explicit `return` at the end of function able to return non-`None` value
   "RUF005",  # Consider `["Foo", *series]` instead of concatenation
   "RUF012",  # Mutable class attributes should be annotated with `typing.ClassVar`
+  "RUF046",  # Value being cast to `int` is already an integer
   "S101",    # Use of `assert` detected
   "S102",    # Use of `exec` detected
   "S104",    #  Possible binding to all interfaces
diff --git a/requirements.txt b/requirements.txt
@@ -1,24 +1,24 @@
 # used by segments-to-html
-graphviz
-intervaltree
+graphviz==0.20.3
+intervaltree==3.1.0
 
 # used by infer-token-vectors
 # FIXME: HMM this is an 800MB download?
 # sentence-transformers
 
 # used by index-chart
-py-gnuplot
+py-gnuplot==1.3
 
 # used by qps-chart
-pillow
+pillow==11.1.0
 
 # used by mergeviz
-iso8601
+iso8601==2.1.0
 
 # used by indexosm
-rtree
+rtree==1.4.0
 
 # linter and formatter
-ruff
+ruff==0.11.5
 # type checker
-basedpyright
+basedpyright==1.28.5
diff --git a/src/python/WikipediaExtractor.py b/src/python/WikipediaExtractor.py
@@ -528,7 +528,7 @@ def compact(text):
     # Drop residuals of lists
     elif line[0] in "{|" or line[-1] in "}" or (line[0] == "(" and line[-1] == ")") or line.strip(".-") == "":
       continue
-    elif len(headers):
+    elif headers:
       items = list(headers.items())
       items.sort()
       for i, v in items:
diff --git a/src/python/index_sim.py b/src/python/index_sim.py
@@ -19,7 +19,6 @@
 import bisect
 import collections
 import random
-import sys
 
 #
 # each line of index events source is this syntax:
@@ -94,10 +93,7 @@ def __repr__(self):
     return f"<segment {self.name} in_ram={self.in_ram} size={mult * self.size_in_bytes / 1024 / 1024:.1f} MB>"
 
   def add_document(self, docid, size_bytes):
-    """
-    Only used for in-memory segments.
-    """
-
+    """Only used for in-memory segments."""
     assert docid not in self.docs
     assert docid not in self.deletes
 
@@ -163,10 +159,7 @@ def get_doc_counts(self):
     return max_doc, del_count
 
   def refresh(self):
-    """
-    Returns frozen segments for searching.
-    """
-
+    """Returns frozen segments for searching."""
     for index_thread, seg in list(self.index_thread_to_segment.items()):
       self.flush(index_thread, seg, "refresh")
     print(f"after refresh {self.ram_bytes_used} {len(self.index_thread_to_segment)}")
@@ -254,14 +247,12 @@ def finish_merge(self, merged_seg, to_merge_segments, del_docs_to_reclaim, final
     self.maybe_merge("finish-merge")
 
   def launch_merge(self, to_merge_segments, reason):
-    """
-    Simulates a merge running, scheduling the end of the merge to commit / reclaim deletes.
+    """Simulates a merge running, scheduling the end of the merge to commit / reclaim deletes.
     The merge runs in the background ... once the clock advances to the merge finish time,
     we commit the merge.
 
     We model merge run-time as simple linear multiplier on size of merged segment.
     """
-
     for seg in to_merge_segments:
       assert seg not in self.merging_segments
       self.merging_segments.add(seg)
@@ -412,7 +403,7 @@ def main():
   search_net_deletes = 0
   net_replicate_bytes = 0
 
-  with open(index_events_source, "r") as f:
+  with open(index_events_source) as f:
     while True:
       line = f.readline()
       if line == "":
@@ -468,7 +459,7 @@ def main():
         next_print_sec += print_every_sec
 
     # summary stats
-    print(f"\nDONE!")
+    print("\nDONE!")
     print(f"  {search_net_docs=:,} {search_net_deletes=:,} ({100.0 * search_net_deletes / search_net_docs:.1f} %)")
     print(f"  replicated {net_replicate_bytes / 1024 / 1024 / 1024.0:,.1f} GB")
 
diff --git a/src/python/segments_to_html.py b/src/python/segments_to_html.py
@@ -16,7 +16,6 @@
 #
 
 import datetime
-import json
 import math
 import os
 import pickle

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -16,7 +16,6 @@` |
| `16` | `16` | `#` |
| `17` | `17` | |
| `18` | `18` | `import datetime` |
| `19` | | `-import json` |
| `20` | `19` | `import math` |
| `21` | `20` | `import os` |
| `22` | `21` | `pickle` import: `import pickle` |