Skip to content

Commit 14c2d50

Browse files
jfro and claude committed
feat: add comprehensive ebook library management system
This commit includes multiple related changes:

feat: add ebook library management with admin UI
- Add Libraries CRUD UI at /admin/libraries for managing library directories
- Support three scan modes: manual, auto_watch (filesystem monitoring), scheduled (cron)
- Include directory tree picker component for intuitive path selection
- Implement LibraryWatcher GenServer for automatic filesystem change detection
- Implement LibraryScheduler GenServer for cron-based scheduled scanning
- Add comprehensive test coverage (341 tests passing)

feat: display all books regardless of collection status
- Switch from INNER JOIN to LEFT JOIN on collection_items
- Make media types informational only, not governing visibility
- Add delete book button with proper confirmation

fix: resolve PDF processing hanging on large files
- Switch from in-memory pdf_info library to command-line pdfinfo
- Resolve 8+ hour hangs on 50MB+ PDF files
- Process large PDFs in sub-second timeframes

fix: handle semicolon-separated tags to prevent overflow
- Add normalize_tags/1 to split and process delimited tag strings
- Prevent varchar(255) overflow errors from long concatenated tags

fix: support namespaced XML elements in EPUB parsing
- Handle container.xml and OPF files with namespace prefixes
- Support various EPUB XML format variations

refactor: migrate EPUB parsing from erlang :zip to zstream
- Use zstream library for more flexible ZIP handling
- Add fallback to erlang :zip for unsupported ZIP formats

refactor: replace xmerl with Saxy for XML parsing
- Rewrite OpfParser to use Saxy instead of xmerl
- Improve parsing reliability and performance

chore: update dependencies and Docker configuration
- Add zstream, file_system, saxy dependencies
- Remove pdf_info dependency
- Add poppler-utils to Dockerfile for pdfinfo command

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 8849220 commit 14c2d50

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+6240
-198
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ LABEL org.opencontainers.image.licenses=MIT
8585
RUN echo "[$BUILD_TIME] [$GITREF] building on host that is $BUILDPLATFORM, for the target architecture $TARGETPLATFORM" > /build.log
8686

8787
RUN apt-get update \
88-
&& apt-get install -y --no-install-recommends libstdc++6 openssl libncurses5 locales ca-certificates \
88+
&& apt-get install -y --no-install-recommends libstdc++6 openssl libncurses5 locales ca-certificates poppler-utils \
8989
&& rm -rf /var/lib/apt/lists/*
9090

9191
# Set the locale

config/config.exs

Lines changed: 107 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,16 +118,121 @@ config :fuzzy_catalog, Oban.Web.Resolver, pubsub: FuzzyCatalog.PubSub
118118

119119
config :oban, :notifier, pubsub: FuzzyCatalog.PubSub
120120

121-
# Ebook management configuration
121+
# =====================================================
122+
# Ebook Management Configuration
123+
# =====================================================
124+
#
125+
# This section configures the ebook library management system, including:
126+
# - File format support and limits
127+
# - Fuzzy matching for book linking
128+
# - Automatic filesystem watching (auto_watch mode)
129+
# - Scheduled scanning (scheduled mode)
130+
#
131+
# See lib/fuzzy_catalog/ebooks/library.ex for scan mode details
132+
122133
config :fuzzy_catalog, :ebooks,
123134
# Supported file formats for ebook scanning
135+
# Only files with these extensions will be processed during scans
124136
supported_formats: ["epub", "pdf"],
137+
125138
# Maximum file size in bytes (100MB default)
139+
# Files larger than this will be skipped during scanning
126140
max_file_size: 100 * 1024 * 1024,
141+
127142
# Enable fuzzy matching for book linking by default
143+
# When true, uses fuzzy string matching to link ebooks to existing books
128144
enable_fuzzy_matching: true,
145+
129146
# Fuzzy matching threshold (0.0 - 1.0) - higher means stricter matching
130-
fuzzy_threshold: 0.85
147+
# 0.85 means 85% similarity required to consider a match
148+
fuzzy_threshold: 0.85,
149+
150+
# FileSystem watcher configuration (for auto_watch mode)
151+
# When enabled, monitors library directories for file changes
152+
# Set to false in test environment to prevent interference
153+
watcher_enabled: true,
154+
155+
# Debounce delay in milliseconds (wait for 5 seconds of no changes before scanning)
156+
# This prevents rapid-fire scans during bulk file operations
157+
# Libraries in auto_watch mode will wait this long after the last file change
158+
watcher_debounce_ms: 5000,
159+
160+
# Library scheduler configuration (for scheduled mode)
161+
# When enabled, processes libraries with cron-based schedules
162+
# Set to false in test environment to prevent interference
163+
scheduler_enabled: true
164+
165+
# Note: Calibre library support is automatic - metadata.opf and cover.jpg
166+
# files are automatically detected and parsed when found alongside ebook files
167+
168+
# =====================================================
169+
# Library Management Workflow
170+
# =====================================================
171+
#
172+
# ## Creating a Library
173+
#
174+
# 1. Navigate to /admin/libraries (admin users only)
175+
# 2. Click "New Library"
176+
# 3. Enter library name
177+
# 4. Use "Browse" button to select directory path via tree picker
178+
# 5. Choose scan mode:
179+
# - Manual: Trigger scans via "Scan Now" button
180+
# - Auto Watch: Automatic scans when files change (5s debounce)
181+
# - Scheduled: Scans on cron schedule (e.g., "0 */6 * * *")
182+
# 6. If scheduled mode, enter cron expression
183+
# 7. Click "Create"
184+
#
185+
# ## Scan Modes Explained
186+
#
187+
# ### Manual Mode
188+
# - Scans only when you click "Scan Now" button
189+
# - Best for: One-time imports, testing, infrequent updates
190+
# - No background processes required
191+
#
192+
# ### Auto Watch Mode
193+
# - Monitors directory for file changes using FileSystem library
194+
# - Automatically triggers scan 5 seconds after last change
195+
# - Best for: Active libraries with frequent additions
196+
# - Requires: watcher_enabled: true (default)
197+
# - Background process: LibraryWatcher GenServer
198+
#
199+
# ### Scheduled Mode
200+
# - Runs scans on a cron schedule (e.g., every 6 hours)
201+
# - Cron format: minute hour day month weekday
202+
# - Examples:
203+
# - "0 */6 * * *" = every 6 hours
204+
# - "0 0 * * *" = daily at midnight
205+
# - "0 9 * * 1" = Mondays at 9am
206+
# - Best for: Stable libraries with predictable update patterns
207+
# - Requires: scheduler_enabled: true (default)
208+
# - Background process: LibraryScheduler GenServer
209+
#
210+
# ## How Scanning Works
211+
#
212+
# 1. Library is marked as "scanning" to prevent concurrent scans
213+
# 2. ScanWorker job is enqueued in Oban
214+
# 3. ScanWorker recursively scans directory for .epub and .pdf files
215+
# 4. For each ebook file:
216+
# - Extract metadata (title, author, ISBN, etc.)
217+
# - Create or update Ebook record
218+
# - Link to Library via library_id
219+
# - If Calibre metadata.opf exists, parse it for rich metadata
220+
# 5. Library status updated to "idle" or "failed"
221+
# 6. last_scanned_at timestamp recorded
222+
#
223+
# ## Error Handling
224+
#
225+
# - If scan fails, library status set to "failed"
226+
# - Error message stored in last_scan_error field
227+
# - Can retry by clicking "Scan Now" or waiting for next scheduled scan
228+
# - Failed status does not prevent future scans
229+
#
230+
# ## Concurrent Scan Prevention
231+
#
232+
# - Only one scan can run per library at a time
233+
# - scanning_status field acts as a lock
234+
# - Manual, auto_watch, and scheduled modes all check this status
235+
# - "Scan Now" button disabled while scanning in progress
131236

132237
# Import environment specific config. This must remain at the bottom
133238
# of this file so it overrides the configuration defined above.

config/test.exs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,8 @@ config :fuzzy_catalog, Oban,
4949
testing: :manual,
5050
queues: false,
5151
plugins: false
52+
53+
# Disable FileSystem watcher and scheduler in tests
54+
config :fuzzy_catalog, :ebooks,
55+
watcher_enabled: false,
56+
scheduler_enabled: false

lib/fuzzy_catalog/application.ex

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,40 @@ defmodule FuzzyCatalog.Application do
77

88
@impl true
99
def start(_type, _args) do
10-
children = [
11-
FuzzyCatalogWeb.Telemetry,
12-
FuzzyCatalog.Repo,
13-
{Oban, Application.fetch_env!(:fuzzy_catalog, Oban)},
14-
{DNSCluster, query: Application.get_env(:fuzzy_catalog, :dns_cluster_query) || :ignore},
15-
{Phoenix.PubSub, name: FuzzyCatalog.PubSub},
16-
FuzzyCatalog.SyncStatusManager,
17-
FuzzyCatalog.ProviderScheduler,
18-
# Start a worker by calling: FuzzyCatalog.Worker.start_link(arg)
19-
# {FuzzyCatalog.Worker, arg},
20-
# Start to serve requests, typically the last entry
21-
FuzzyCatalogWeb.Endpoint
22-
]
10+
# Conditionally start LibraryWatcher based on config
11+
library_watcher_child =
12+
if watcher_enabled?() do
13+
[FuzzyCatalog.Ebooks.LibraryWatcher]
14+
else
15+
[]
16+
end
17+
18+
# Conditionally start LibraryScheduler based on config
19+
library_scheduler_child =
20+
if scheduler_enabled?() do
21+
[FuzzyCatalog.Ebooks.LibraryScheduler]
22+
else
23+
[]
24+
end
25+
26+
children =
27+
[
28+
FuzzyCatalogWeb.Telemetry,
29+
FuzzyCatalog.Repo,
30+
{Oban, Application.fetch_env!(:fuzzy_catalog, Oban)},
31+
{DNSCluster, query: Application.get_env(:fuzzy_catalog, :dns_cluster_query) || :ignore},
32+
{Phoenix.PubSub, name: FuzzyCatalog.PubSub},
33+
FuzzyCatalog.SyncStatusManager,
34+
FuzzyCatalog.ProviderScheduler
35+
] ++
36+
library_watcher_child ++
37+
library_scheduler_child ++
38+
[
39+
# Start a worker by calling: FuzzyCatalog.Worker.start_link(arg)
40+
# {FuzzyCatalog.Worker, arg},
41+
# Start to serve requests, typically the last entry
42+
FuzzyCatalogWeb.Endpoint
43+
]
2344

2445
# See https://hexdocs.pm/elixir/Supervisor.html
2546
# for other strategies and supported options
@@ -34,4 +55,14 @@ defmodule FuzzyCatalog.Application do
3455
FuzzyCatalogWeb.Endpoint.config_change(changed, removed)
3556
:ok
3657
end
58+
59+
defp watcher_enabled? do
60+
Application.get_env(:fuzzy_catalog, :ebooks, [])
61+
|> Keyword.get(:watcher_enabled, true)
62+
end
63+
64+
defp scheduler_enabled? do
65+
Application.get_env(:fuzzy_catalog, :ebooks, [])
66+
|> Keyword.get(:scheduler_enabled, true)
67+
end
3768
end

lib/fuzzy_catalog/catalog/book.ex

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ defmodule FuzzyCatalog.Catalog.Book do
3434
field :original_title, :string
3535
field :language, :string
3636

37+
# Extended Metadata (from OPF files)
38+
field :rating, :integer
39+
field :tags, {:array, :string}
40+
field :custom_metadata, :map
41+
3742
has_many :collection_items, FuzzyCatalog.Collections.CollectionItem
3843

3944
timestamps()
@@ -59,12 +64,16 @@ defmodule FuzzyCatalog.Catalog.Book do
5964
:series,
6065
:series_number,
6166
:original_title,
62-
:language
67+
:language,
68+
:rating,
69+
:tags,
70+
:custom_metadata
6371
])
6472
|> validate_required([:title, :author])
6573
|> validate_length(:title, min: 1, max: 255)
6674
|> validate_length(:author, min: 1, max: 255)
6775
|> validate_number(:pages, greater_than: 0)
76+
|> validate_number(:rating, greater_than_or_equal_to: 0, less_than_or_equal_to: 10)
6877
|> validate_series_number()
6978
|> validate_publication_date()
7079
end

lib/fuzzy_catalog/collections.ex

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -337,12 +337,20 @@ defmodule FuzzyCatalog.Collections do
337337
# Remove search from Flop params to avoid validation issues
338338
flop_params = remove_search_param(params)
339339

340+
# Show all books regardless of collection items or ebook files
341+
# Media types are just informational and can be used for filtering
340342
base_query =
341343
from b in Book,
342-
join: ci in CollectionItem,
344+
left_join: ci in CollectionItem,
343345
on: ci.book_id == b.id,
344346
group_by: [b.id],
345-
select: {b, fragment("array_agg(? ORDER BY ?)::text[]", ci.media_type, ci.media_type)}
347+
select:
348+
{b,
349+
fragment(
350+
"array_remove(array_agg(DISTINCT ? ORDER BY ?), NULL)::text[]",
351+
ci.media_type,
352+
ci.media_type
353+
)}
346354

347355
# Apply search filter if present
348356
query_with_search =

lib/fuzzy_catalog/ebooks.ex

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,24 @@ defmodule FuzzyCatalog.Ebooks do
5252
|> Repo.one()
5353
end
5454

55+
@doc """
56+
Lists all ebooks associated with a book.
57+
58+
## Examples
59+
60+
iex> list_ebooks_for_book(book)
61+
[%Ebook{}, ...]
62+
63+
iex> list_ebooks_for_book(%Book{id: 123})
64+
[]
65+
"""
66+
def list_ebooks_for_book(%{id: book_id}) do
67+
Ebook
68+
|> where([e], e.book_id == ^book_id)
69+
|> order_by([e], asc: e.file_path)
70+
|> Repo.all()
71+
end
72+
5573
@doc """
5674
Triggers a directory scan for ebook files.
5775

lib/fuzzy_catalog/ebooks/ebook.ex

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ defmodule FuzzyCatalog.Ebooks.Ebook do
3232
# Optional relationship to Book
3333
belongs_to :book, FuzzyCatalog.Catalog.Book
3434

35+
# Optional relationship to Library
36+
belongs_to :library, FuzzyCatalog.Ebooks.Library
37+
3538
timestamps()
3639
end
3740

@@ -54,13 +57,15 @@ defmodule FuzzyCatalog.Ebooks.Ebook do
5457
:processing_error,
5558
:last_processed_at,
5659
:cover_thumbnail_key,
57-
:book_id
60+
:book_id,
61+
:library_id
5862
])
5963
|> validate_required([:file_path, :file_format])
6064
|> validate_inclusion(:file_format, @file_formats)
6165
|> validate_inclusion(:processing_status, @processing_statuses)
6266
|> validate_number(:file_size, greater_than: 0)
6367
|> foreign_key_constraint(:book_id)
68+
|> foreign_key_constraint(:library_id)
6469
|> unique_constraint(:file_path)
6570
end
6671

0 commit comments

Comments
 (0)