Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e3699ff
Create Series and SeriesBook models
hughrun Nov 2, 2025
423e187
add connector logic and fix templates and views
hughrun Nov 3, 2025
6a046ec
finalise connectors
hughrun Nov 4, 2025
aa5470e
refactor series models and templates
hughrun Nov 13, 2025
89406e0
bunch of changes and tests
hughrun Nov 15, 2025
1739f50
formatting
hughrun Nov 15, 2025
5d350ad
final commit for series
hughrun Nov 19, 2025
486a8ae
add migration
hughrun Nov 19, 2025
77d67e2
use SearchVector in connectors
hughrun Nov 20, 2025
dd18940
add management command and improve connector series matching
hughrun Nov 20, 2025
bfc8304
fix upgrade_series admin command
hughrun Nov 20, 2025
57b70fd
fix linters hopefully
hughrun Nov 20, 2025
4c19da1
Merge branch 'main' into series-model
mouse-reeve Nov 22, 2025
ec56489
fix rank match value for series matches
hughrun Nov 29, 2025
393190c
Resolve conflicts and merge 'main' into series-model
hughrun Dec 14, 2025
6e11d75
fix series title search in ConfirmEditBook
hughrun Dec 14, 2025
76da2cb
Merge branch 'main' into series-model
mouse-reeve Feb 2, 2026
d78bbd2
Hughrun series model (#10)
mouse-reeve Feb 3, 2026
d1e9ea1
clean up
hughrun Feb 8, 2026
4a941fb
Merge branch 'main' into series-model
hughrun Feb 8, 2026
02b6a5f
Merge branch 'main' into series-model
hughrun Feb 8, 2026
b652f69
basically undo previous changes to models
hughrun Feb 10, 2026
2177432
fix series model
hughrun Feb 14, 2026
c29d61c
Merge branch 'main' into series-model
hughrun Feb 14, 2026
dc90a04
Merge branch 'main' into series-model
hughrun Feb 21, 2026
ddbbe97
Merge branch 'main' into series-model
hughrun Mar 21, 2026
3356a3e
fix inventaire series and clean up
hughrun Mar 22, 2026
124e08e
Merge branch 'main' into series-model
hughrun Mar 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bookwyrm/activitypub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .ordered_collection import BookList, Shelf
from .person import Person, PublicKey
from .response import ActivitypubResponse
from .book import Edition, Work, Author
from .book import Edition, Work, Author, Series, SeriesBook
from .verbs import Create, Delete, Undo, Update
from .verbs import Follow, Accept, Reject, Block
from .verbs import Add, Remove
Expand Down
27 changes: 25 additions & 2 deletions bookwyrm/activitypub/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from .base_activity import ActivityObject
from .image import Document
from .ordered_collection import OrderedCollection, CollectionItem


@dataclass(init=False)
Expand Down Expand Up @@ -35,8 +36,9 @@ class Book(BookData):
subtitle: str = None
description: str = ""
languages: list[str] = field(default_factory=list)
series: str = ""
seriesNumber: str = ""
series: str = "" # legacy, now deprecated
seriesNumber: str = "" # legacy, now deprecated
seriesBooks: list[str] = field(default_factory=list)
subjects: list[str] = field(default_factory=list)
subjectPlaces: list[str] = field(default_factory=list)

Expand Down Expand Up @@ -91,3 +93,24 @@ class Author(BookData):
wikipediaLink: str = ""
type: str = "Author"
website: str = ""


@dataclass(init=False)
class Series(BookData, OrderedCollection):
    """serializes a book series

    Combines the fields inherited from BookData and OrderedCollection with
    the series' own name fields.
    """

    # declared without defaults
    # NOTE(review): presumably that makes them required when an incoming
    # activity is deserialized -- confirm against ActivityObject
    actor: str
    name: str
    # other names the series is known by; defaults to an empty list
    alternativeNames: list[str] = field(default_factory=list)
    # default value of the "type" field for this object
    type: str = "Series"


@dataclass(init=False)
class SeriesBook(CollectionItem):
    """a book in a series

    Extends CollectionItem with the references that tie one book to one
    series, plus the book's optional position in that series.
    """

    # declared without defaults
    # NOTE(review): actor/book/series are typed as plain strings; presumably
    # they carry remote ids -- confirm against the model serializers
    actor: str
    book: str
    series: str
    # NOTE(review): defaults to None although annotated as int; an Optional
    # annotation would describe it more accurately
    seriesNumber: int = None
    # default value of the "type" field for this object
    type: str = "SeriesBook"
type: str = "SeriesBook"
118 changes: 117 additions & 1 deletion bookwyrm/connectors/abstract_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
from requests.exceptions import RequestException
import aiohttp

from django.contrib.postgres.search import SearchRank, SearchVector
from django.core.files.base import ContentFile
from django.db import transaction
from django.db.models import Subquery

from bookwyrm import activitypub, models, settings
from bookwyrm.settings import USER_AGENT
from bookwyrm.settings import USER_AGENT, INSTANCE_ACTOR_USERNAME
from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url
from .format_mappings import format_mappings
from ..book_search import SearchResult
Expand Down Expand Up @@ -111,6 +113,78 @@ async def get_results(
logger.info(err)
return None

def get_or_create_seriesbook_from_data(  # pylint: disable=no-self-use
    self,
    work: models.Work,
    edition: models.Edition,
) -> None:
    """Turn the series data on a freshly loaded book into Series/SeriesBook rows.

    ``work.series`` may be a list of dicts of Series field values (as built
    by the Inventaire connector) or a plain series name. For a plain name the
    edition's legacy ``series`` / ``series_number`` are used as fallbacks.

    For each incoming series:

    * if no existing series ranks as a plausible match, a new one is created;
    * if a plausible match already holds a book by one of this book's
      authors, that series is reused;
    * otherwise the match is ambiguous: the legacy string fields on the
      edition are kept (or filled in) so a user can resolve it manually.

    Does nothing when there is no series data at all.
    """
    series_to_process = []

    if hasattr(work, "series") and isinstance(work.series, list):
        # Inventaire series arrive as a list of dicts of Series field values
        for data in work.series:
            series_to_process.append(models.Series(**data))  # type: ignore
    else:
        # otherwise it's just a name
        name = work.series or edition.series
        if not name:
            return
        series_to_process.append(models.Series(name=name))  # type: ignore
        work.series_number = work.series_number or edition.series_number

    if not series_to_process:
        return

    # only look these up once we know there is something to do
    user = models.User.objects.get(localname=INSTANCE_ACTOR_USERNAME)
    authors = work.authors.all().union(edition.authors.all())
    vector = SearchVector("name", weight="A") + SearchVector(
        "alternative_names", weight="B"
    )

    for series in series_to_process:
        if not series.name:
            # remote data without a usable label: there is nothing to match
            # on and nothing sensible to save
            continue

        instance = None
        possible_series = (
            models.Series.objects.annotate(search=vector)
            .annotate(rank=SearchRank(vector, series.name, normalization=32))
            .filter(
                rank__gt=0.19
            )  # short alias names like XY get rank around 0.1956
            .order_by("-rank")[:5]
        )

        if possible_series.exists():
            books = models.Book.objects.filter(
                authors__in=Subquery(authors.values("pk"))
            )
            same_author_sb = (
                models.SeriesBook.objects.filter(book__in=books)
                .filter(series__in=Subquery(possible_series.values("pk")))
                .first()
            )

            if same_author_sb is None:
                # similar series exist but none shares an author with this
                # book: leave it for the user to work out
                if work.series:
                    edition.series = series.name
                    edition.series_number = work.series_number
                    edition.save()
                continue

            # there is already a series with a seriesbook by a matching
            # author, let's feel lucky
            instance = same_author_sb.series

        # the data now lives in Series/SeriesBook, so clear the legacy fields
        edition.series = None
        edition.series_number = None
        edition.save()

        activitydata_to_seriesbook(
            user=user,
            work=work,
            new=series,
            instance=instance,  # type: ignore
        )

@abstractmethod
def get_or_create_book(self, remote_id: str) -> Optional[models.Book]:
"""pull up a book record by whatever means possible"""
Expand Down Expand Up @@ -186,6 +260,9 @@ def get_or_create_book(self, remote_id: str) -> Optional[models.Book]:
work.authors.add(author)

edition = self.create_edition_from_data(work, edition_data)

self.get_or_create_seriesbook_from_data(work, edition)

load_more_data.delay(self.connector.id, work.id)
return edition

Expand Down Expand Up @@ -443,3 +520,42 @@ def maybe_isbn(query: str) -> bool:
10,
13,
] # ISBN10 or ISBN13, or maybe ISBN10 missing a leading zero


def activitydata_to_seriesbook(
    user: models.User,
    work: models.Work,
    new: models.Series,
    instance: Optional[models.Series],
) -> None:
    """Persist an incoming series and link ``work`` to it.

    When ``instance`` is given it is the existing series to merge into: its
    empty identifier/name fields are filled from ``new`` and unseen
    alternative names are appended. Without an ``instance``, ``new`` itself
    is saved, owned by ``user``. In both cases a SeriesBook joining ``work``
    to the resulting series is fetched or created.
    """

    if not instance:
        # nothing to merge into, so the incoming series becomes the record
        new.user = user
        series = new
    else:
        fillable = (
            "inventaire_id",
            "librarything_key",
            "goodreads_key",
            "wikidata",
            "isfdb",
            "name",
        )
        for attr in fillable:
            # never overwrite a value the existing series already has
            if getattr(instance, attr):
                continue
            if incoming := getattr(new, attr):
                setattr(instance, attr, incoming)

        for alias in new.alternative_names:
            if alias in instance.alternative_names or alias == instance.name:
                continue
            instance.alternative_names.append(alias)
        series = instance

    series.save()

    # using the work.series_number for every series is safe because
    # Inventaire doesn't supply series ordinal when more than one series
    link_defaults = {"user": user, "series_number": work.series_number}
    models.SeriesBook.objects.get_or_create(
        book=work,
        series=series,
        defaults=link_defaults,
    )
9 changes: 8 additions & 1 deletion bookwyrm/connectors/bookwyrm_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@ def __init__(self, identifier: str):
super().__init__(identifier)

def get_or_create_book(self, remote_id: str) -> models.Edition:
    """Resolve a remote edition and convert any legacy series data on it.

    The edition is loaded with ``activitypub.resolve_remote_id``. If it still
    carries a legacy ``series`` string, that is handed to
    ``get_or_create_seriesbook_from_data`` together with its parent work.
    """
    edition = activitypub.resolve_remote_id(remote_id, model=models.Edition)

    # NOTE(review): the abstract signature allows a missing book, so guard
    # before touching attributes -- confirm whether resolve_remote_id can
    # actually return None here
    if edition is not None and edition.series:
        self.get_or_create_seriesbook_from_data(
            work=edition.parent_work, edition=edition
        )

    return edition

def parse_search_data(
self, data: list[dict[str, Any]], min_confidence: float
Expand Down
40 changes: 39 additions & 1 deletion bookwyrm/connectors/inventaire.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def __init__(self, identifier: str):
Mapping("title", remote_field="wdt:P1476", formatter=get_first),
Mapping("title", remote_field="labels", formatter=get_language_code),
Mapping("subtitle", remote_field="wdt:P1680", formatter=get_first),
Mapping("series", remote_field="wdt:P179", formatter=self.format_series),
Mapping("seriesNumber", remote_field="wdt:P1545", formatter=get_first),
Mapping("inventaireId", remote_field="uri"),
Mapping(
"description", remote_field="sitelinks", formatter=self.get_description
Expand Down Expand Up @@ -78,7 +80,7 @@ def get_book_data(self, remote_id: str) -> JsonDict:
**data.get("claims", {}),
**{
k: data.get(k)
for k in ["uri", "image", "labels", "sitelinks", "type"]
for k in ["uri", "image", "labels", "sitelinks", "type", "originalLang"]
if k in data
},
}
Expand Down Expand Up @@ -241,6 +243,42 @@ def get_remote_id_from_model(self, obj: models.BookDataModel) -> str:
remote_id_value = obj.inventaire_id
return self.get_remote_id(remote_id_value)

def format_series(self, keys: Iterable[str]) -> list[dict[str, Any]]:
    """Resolve series URIs into dicts of Series field values.

    Each URI in ``keys`` is fetched with ``get_book_data``; URIs that raise
    ConnectorException are skipped. For every series that has at least one
    label the result holds:

    * ``name``: the label in the series' original language, falling back to
      the English label and then to the first label by language code;
    * ``alternative_names``: the remaining distinct labels, sorted;
    * ``inventaire_id``: the URI itself;
    * ``wikidata``: the id after the ``wd:`` prefix, only for such URIs;
    * ``goodreads_key`` / ``isfdb`` / ``librarything_key``: the first value
      of the matching claim, when present.

    Series without any label are left out because they cannot be named.
    """
    # claim property -> Series field it populates
    external_ids = (
        ("wdt:P6947", "goodreads_key"),
        ("wdt:P1235", "isfdb"),
        ("wdt:P8513", "librarything_key"),
    )

    series_list = []
    for uri in keys:
        try:
            series_data = self.get_book_data(self.get_remote_id(uri))
        except ConnectorException:
            continue

        labels = series_data.get("labels") or {}
        # "en-GB" style codes are reduced to the bare language code
        original_lang = (series_data.get("originalLang") or "en").split("-")[0]

        name = labels.get(original_lang) or labels.get("en")
        if not name:
            # no label in the preferred languages: take any, deterministically
            name = next(
                (labels[lang] for lang in sorted(labels) if labels[lang]), None
            )
        if not name:
            continue

        series: dict[str, Any] = {
            "name": name,
            "alternative_names": sorted(
                {label for label in labels.values() if label and label != name}
            ),
            "inventaire_id": uri,
        }

        # only Wikidata-backed entities have a wikidata id to record
        if uri.startswith("wd:"):
            series["wikidata"] = uri[len("wd:") :]

        for claim, field_name in external_ids:
            values = series_data.get(claim)
            if values:
                series[field_name] = values[0]

        series_list.append(series)
    return series_list


def get_language_code(options: JsonDict, code: str = "en") -> Any:
"""when there are a bunch of translation but we need a single field"""
Expand Down
12 changes: 12 additions & 0 deletions bookwyrm/forms/books.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,15 @@ class Meta:
"cover",
"first_published_date",
]


class SeriesForm(CustomForm):
    """form for editing a book series"""

    class Meta:
        model = models.Series
        # the series fields exposed on the form
        # NOTE(review): goodreads_key and librarything_key are not listed;
        # confirm that leaving them off the form is intentional
        fields = [
            "name",
            "alternative_names",
            "inventaire_id",
            "wikidata",
            "isfdb",
        ]
83 changes: 83 additions & 0 deletions bookwyrm/management/commands/upgrade_series.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""fix legacy series"""

from django.core.management.base import BaseCommand
from django.contrib.postgres.search import SearchRank, SearchVector
from django.db.models import Subquery, Q
from django.db.models.functions import Length
from bookwyrm import activitypub
from bookwyrm.models import Book, Edition, Series, SeriesBook, User


def upgrade_series_data():
    """turn strings into things

    Walks every edition that still has a legacy ``series`` string and moves
    that data into Series and SeriesBook objects:

    * if no existing series ranks as a plausible match, a new Series is
      created from the string;
    * if a plausible match holds a book by one of the edition's authors and
      its name or alternative names match the string, that series is reused;
    * otherwise the edition is left untouched for a user to sort out.

    Converted editions get their legacy fields cleared, saved without
    broadcasting. A summary of what was created is printed at the end.
    """

    series_count = Series.objects.count()
    seriesbook_count = SeriesBook.objects.count()

    # neither of these depends on the edition being processed
    user = activitypub.get_representative()
    vector = SearchVector("name", weight="A") + SearchVector(
        "alternative_names", weight="B"
    )

    # an empty string is not a series either, and would otherwise end up
    # as a Series with a blank name
    legacy_editions = Edition.objects.exclude(series=None).exclude(series="")

    for book in legacy_editions:
        possible_series = (
            Series.objects.annotate(search=vector)
            .annotate(rank=SearchRank(vector, book.series, normalization=32))
            .filter(rank__gt=0.19)
            .order_by("-rank")
        )

        if possible_series.exists():
            books = Edition.objects.filter(
                authors__in=Subquery(book.authors.values("pk"))
            ).values(
                "parent_work__pk"
            )  # the parent work is the book attached to the series

            match = (
                SeriesBook.objects.filter(book__in=books)
                .filter(
                    series__in=Subquery(possible_series.values("pk"))
                )  # a possible series with a seriesbook by a matching author
                .filter(
                    Q(series__name__iexact=book.series)
                    | Q(series__alternative_names__icontains=book.series)
                )  # it's the same series
                .first()
            )

            if match is None:
                # there might be a matching series but we don't know
                # leave it for a user to work out manually
                continue
            series = match.series
        else:
            series = Series.objects.create(user=user, name=book.series)

        # Use get_or_create so we don't duplicate.
        SeriesBook.objects.get_or_create(
            book=book.parent_work,
            series=series,
            defaults={"user": user, "series_number": book.series_number},
        )

        book.series = None
        book.series_number = None
        book.save(broadcast=False)

    # print how many things we created
    new_series_count = Series.objects.count()
    new_seriesbook_count = SeriesBook.objects.count()
    net_series = new_series_count - series_count
    net_books = new_seriesbook_count - seriesbook_count

    print("-------")
    print(f"Created {net_series} new Series and {net_books} new SeriesBooks")


class Command(BaseCommand):
    """Turn legacy series data into Series and SeriesBook objects"""

    # text shown for this command in the management command help
    help = "Turn legacy series data into Series and SeriesBook objects"

    def handle(self, *args, **options):
        """run data migration

        Takes no options of its own; it simply calls upgrade_series_data.
        """
        upgrade_series_data()
Loading
Loading