Skip to content

Commit 03acf91

Browse files
committed
Show similar mods on mod page
1 parent b632cb8 commit 03acf91

File tree

9 files changed

+315
-1
lines changed

9 files changed

+315
-1
lines changed

KerbalStuff/blueprints/api.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
ModList
2323
from ..search import search_mods, search_users, typeahead_mods, get_mod_score
2424
from ..thumbnail import thumb_path_from_background_path
25+
from ..celery import update_mod_similarities
2526

2627
api = Blueprint('api', __name__)
2728

@@ -545,6 +546,8 @@ def accept_grant_mod(mod_id: int) -> Tuple[Dict[str, Any], int]:
545546
mod = _get_mod(mod_id)
546547
author = _get_mod_pending_author(mod)
547548
author.accepted = True
549+
db.commit()
550+
update_mod_similarities.delay([mod.id])
548551
notify_ckan(mod, 'co-author-added')
549552
return {'error': False}, 200
550553

@@ -581,6 +584,8 @@ def revoke_mod(mod_id: int) -> Tuple[Dict[str, Any], int]:
581584
author = [a for a in mod.shared_authors if a.user == new_user][0]
582585
mod.shared_authors = [a for a in mod.shared_authors if a.user != current_user]
583586
db.delete(author)
587+
db.commit()
588+
update_mod_similarities.delay([mod.id])
584589
notify_ckan(mod, 'co-author-removed')
585590
return {'error': False}, 200
586591

@@ -698,6 +703,7 @@ def create_mod() -> Tuple[Dict[str, Any], int]:
698703
db.commit()
699704
mod.score = get_mod_score(mod)
700705
db.commit()
706+
update_mod_similarities.delay([mod.id])
701707
set_game_info(game)
702708
send_to_ckan(mod)
703709
return {

KerbalStuff/blueprints/mods.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
Featured, Media, GameVersion, Game, Following
3434
from ..search import get_mod_score
3535
from ..thumbnail import thumb_path_from_background_path
36+
from ..celery import update_mod_similarities
3637

3738
mods = Blueprint('mods', __name__)
3839

@@ -295,6 +296,8 @@ def edit_mod(mod_id: int, mod_name: str) -> Union[str, werkzeug.wrappers.Respons
295296
if not mod.published:
296297
newly_published = True
297298
mod.published = True
299+
db.commit()
300+
update_mod_similarities.delay([mod.id])
298301
if ckan is None:
299302
ckan = False
300303
else:
@@ -499,6 +502,8 @@ def publish(mod_id: int, mod_name: str) -> werkzeug.wrappers.Response:
499502
mod.published = True
500503
mod.updated = datetime.now()
501504
mod.score = get_mod_score(mod)
505+
db.commit()
506+
update_mod_similarities.delay([mod.id])
502507
send_to_ckan(mod)
503508
return redirect(url_for("mods.mod", mod_id=mod.id, mod_name=mod.name))
504509

KerbalStuff/celery.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from .objects import Mod
1212
from .search import get_mod_score
1313
from .ckan import import_ksp_versions_from_ckan
14+
from .similarity import update_similar_mods
1415

1516
app = Celery("tasks", broker=_cfg("redis-connection"))
1617

@@ -118,6 +119,14 @@ def ckan_version_import() -> None:
118119
if game_id > 0:
119120
import_ksp_versions_from_ckan(game_id)
120121

122+
123+
@app.task
@with_session
def update_mod_similarities(mod_ids: List[int]) -> None:
    """Celery task: refresh the stored similarity rows of the given mods.

    :param mod_ids: IDs of the mods whose similarity rows should be rebuilt.
                    IDs that no longer resolve to a mod are skipped.
    """
    for mod_id in mod_ids:
        mod = Mod.query.get(mod_id)
        # The task runs some time after it was queued, so the mod may have
        # been deleted in the meantime; Query.get() returns None in that case.
        if mod is not None:
            update_similar_mods(mod)
128+
129+
121130
# to debug this:
122131
# * add PTRACE capability to celery container via docker-compose.yaml
123132
# celery:

KerbalStuff/objects.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import os.path
33
from datetime import datetime
44
import re
5-
from typing import Optional
5+
from typing import Optional, Dict, Set
66

77
import bcrypt
88
from flask import url_for
@@ -14,6 +14,7 @@
1414

1515
from . import thumbnail
1616
from .database import Base
17+
from .str_similarity import meaningful_words, words_similarity
1718

1819

1920
class Following(Base): # type: ignore
@@ -235,6 +236,7 @@ class Mod(Base): # type: ignore
235236
followings = relationship('Following', back_populates='mod')
236237
# List of users that follow this mods
237238
followers = association_proxy('followings', 'user')
239+
similar_mods = association_proxy('similarities', 'other_mod')
238240

239241
def background_thumb(self) -> Optional[str]:
240242
return thumbnail.get_or_create(self)
@@ -251,6 +253,20 @@ def background_url(self, protocol: Optional[str], cdn_domain: Optional[str]) ->
251253
else:
252254
return url_for('mods.mod_background', mod_id=self.id, mod_name=self.name)
253255

256+
def get_author_names(self) -> Set[str]:
    """Return the usernames of the mod's owner and of all its shared authors.

    The set is built on first use and cached on the instance as
    ``_author_names``, so repeated comparisons do not rebuild it.
    """
    if not hasattr(self, '_author_names'):
        # shared_authors holds author-link rows; the username lives on the
        # linked user object, not on the link row itself.
        self._author_names: Set[str] = {
            self.user.username,
            *(a.user.username for a in self.shared_authors),
        }
    return self._author_names
261+
262+
def get_words(self, prop_name: str) -> Set[str]:
    """Return the meaningful words of the text attribute named ``prop_name``.

    Only parse the strings once to speed up mass-compares: results are
    cached per attribute name on the instance in ``_words``.

    :param prop_name: name of the attribute to tokenize (e.g. ``'name'``).
    :return: the word set produced by ``meaningful_words``; empty when the
             attribute is missing or has no value.
    """
    if not hasattr(self, '_words'):
        self._words: Dict[str, Set[str]] = {}
    if prop_name not in self._words:
        # An attribute that exists but is unset yields None, which the
        # tokenizer cannot split; treat it like an empty string.
        text = getattr(self, prop_name, None) or ''
        self._words[prop_name] = meaningful_words(text)
    return self._words[prop_name]
269+
254270
def __repr__(self) -> str:
255271
return '<Mod %r %r>' % (self.id, self.name)
256272

@@ -300,6 +316,36 @@ def __repr__(self) -> str:
300316
return '<SharedAuthor %r>' % self.user_id
301317

302318

319+
class ModSimilarity(Base): # type: ignore
    """Similarity score of one mod (``main_mod``) towards another (``other_mod``).

    Rows live in the ``mod_similarity`` table and are identified by the
    (main_mod_id, other_mod_id) pair.
    """
    __tablename__ = 'mod_similarity'
    # Composite primary key: one row per ordered pair of mods
    __table_args__ = (PrimaryKeyConstraint('main_mod_id', 'other_mod_id', name='pk_mods'), )
    # Declared before the relationships because they order/index by this column
    similarity = Column(Float(precision=5), nullable=False)
    main_mod_id = Column(Integer, ForeignKey('mod.id', ondelete='CASCADE'), nullable=False)
    # Adds Mod.similarities, sorted with the highest similarity first
    main_mod = relationship('Mod',
                            foreign_keys=main_mod_id,
                            backref=backref('similarities',
                                            passive_deletes=True,
                                            order_by=similarity.desc()))
    other_mod_id = Column(Integer, ForeignKey('mod.id', ondelete='CASCADE'), nullable=False)
    other_mod = relationship('Mod', foreign_keys=other_mod_id)

    # Index matching the "rows of one main mod, best first" ordering used above
    Index('ix_mod_similarity_main_mod_similarity', main_mod_id, similarity.desc())

    # Mod attributes whose word sets are compared
    WORD_PROPS = ['name', 'short_description', 'description']

    def __init__(self, main_mod: Mod, other_mod: Mod) -> None:
        """Compute the similarity of ``main_mod`` to ``other_mod``.

        The score is the sum of the word-set similarities of every attribute
        in WORD_PROPS plus 0.1 times the similarity of the author-name sets.
        Only the IDs of the two mods are stored on the new object.
        """
        self.main_mod_id = main_mod.id
        self.other_mod_id = other_mod.id
        self.similarity = (0.1 * words_similarity(main_mod.get_author_names(),
                                                  other_mod.get_author_names())
                           + sum(words_similarity(main_mod.get_words(prop_name),
                                                  other_mod.get_words(prop_name))
                                 for prop_name in self.WORD_PROPS))

    def __repr__(self) -> str:
        """Return a debug representation containing both mod IDs."""
        return f'<Mod Similarity {self.main_mod_id} {self.other_mod_id}>'
347+
348+
303349
class DownloadEvent(Base): # type: ignore
304350
__tablename__ = 'downloadevent'
305351
id = Column(Integer, primary_key=True)

KerbalStuff/similarity.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from heapq import nlargest
2+
from typing import List
3+
4+
from .objects import Mod, ModSimilarity
5+
6+
7+
def find_most_similar(mod: Mod, how_many: int = 6) -> List[ModSimilarity]:
    """Return the mods most similar to ``mod``, best match first.

    Candidates are all other published mods of the same game. A fresh
    ModSimilarity is computed for each candidate; nothing is read from or
    written to the stored similarity rows here.

    :param mod: the mod to find matches for.
    :param how_many: maximum number of results.
    :return: up to ``how_many`` ModSimilarity objects with a similarity
             above zero, in descending order of similarity.
    """
    candidates = Mod.query.filter(Mod.published,
                                  Mod.game_id == mod.game_id,
                                  Mod.id != mod.id)
    scored = (ModSimilarity(mod, other_mod) for other_mod in candidates)
    # Zero similarity means nothing at all in common, so skip those
    relevant = (mod_sim for mod_sim in scored if mod_sim.similarity > 0)
    # nlargest() already returns its result ordered from largest to smallest,
    # so no further sorting is needed.
    return nlargest(how_many, relevant, key=lambda mod_sim: mod_sim.similarity)
20+
21+
22+
def update_similar_mods(mod: Mod, how_many: int = 6) -> None:
    """Bring the stored similarity rows of ``mod`` in line with its current best matches.

    An unpublished mod gets an empty similarity list. For a published mod the
    best matches are recomputed, rows for mods that dropped out are deleted,
    rows for mods that stayed are re-scored, and rows for new matches are added.

    :param mod: the mod whose ``similarities`` collection is updated.
    :param how_many: maximum number of similar mods to keep.
    """
    if not mod.published:
        # NOTE(review): this empties the collection instead of deleting rows
        # as the branch below does; main_mod_id is a non-nullable primary key
        # column, so confirm this behaves when rows actually exist.
        mod.similarities = []
    else:
        most_similar = find_most_similar(mod, how_many)
        # Remove rows for mods that are no longer among the most similar
        for mod_sim in mod.similarities:
            if not any(mod_sim.other_mod_id == other_sim.other_mod_id
                       for other_sim in most_similar):
                # Deleted through a query on the row's key, not by removing
                # the object from the mod.similarities collection
                ModSimilarity.query\
                    .filter(ModSimilarity.main_mod_id == mod_sim.main_mod_id,
                            ModSimilarity.other_mod_id == mod_sim.other_mod_id)\
                    .delete()
        for mod_sim in most_similar:
            # Existing row(s) of this mod pointing at the same other mod
            match = [other_sim for other_sim in mod.similarities
                     if mod_sim.other_mod_id == other_sim.other_mod_id]
            if match:
                # Update existing rows for mods that are still similar
                match[0].similarity = mod_sim.similarity
                # Update the row with swapped IDs, if any
                for other_sim in match[0].other_mod.similarities:
                    if other_sim.other_mod_id == mod_sim.main_mod_id:
                        other_sim.similarity = mod_sim.similarity
            else:
                # Add new rows for newly similar mods
                mod.similarities.append(mod_sim)

KerbalStuff/str_similarity.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import re
2+
from typing import Set, Iterable
3+
4+
5+
# Split words on one or more non-alphanumerics
# (only ASCII letters and digits count as word characters here)
WORD_SPLIT = re.compile(r'[^a-zA-Z0-9]+')

# Split up pieces of StudlyCapsStrings
# (zero-width match in front of every ASCII capital letter)
STUDLY_SPLIT = re.compile(r'(?=[A-Z])')

# English words that do not convey meaning about the context
# We care about things like "rocket" and "propellant" and "deltaV"
# All entries are lower case; they are subtracted from the lower-cased word
# set in meaningful_words(). Besides common English filler this also lists
# URL fragments and terms that appear in too many mods to be distinctive.
MEANINGLESS = {
    'the', 'an', 'this', 'these', 'that', 'those',
    'and', 'or', 'but', 'however',
    'as', 'such', 'than', 'there',
    'me', 'my', 'we', 'us', 'our',
    'you', 'your', 'he', 'him', 'she', 'her', 'it',
    'they', 'them',
    'to', 'from', 'in', 'on', 'for', 'with', 'of', 'into', 'at', 'by',
    'what', 'because', 'then',
    'is', 'be', 'been', 'are', 'get', 'getting', 'has', 'have', 'come',
    'do', 'does',
    'will', 'make', 'work', 'also', 'more',
    'should', 'so', 'some', 'like', 'likely', 'can', 'seems',
    'really', 'very', 'each', 'yup', 'which',
    've', 're',
    'accommodate', 'manner', 'therefore', 'ever', 'probably', 'almost',
    'something',
    'mod', 'pack', 'contains', 'ksp',
    'http', 'https', 'www', 'youtube', 'imgur', 'com',
    'github', 'githubusercontent',
    'forum', 'kerbalspaceprogram', 'index', 'thread', 'topic', 'php',
    'kerbal', 'space', 'continued', 'revived', 'updated', 'redux',
    'inc', 'plus',
}
37+
38+
39+
def split_with_acronyms(s: str) -> Iterable[str]:
    """Yield every word of ``s``, followed by the capital-led pieces of each word.

    The whole words (as cut by WORD_SPLIT) come first, in order; after them
    come the fragments STUDLY_SPLIT produces for each word in turn. Empty
    strings and duplicates are not filtered out here.
    """
    whole_words = WORD_SPLIT.split(s)
    for word in whole_words:
        yield word
    for word in whole_words:
        for piece in STUDLY_SPLIT.split(word):
            yield piece
44+
45+
46+
def meaningful_words(s: str) -> Set[str]:
    """Return the lower-cased words of ``s`` that are worth comparing.

    Tokens shorter than two characters and purely numeric tokens are dropped
    before lower-casing; entries of MEANINGLESS are removed afterwards.
    """
    candidates = {
        token.lower()
        for token in split_with_acronyms(s)
        if len(token) > 1 and not token.isnumeric()
    }
    return candidates - MEANINGLESS
50+
51+
52+
def words_similarity(words1: Set[str], words2: Set[str]) -> float:
    """Return the share of words common to both sets among all their words.

    The result is ``len(intersection) / len(union)``, or 0 when both sets
    are empty.
    """
    union = words1 | words2
    if not union:
        return 0
    shared = words1 & words2
    return len(shared) / len(union)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""Create mod_similarity table
2+
3+
Revision ID: bbcce95b6e79
4+
Revises: 3fb8a6e2e0a5
5+
Create Date: 2021-12-16 05:06:06.312797
6+
7+
"""
8+
9+
# revision identifiers, used by Alembic.
10+
revision = 'bbcce95b6e79'
11+
down_revision = '3fb8a6e2e0a5'
12+
13+
from alembic import op
14+
from alembic.op import create_table, drop_table
15+
import sqlalchemy as sa
16+
17+
from KerbalStuff.celery import update_mod_similarities
18+
19+
Base = sa.ext.declarative.declarative_base()
20+
21+
class Mod(Base): # type: ignore
    """Minimal stand-in for the ``mod`` table, local to this migration.

    Declares only the two columns the migration reads.
    """
    __tablename__ = 'mod'
    id = sa.Column(sa.Integer, primary_key=True)
    # Used by upgrade() to select the mods that need similarity rows
    published = sa.Column(sa.Boolean, default=False)
25+
26+
27+
def upgrade() -> None:
    """Create the ``mod_similarity`` table and its index, then queue the initial fill.

    After the schema changes, the IDs of all published mods are handed to the
    ``update_mod_similarities`` Celery task so their rows get built.
    """
    # One row per (main mod, other mod) pair; both columns reference mod.id
    # and rows are removed together with either mod.
    create_table('mod_similarity',
                 sa.Column('main_mod_id', sa.Integer(), nullable=False),
                 sa.Column('other_mod_id', sa.Integer(), nullable=False),
                 sa.Column('similarity', sa.Float(precision=5), nullable=False),
                 sa.ForeignKeyConstraint(['main_mod_id'], ['mod.id'], ondelete='CASCADE'),
                 sa.ForeignKeyConstraint(['other_mod_id'], ['mod.id'], ondelete='CASCADE'),
                 sa.PrimaryKeyConstraint('main_mod_id', 'other_mod_id', name='pk_mods'))
    # Index for listing one mod's rows with the highest similarity first
    op.create_index('ix_mod_similarity_main_mod_similarity',
                    'mod_similarity', ['main_mod_id', sa.text('similarity DESC')], unique=False)

    # Ask Celery to build the similarity rows for existing published mods
    # NOTE(review): the task is queued from inside the migration; confirm a
    # worker cannot pick it up before the table creation has been committed.
    update_mod_similarities.delay([mod_id for mod_id, in
                                   sa.orm.Session(bind=op.get_bind())
                                   .query(Mod)
                                   .filter(Mod.published)
                                   .with_entities(Mod.id)])
44+
45+
def downgrade() -> None:
    """Revert upgrade(): drop the similarity index first, then the table."""
    table = 'mod_similarity'
    op.drop_index('ix_mod_similarity_main_mod_similarity', table_name=table)
    op.drop_table(table)

templates/mod.html

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,21 @@ <h3>Export Raw Stats</h3>
394394
</div>
395395
</div>
396396
</div>
397+
{% if mod.similar_mods %}
398+
<div class="well">
399+
<div class="container main-cat">
400+
<h3>Similar-ish Mods</h3>
401+
</div>
402+
</div>
403+
<div class="container">
404+
<div class="row">
405+
{% set similar_mods = mod.similar_mods[:6] -%}
406+
{%- for mod in similar_mods -%}
407+
{%- include "mod-box.html" -%}
408+
{%- endfor %}
409+
</div>
410+
</div>
411+
{% endif %}
397412
{% if editable %}
398413
<div class="modal fade" id="confirm-delete" tabindex="-1" role="dialog" aria-labelledby="confirm-delete" aria-hidden="true">
399414
<div class="modal-dialog">

0 commit comments

Comments
 (0)