diff --git a/.gitignore b/.gitignore index f3a6164..de1c0e9 100644 --- a/.gitignore +++ b/.gitignore @@ -108,6 +108,7 @@ venv/ ENV/ env.bak/ venv.bak/ +.vscode/ # Spyder project settings .spyderproject @@ -127,5 +128,8 @@ dmypy.json # Pyre type checker .pyre/ +# staticfiles collected with python manage.py collectstatic +staticfiles/ + # End of https://www.gitignore.io/api/python diff --git a/api/tests/test_views.py b/api/tests/test_views.py index 0289746..2591bb0 100644 --- a/api/tests/test_views.py +++ b/api/tests/test_views.py @@ -4,13 +4,13 @@ class APITests(TransactionTestCase): def test_tutorials_page_status_code(self): - response = self.client.get('/tutorials/') + response = self.client.get('/api/tutorials/') self.assertEquals(response.status_code, 200) def test_tags_page_status_code(self): - response = self.client.get('/tags/') + response = self.client.get('/api/tags/') self.assertEquals(response.status_code, 200) def test_latest_page_status_code(self): - response = self.client.get('/latest/') + response = self.client.get('/api/latest/') self.assertEquals(response.status_code, 200) diff --git a/app/tests/test_views.py b/app/tests/test_views.py index 1419a98..577a276 100644 --- a/app/tests/test_views.py +++ b/app/tests/test_views.py @@ -1,4 +1,10 @@ -from django.test import SimpleTestCase, TransactionTestCase +import logging +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +from django.test import SimpleTestCase, TransactionTestCase, TestCase +from django.urls import reverse +from app.models import Tag, Tutorial class StaticPageTests(SimpleTestCase): @@ -57,3 +63,138 @@ def test_tags_page_name(self): response = self.client.get('/tags/') self.assertTemplateUsed(template_name='tags.html') +def create_tag(name): + t = Tag(name=name) + t.save() + return t.id + +def create_tutorial(title,link,tag_pks,category, publish=True): + tut = Tutorial.objects.create( + title=title, + link=link, + category = category, + publish = publish + ) + 
tut.tags.add(*tag_pks) + + +class SearchQueryViewTests(TestCase): + + url = reverse('app:search-results') + + def test_search_empty(self): + + response = self.client.get(self.url, data={ + "q": "hello" + }) + self.assertQuerysetEqual(response.context['tutorials'], []) + + def test_simple_search(self): + # create our sample tags + # save their ids for adding to the corresponding tutorial + tags = ['javascript', 'python'] + tag_pks = {t:create_tag(t) for t in tags} + + # create our sample tutorials + tutorials= [{ + "title": "Python 101", + "link": "https://www.python.org", + "tags": ["python"], + "category": "docs" + }, + { + "title": "Python Advanced", + "link": "https://www.pyadv.com", + "tags": ["python"], + "category": "course" + }, + { + "title": "JavaScript 101", + "link": "https://www.jsResource.com", + "tags": ["javascript"], + "category": "video" + } + ] + for tut in tutorials: + # compile a list of tag ids for this tut + my_tags = [tag_pks[t] for t in tut['tags'] if t in tag_pks] + create_tutorial(tut['title'], tut['link'], my_tags, tut['category']) + + res1 = self.client.get(self.url, data={"q": "python"}) + self.assertQuerysetEqual(res1.context['tutorials'], ['','', ]) + + res2 = self.client.get(self.url, data={"q": "javascript"}) + self.assertQuerysetEqual(res2.context['tutorials'], ['']) + + res3 = self.client.get(self.url, data={"q": "kotlin"}) + self.assertQuerysetEqual(res3.context['tutorials'], []) + + def test_search_relevance(self): + """ full matches are placed before partial matches """ + tags = ['java', 'javascript', 'javaEE'] + tag_pks = {t:create_tag(t) for t in tags} + + tutorials = [ + { + "title": "JavaScript 101", + "link": "https://www.jsResource.io", + "tags": ["javascript"], + "category": "video" + }, + { + "title": "A Cup of Java", + "link": "https://www.javaRef1.com", + "tags": ["java"], + "category": "course" + }, + { + "title": "Java Patterns", + "link": "https://www.JVMBites.org", + "tags": ["javaEE"], + "category": "article" + }, 
+ { + "title": "ObscureJSResource1", + "link": "https://www.hasajstag.org", + "tags": ["javascript"], + "category": "docs" + }, + { + "title": "ObscureJVMResource1", + "link": "https://www.hasajavatag.org", + "tags": ["java"], + "category": "book" + }, + { + "title": "JavaScript Fatigue", + "link": "https://www.babelwebpack.org", + "tags": ["javascript"], + "category": "cheatsheet" + }, + + ] + for tut in tutorials: + # compile a list of tag ids for this tut + my_tags = [tag_pks[t] for t in tut['tags'] if t in tag_pks] + create_tutorial(tut['title'], tut['link'], my_tags, tut['category']) + + res1 = self.client.get(self.url, data={"q": "javascript"}) + self.assertQuerysetEqual(res1.context['tutorials'], [ + '', + '', + '' + ]) + + res2 = self.client.get(self.url, data={"q": "java"}) + first_page = res2.context['tutorials'] + paginator = first_page.paginator + self.assertQuerysetEqual(paginator.object_list, [ + '', + '', + '', + '', + '' + ]) + + # res3 = self.client.get(self.url, data={"q": "kotlin"}) + # self.assertQuerysetEqual(res3.context['tutorials'], []) diff --git a/app/views.py b/app/views.py index 485fa15..c19cff1 100644 --- a/app/views.py +++ b/app/views.py @@ -1,4 +1,7 @@ import time +import logging + +logger = logging.getLogger(__name__) from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator from django.db.models import Q @@ -20,27 +23,75 @@ def get_context_data(self, **kwargs): def search_query(request): - """view for the search results""" - query = request.GET.get('q').lower() + """view for the search results + search results (that meet category constraint) are prioritized by: + 1 - Title contains the exact keyword(s) + 2 - Tags contain the exact keyword(s) + 3 - Title contains partial keyword matches + """ + try: + # if the 'q' parameter is missing, get() returns None and .lower() raises AttributeError + query = request.GET.get('q').lower().strip() + except AttributeError: + # pretend it's a space + query = " " category = request.GET.get('category') list_query = query.split() 
start_time = time.time() - if category is not None: - tutorials = Tutorial.objects.filter( - (Q(title__icontains=query) | Q(tags__name__in=list_query)) - & Q(category__icontains=category) - ).order_by('id').filter(publish=True).distinct() + # Begin with Category-valid Tutorials + all_tutorials = Tutorial.objects.filter(category=category) if category else Tutorial.objects.all() + logger.debug(f"all tutorials {all_tutorials}") + if len(list_query): + # Get all Tutorials with partial keyword matches in title OR exact keyword matches in tags + partial_title_matches = Q() + for keyword in list_query: + logger.debug(f"Keyword is {keyword}") + partial_title_matches.add(Q(title__icontains=keyword), Q.OR) + + filtered_tutorials = all_tutorials.filter( + Q(tags__name__in=list_query) | partial_title_matches + ).filter(publish=True).distinct() + + # Now to do this sorting operation, we'll have to convert to a list + def relevance_order(tut): + title_set = set(tut.title.lower().split()) + logger.debug(f"titleset is {title_set}") + query_set = set(list_query) + logger.debug(f"queryset is {query_set}") + tag_set = set(tut.tags.values_list('name', flat=True)) + logger.debug(f"tagset is {tag_set}") + title_score = len(title_set & query_set) + tag_score = len(tag_set & query_set) + # give more weight to exact title matches + logger.debug(f"{tut}: {title_score}*5 + {tag_score}") + return -(title_score *5 + tag_score) + + sorted_tutorials = sorted(filtered_tutorials, key=relevance_order) + logger.debug(f"filtered_tut is {filtered_tutorials}") + logger.debug(f"sorted_tut is {sorted_tutorials}") + + else: - tutorials = Tutorial.objects.filter( - (Q(title__icontains=query) | Q(tags__name__in=list_query)) - ).order_by('id').filter(publish=True).distinct() + # no need to go through all this trouble if user searched an empty string! 
+ sorted_tutorials = all_tutorials + + + + # if category is not None: + # tutorials = Tutorial.objects.filter( + # (Q(title__icontains=query) | Q(tags__name__in=list_query)) + # & Q(category__icontains=category) + # ).order_by('id').filter(publish=True).distinct() + # else: + # tutorials = Tutorial.objects.filter( + # (Q(title__icontains=query) | Q(tags__name__in=list_query)) + # ).order_by('id').filter(publish=True).distinct() end_time = time.time() - total = len(tutorials) + total = len(sorted_tutorials) result_time = round(end_time - start_time, 3) - paginator = Paginator(tutorials, 3) + paginator = Paginator(sorted_tutorials, 3) page = request.GET.get('page') try: tutorials = paginator.page(page) @@ -86,6 +137,7 @@ def taglinks(request, tagname): """view for the tutorials with the {tagname}""" taglist = [] taglist.append(tagname) + # tutorials = Tag.objects.get(name = tagname).tutorial_set.all().filter(publish=True) tutorials = Tutorial.objects.filter(tags__name__in=taglist, publish=True) context = { 'tag': tagname, diff --git a/tutorialdb/settings.py b/tutorialdb/settings.py index 19d789c..6d339cb 100644 --- a/tutorialdb/settings.py +++ b/tutorialdb/settings.py @@ -119,16 +119,20 @@ USE_TZ = True STATIC_URL = '/static/' -PROJECT_ROOT = os.path.join(os.path.abspath(__file__)) +PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__)) STATIC_ROOT = os.path.join(PROJECT_ROOT, 'staticfiles') # Extra lookup directories for collectstatic to find static files -STATICFILES_DIRS = ( - os.path.join(PROJECT_ROOT, 'static'), -) +# STATICFILES_DIRS = ( +# os.path.join(PROJECT_ROOT, 'static'), +# ) STATICFILES_STORAGE = 'whitenoise.django.GzipManifestStaticFilesStorage' +# Use this instead for testing, without having to run 'python manage.py collectstatic' +STATICFILES_STORAGE = 'django.contrib.staticfiles.storage.StaticFilesStorage' + + import dj_database_url prod_db = dj_database_url.config(conn_max_age=500) DATABASES['default'].update(prod_db) \ No newline at end of file