diff --git a/challenge_eval/__init__.py b/challenge_eval/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/challenge_eval/evaluate_models.py b/challenge_eval/evaluate_models.py
new file mode 100644
index 000000000..ed5309c46
--- /dev/null
+++ b/challenge_eval/evaluate_models.py
@@ -0,0 +1,136 @@
+"""
+This script calculates accuracy of models produced by datasets submitted for challenges.
+"""
+from __future__ import print_function
+
+import sys
+import os
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), ".."))
+import config
+
+import db
+import db.data
+import db.dataset
+import db.challenge
+import db.dataset_eval
+import db.exceptions
+import utils.path
+import utils.hl_calc
+import utils.models
+import subprocess
+import tempfile
+import logging
+import json
+import time
+import os
+
+
+HIGH_LEVEL_EXTRACTOR_BINARY = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)),
+ "..",
+ "hl_extractor",
+ "streaming_extractor_music_svm"
+)
+PROFILE_CONF_TEMPLATE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "profile_template.yaml")
+SLEEP_DURATION = 30 # number of seconds to wait between runs
+
+
+def main():
+ logging.info("Starting challenge submissions evaluator...")
+    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
+    while True:
+ eval_task = db.challenge.get_next_eval_task()
+ if eval_task:
+ logging.info("Processing model from job %s with snapshot %s for challenge %s..." %
+ (eval_task["job_id"], eval_task["validation_snapshot_id"], eval_task["challenge_id"]))
+ result = measure_accuracy(
+ model_path=utils.models.get_model_file_path(eval_task["job_id"]),
+ validation_dataset=db.dataset.get_snapshot(eval_task["validation_snapshot_id"])["data"],
+ )
+ db.challenge.set_submission_result(
+ eval_job_id=eval_task["job_id"],
+ challenge_id=eval_task["challenge_id"],
+ result=result,
+ )
+ else:
+ logging.info("No pending models. Sleeping %s seconds." % SLEEP_DURATION)
+ time.sleep(SLEEP_DURATION)
+
+
+def measure_accuracy(model_path, validation_dataset):
+ temp_dir = tempfile.mkdtemp()
+ print("Measuring accuracy for a model %s in %s..." % (model_path, temp_dir))
+
+ profile_file = os.path.join(temp_dir, "profile.conf")
+ utils.hl_calc.create_profile(
+ in_file=PROFILE_CONF_TEMPLATE,
+ out_file=profile_file,
+ sha1=utils.hl_calc.get_build_sha1(HIGH_LEVEL_EXTRACTOR_BINARY),
+ models=[model_path],
+ )
+
+ results_dir = os.path.join(temp_dir, "results")
+
+ rec_total = count_recordings_in_dataset(validation_dataset)
+ rec_current = 0
+ rec_correct = 0
+
+ for cls in validation_dataset["classes"]:
+ expected_value = cls["name"].lower()
+ for recording in cls["recordings"]:
+ rec_current += 1
+ print("Extracting data for recording %s of %s..." % (rec_current, rec_total))
+ current_rec_dir = os.path.join(results_dir, recording[0:1], recording[0:2], recording)
+ utils.path.create_path(current_rec_dir)
+ hl_output = get_hl_output(
+ recording=recording,
+ profile_file=profile_file,
+ working_dir=current_rec_dir,
+ )
+ # Name (key) of the result depends on the name of the dataset, but since we expect only
+ # one we can just get the first item...
+            got_value = hl_output["highlevel"][next(iter(hl_output["highlevel"]))]["value"].lower()
+ print("Expected value: %s. Got value: %s." % (expected_value, got_value))
+ if expected_value == got_value:
+ rec_correct += 1
+
+ print("Done! %s out of %s correct results." % (rec_correct, rec_total))
+ # TODO(roman): Not sure if this is the right data to store
+ return {
+ "correct": rec_correct,
+ "total": rec_total,
+ }
+
+
+def count_recordings_in_dataset(dataset):
+ count = 0
+ for cls in dataset["classes"]:
+ count += len(cls["recordings"])
+ return count
+
+
+def get_hl_output(recording, profile_file, working_dir):
+ ll_data_file = os.path.join(working_dir, "input.json")
+ with open(ll_data_file, "w+b") as f:
+ # TODO(roman): This part could probably use an improvement (not selecting a random LL document)
+ f.write(json.dumps(db.data.load_low_level(recording)).encode("utf-8"))
+
+ output_file = os.path.join(working_dir, "output.json")
+ open(output_file, "a").close() # Creating output file
+
+ devnull = open(os.devnull, 'w')
+ try:
+        subprocess.check_call(
+            [HIGH_LEVEL_EXTRACTOR_BINARY, ll_data_file, output_file, profile_file],
+            stdout=devnull,
+            stderr=devnull,
+        )
+ finally:
+ devnull.close()
+
+ with open(output_file) as f:
+ result = f.read()
+ return json.loads(result)
+
+if __name__ == "__main__":
+ main()
diff --git a/challenge_eval/profile_template.yaml b/challenge_eval/profile_template.yaml
new file mode 100644
index 000000000..a77d419e0
--- /dev/null
+++ b/challenge_eval/profile_template.yaml
@@ -0,0 +1,12 @@
+indent: 0
+
+highlevel:
+ compute: 1.
+ svm_models: []
+
+mergeValues:
+ metadata:
+ version:
+ highlevel:
+ essentia_build_sha:
+ models_essentia_git_sha: "v2.1_beta1"
diff --git a/db/challenge.py b/db/challenge.py
new file mode 100644
index 000000000..c4ac0289d
--- /dev/null
+++ b/db/challenge.py
@@ -0,0 +1,406 @@
+from hashlib import sha256
+from datetime import datetime
+import db.dataset
+import pytz
+from collections import defaultdict
+import logging
+import copy
+import time
+import json
+import os
+import db
+import db.exceptions
+from sqlalchemy import text
+import db
+import db.exceptions
+import sqlalchemy
+import string
+import re
+import random
+
+KEY_LENGTH = 40
+
+
+def create(user_id, name, start_time, end_time, classes, validation_dataset_id):
+ """Create a new challenge.
+
+ Validation dataset must have the same set of classes as one that is defined for
+ a challenge (in `classes` argument).
+
+ Args:
+ user_id: User that created a challenge.
+ name: Name of a challenge.
+ start_time: Time when submissions begin.
+ end_time: Time when submissions end.
+ classes: List of class names (labels) as strings are required for submissions.
+ validation_dataset_id: ID of a dataset that will be used for validation.
+
+ Returns:
+ ID of a newly created challenge.
+ """
+ if end_time < start_time:
+ raise ValueError("End time can't be earlier than start time.")
+ for cls in classes:
+ if not re.match("^[A-Za-z0-9_-]+$", cls):
+ raise ValueError("Incorrect class name format.")
+ validation_dataset = db.dataset.get(validation_dataset_id)
+ _validate_dataset_structure(
+ dataset=validation_dataset,
+ classes=classes,
+ )
+
+ with db.engine.connect() as connection:
+ result = connection.execute(sqlalchemy.text("""
+ INSERT INTO challenge (id, creator, name, start_time, end_time, classes, validation_snapshot)
+ VALUES (uuid_generate_v4(), :creator, :name, :start_time, :end_time, :classes, :validation_snapshot)
+ RETURNING id
+ """), {
+ "creator": user_id,
+ "name": name,
+ "start_time": start_time,
+ "end_time": end_time,
+            "classes": ",".join(sorted(set(classes))),
+ "validation_snapshot": db.dataset.create_snapshot(validation_dataset["id"]),
+ })
+ return result.fetchone()["id"]
+
+
+def _submit_eval_job(connection, challenge_id, dataset_id, job_id):
+ _validate_dataset_structure(
+ dataset=db.dataset.get(dataset_id),
+ classes=get(challenge_id)["classes"],
+ )
+ connection.execute(sqlalchemy.text("""
+ INSERT INTO dataset_eval_challenge (dataset_eval_job, challenge_id)
+ VALUES (:dataset_eval_job, :challenge_id)
+ """), {
+ "dataset_eval_job": job_id,
+ "challenge_id": challenge_id,
+ })
+
+
+def _validate_dataset_structure(dataset, classes):
+ classes = [c.lower() for c in classes]
+ encountered = defaultdict(lambda: False)
+ for ds_cls in dataset["classes"]:
+ ds_cls_name = ds_cls["name"].lower()
+ encountered[ds_cls_name] = True
+ if ds_cls_name not in classes:
+ raise db.exceptions.BadDataException("Class `%s` defined in the dataset is not a part of required dataset "
+ "structure for a challenge. Required classes are: %s." %
+ (ds_cls_name, ", ".join(classes)))
+ for req_cls in classes:
+ if not encountered[req_cls]:
+ raise db.exceptions.BadDataException("Dataset is missing a class required by a challenge: %s." % req_cls)
+
+
+def get(id):
+ with db.engine.connect() as connection:
+ result = connection.execute(sqlalchemy.text("""
+ SELECT id, creator, name, start_time, end_time, classes, validation_snapshot, created, concluded
+ FROM challenge
+ WHERE id = :id
+ """), {"id": id})
+ row = result.fetchone()
+ if not row:
+ raise db.exceptions.NoDataFoundException("Can't find challenge with a specified ID.")
+ return _prep_full_row_out(row)
+
+
+def is_ongoing(challenge_id):
+ """Check if challenge is ongoing (evaluation jobs can be submitted as a part of that challenge).
+
+ Args:
+ challenge_id: Identifier of a challenge that needs to be checked.
+
+ Returns:
+ True if it's ongoing, False if it's not.
+ """
+ with db.engine.connect() as connection:
+ result = connection.execute(sqlalchemy.text("""
+ SELECT start_time, end_time, concluded
+ FROM challenge
+ WHERE id = :id
+ """), {"id": challenge_id})
+ row = result.fetchone()
+ if not row:
+ raise db.exceptions.NoDataFoundException("Can't find challenge with a specified ID.")
+ current_t = datetime.now(pytz.utc)
+        return not row["concluded"] and row["start_time"] <= current_t <= row["end_time"]
+
+
+def get_submissions(challenge_id, order=None):
+ """Get evaluation jobs submitted for a challenge and related information.
+
+ Args:
+ challenge_id: ID of a challenge.
+        order: Optional sort order of results. Can be one of: `time`, `accuracy`.
+ `time` - sort by evaluation job creation time.
+ `accuracy` - sort by accuracy in a evaluation results (only for completed jobs).
+ """
+    if order not in [None, "time", "accuracy"]:
+ raise ValueError("Incorrect order argument.")
+ # TODO: Allow to specify offset and limit
+
+ query = """
+ SELECT dataset_eval_challenge.result AS challenge_result,
+ dataset_eval_jobs.id AS job_id,
+ dataset_eval_jobs.snapshot_id AS job_snapshot_id,
+ dataset_eval_jobs.status AS job_status,
+ dataset_eval_jobs.created AS job_created,
+ dataset_eval_jobs.result AS job_result,
+ dataset.id AS dataset_id,
+ dataset.name AS dataset_name,
+ dataset.description AS dataset_description,
+ dataset.public AS dataset_public,
+ dataset.created AS dataset_created,
+           dataset.last_edited AS dataset_last_edited,
+ "user".id AS user_id,
+ "user".musicbrainz_id AS user_musicbrainz_id
+ FROM dataset_eval_challenge
+ JOIN dataset_eval_jobs ON dataset_eval_jobs.id = dataset_eval_challenge.dataset_eval_job
+ JOIN dataset_snapshot ON dataset_snapshot.id = dataset_eval_jobs.snapshot_id
+ JOIN dataset ON dataset.id = dataset_snapshot.dataset_id
+ JOIN "user" ON "user".id = dataset.author
+ WHERE dataset_eval_challenge.challenge_id = :challenge_id
+ """
+ if order == "time":
+ query += "ORDER BY dataset_eval_jobs.created DESC"
+ elif order == "accuracy":
+        query += "ORDER BY (dataset_eval_challenge.result->>'correct')::int DESC NULLS LAST"
+
+ with db.engine.connect() as connection:
+ result = connection.execute(sqlalchemy.text(query), {"challenge_id": challenge_id})
+ return [{
+ "eval_job": {
+ "id": row["job_id"],
+ "snapshot_id": row["job_snapshot_id"],
+ "status": row["job_status"],
+ "created": row["job_created"],
+ "job_result": row["job_result"],
+ "dataset": {
+ "id": row["dataset_id"],
+ "name": row["dataset_name"],
+ "description": row["dataset_description"],
+ "public": row["dataset_public"],
+ "created": row["dataset_created"],
+ "author": {
+ "id": row["user_id"],
+ "musicbrainz_id": row["user_musicbrainz_id"],
+ },
+ },
+ },
+ "challenge_result": row["challenge_result"],
+ } for row in result.fetchall()]
+
+
+def get_results(challenge_id):
+ # TODO: Allow to specify offset and limit
+ query = """
+ WITH results AS (
+ SELECT dataset_eval_challenge.result AS challenge_result,
+ dataset_eval_jobs.id AS job_id,
+ dataset_eval_jobs.snapshot_id AS job_snapshot_id,
+ dataset_eval_jobs.status AS job_status,
+ dataset_eval_jobs.created AS job_created,
+ dataset_eval_jobs.result AS job_result,
+ dataset.id AS dataset_id,
+ dataset.name AS dataset_name,
+ dataset.description AS dataset_description,
+ dataset.public AS dataset_public,
+ dataset.created AS dataset_created,
+                   dataset.last_edited AS dataset_last_edited,
+ "user".id AS user_id,
+ "user".musicbrainz_id AS user_musicbrainz_id,
+ ROW_NUMBER() OVER(
+ PARTITION BY "user".id
+                       ORDER BY (dataset_eval_challenge.result->>'correct')::int DESC NULLS LAST,
+ dataset_eval_jobs.created ASC
+ ) AS rk
+ FROM dataset_eval_challenge
+ JOIN dataset_eval_jobs ON dataset_eval_jobs.id = dataset_eval_challenge.dataset_eval_job
+ JOIN dataset_snapshot ON dataset_snapshot.id = dataset_eval_jobs.snapshot_id
+ JOIN dataset ON dataset.id = dataset_snapshot.dataset_id
+ JOIN "user" ON "user".id = dataset.author
+ WHERE dataset_eval_challenge.challenge_id = :challenge_id
+ )
+ SELECT results.*
+ FROM results
+ WHERE results.rk = 1
+        ORDER BY (results.challenge_result->>'correct')::int DESC NULLS LAST,
+ results.job_created ASC
+ """
+ with db.engine.connect() as connection:
+ result = connection.execute(sqlalchemy.text(query), {"challenge_id": challenge_id})
+ return [{
+ "eval_job": {
+ "id": row["job_id"],
+ "snapshot_id": row["job_snapshot_id"],
+ "status": row["job_status"],
+ "created": row["job_created"],
+ "job_result": row["job_result"],
+ "dataset": {
+ "id": row["dataset_id"],
+ "name": row["dataset_name"],
+ "description": row["dataset_description"],
+ "public": row["dataset_public"],
+ "created": row["dataset_created"],
+ "author": {
+ "id": row["user_id"],
+ "musicbrainz_id": row["user_musicbrainz_id"],
+ },
+ },
+ },
+ "challenge_result": row["challenge_result"],
+ } for row in result.fetchall()]
+
+
+def find_active(query):
+ with db.engine.connect() as connection:
+ result = connection.execute(sqlalchemy.text("""
+ SELECT id, creator, name, start_time, end_time, created, classes
+ FROM challenge
+ WHERE name ILIKE :query_like
+ """), {
+ "query_like": '%' + query + '%',
+ })
+ return [_prep_full_row_out(row) for row in result.fetchall()]
+
+
+def list_all(content_filter=None, limit=20, offset=0):
+ with db.engine.connect() as connection:
+
+ if not content_filter or content_filter == "all":
+ result = connection.execute(sqlalchemy.text("""
+ SELECT id, creator, name, start_time, end_time, created, concluded, classes
+ FROM challenge
+ ORDER BY start_time DESC, end_time DESC
+ LIMIT :limit
+ OFFSET :offset
+ """), {
+ "limit": limit,
+ "offset": offset,
+ })
+ result_count = connection.execute("SELECT COUNT(*) FROM challenge")
+
+ elif content_filter == "upcoming":
+ result = connection.execute(sqlalchemy.text("""
+ SELECT id, creator, name, start_time, end_time, created, concluded, classes
+ FROM challenge
+ WHERE start_time > :now
+ ORDER BY start_time DESC, end_time DESC
+ LIMIT :limit
+ OFFSET :offset
+ """), {
+ "now": datetime.now(pytz.utc),
+ "limit": limit,
+ "offset": offset,
+ })
+ result_count = connection.execute(sqlalchemy.text("""
+ SELECT COUNT(*)
+ FROM challenge
+ WHERE start_time > :now
+ """), {"now": datetime.now(pytz.utc)})
+
+ elif content_filter == "active":
+ result = connection.execute(sqlalchemy.text("""
+ SELECT id, creator, name, start_time, end_time, created, concluded, classes
+ FROM challenge
+ WHERE start_time < :now AND end_time > :now
+ ORDER BY start_time DESC, end_time DESC
+ LIMIT :limit
+ OFFSET :offset
+ """), {
+ "now": datetime.now(pytz.utc),
+ "limit": limit,
+ "offset": offset,
+ })
+ result_count = connection.execute(sqlalchemy.text("""
+ SELECT COUNT(*)
+ FROM challenge
+ WHERE start_time < :now AND end_time > :now
+ """), {"now": datetime.now(pytz.utc)})
+
+ elif content_filter == "ended":
+ result = connection.execute(sqlalchemy.text("""
+ SELECT id, creator, name, start_time, end_time, created, concluded, classes
+ FROM challenge
+ WHERE end_time < :now
+ ORDER BY start_time DESC, end_time DESC
+ LIMIT :limit
+ OFFSET :offset
+ """), {
+ "now": datetime.now(pytz.utc),
+ "limit": limit,
+ "offset": offset,
+ })
+ result_count = connection.execute(sqlalchemy.text("""
+ SELECT COUNT(*)
+ FROM challenge
+ WHERE end_time < :now
+ """), {"now": datetime.now(pytz.utc)})
+
+ else:
+ raise db.exceptions.DatabaseException("Incorrect content filter: %s. Must be one of %s." %
+ (content_filter, ["all", "upcoming", "active", "ended"]))
+
+ return [_prep_full_row_out(row) for row in result.fetchall()], result_count.fetchone()[0]
+
+
+def update(id, name, start_time, end_time):
+ with db.engine.connect() as connection:
+ connection.execute(sqlalchemy.text("""
+ UPDATE challenge
+ SET name = :name, start_time = :start_time, end_time = :end_time
+ WHERE id = :id
+ """), {
+ "id": id,
+ "name": name,
+ "start_time": start_time,
+ "end_time": end_time,
+ })
+
+
+def delete(id):
+ with db.engine.connect() as connection:
+ connection.execute(sqlalchemy.text("""
+ DELETE FROM challenge
+ WHERE id = :id
+ """), {"id": id})
+
+
+def get_next_eval_task():
+ with db.engine.connect() as connection:
+ result = connection.execute("""
+ SELECT dataset_eval_challenge.dataset_eval_job::text as job_id,
+ dataset_eval_challenge.challenge_id::text as challenge_id,
+ challenge.validation_snapshot::text as validation_snapshot_id
+ FROM dataset_eval_challenge
+ JOIN dataset_eval_jobs ON dataset_eval_challenge.dataset_eval_job = dataset_eval_jobs.id
+ JOIN challenge ON dataset_eval_challenge.challenge_id = challenge.id
+ WHERE dataset_eval_challenge.result IS NULL
+ AND dataset_eval_jobs.status = 'done'
+ """)
+ result = result.fetchone()
+ if not result:
+ return None
+ return dict(result)
+
+
+def set_submission_result(eval_job_id, challenge_id, result):
+ with db.engine.connect() as connection:
+ connection.execute(sqlalchemy.text("""
+ UPDATE dataset_eval_challenge
+ SET result = :result
+ WHERE dataset_eval_job = :dataset_eval_job AND challenge_id = :challenge_id
+ """), {
+ "result": json.dumps(result),
+ "dataset_eval_job": eval_job_id,
+ "challenge_id": challenge_id,
+ })
+
+
+def _prep_full_row_out(row):
+ row = dict(row)
+ row["classes"] = row["classes"].split(",")
+ return row
diff --git a/db/data.py b/db/data.py
index 13bda4e45..ec16d965f 100644
--- a/db/data.py
+++ b/db/data.py
@@ -531,12 +531,12 @@ def load_many_high_level(recordings, map_classes=False):
# Metadata
meta_query = text("""
SELECT hl.id
- , hlm.data
+ , highlevel_meta.data
, ll.gid::text
, ll.submission_offset::text
FROM highlevel hl
- JOIN highlevel_meta hlm
- ON hl.id = hlm.id
+ JOIN highlevel_meta
+ ON hl.id = highlevel_meta.id
JOIN lowlevel ll
ON ll.id = hl.id
WHERE (ll.gid, ll.submission_offset)
@@ -560,6 +560,7 @@ def load_many_high_level(recordings, map_classes=False):
# Model data
model_query = text("""
SELECT m.model
+ , hlmo.id as highlevel_model_id
, hlmo.data
, version.data as version
, ll.gid::text
@@ -583,8 +584,8 @@ def load_many_high_level(recordings, map_classes=False):
mapping = row['class_mapping']
if map_classes and mapping:
data = map_highlevel_class_names(data, mapping)
-
data['version'] = row['version']
+ data['highlevel_model_id'] = row['highlevel_model_id']
gid = row['gid']
submission_offset = row['submission_offset']
diff --git a/db/dataset.py b/db/dataset.py
index ed1757f2d..a89cefa97 100644
--- a/db/dataset.py
+++ b/db/dataset.py
@@ -293,6 +293,27 @@ def create_snapshot(dataset_id):
return result.fetchone()["id"]
+def replace_snapshot(snapshot_id, dataset):
+ snapshot = {
+ "name": dataset["name"],
+ "description": dataset["description"],
+ "classes": [{
+ "name": c["name"],
+ "description": c["description"],
+ "recordings": c["recordings"],
+ } for c in dataset["classes"]],
+ }
+ with db.engine.connect() as connection:
+ connection.execute(sqlalchemy.text("""
+ UPDATE dataset_snapshot
+ SET data = :data
+ WHERE id = :id
+ """), {
+ "id": snapshot_id,
+ "data": json.dumps(snapshot),
+ })
+
+
def get_snapshot(id):
"""Get snapshot of a dataset.
@@ -322,6 +343,23 @@ def get_snapshot(id):
return dict(row)
+def find(query, user_id):
+ with db.engine.connect() as connection:
+ result = connection.execute(sqlalchemy.text("""
+ SELECT id::text, name, description, author, created, public, last_edited
+ FROM dataset
+ WHERE name ILIKE :query_like AND author = :author
+ """), {
+ "query_like": '%' + query + '%',
+ "author": user_id
+ })
+        rows = result.fetchall()
+        rows = [dict(row) for row in rows]
+        for row in rows:
+            row["classes"] = _get_classes(row["id"])
+        return rows
+
+
def _delete_snapshot(connection, snapshot_id):
"""Delete a snapshot.
diff --git a/db/dataset_eval.py b/db/dataset_eval.py
index aed4e97d3..dac529a29 100644
--- a/db/dataset_eval.py
+++ b/db/dataset_eval.py
@@ -1,5 +1,6 @@
import db
import db.exceptions
+import db.challenge
import db.dataset
import db.data
import db.user
@@ -46,7 +47,7 @@
def evaluate_dataset(dataset_id, normalize, eval_location, c_values=None, gamma_values=None,
- preprocessing_values=None, filter_type=None):
+ preprocessing_values=None, filter_type=None, challenge_id=None):
"""Add dataset into evaluation queue.
Args:
@@ -67,6 +68,8 @@ def evaluate_dataset(dataset_id, normalize, eval_location, c_values=None, gamma_
filter_type: Optional filtering that will be applied to the dataset.
See FILTER_* variables in this module for a list of existing
filters.
+ challenge_id: Optional UUID of a challenge. If specified, evaluation
+ job will be submitted as a part of that challenge.
Raises:
JobExistsException: if the dataset has already been submitted for evaluation
@@ -90,7 +93,8 @@ def evaluate_dataset(dataset_id, normalize, eval_location, c_values=None, gamma_
# Validate dataset contents
validate_dataset_contents(db.dataset.get(dataset_id))
return _create_job(connection, dataset_id, normalize, eval_location,
- c_values, gamma_values, preprocessing_values, filter_type)
+ c_values, gamma_values, preprocessing_values, filter_type,
+ challenge_id=challenge_id)
def job_exists(dataset_id):
@@ -230,6 +234,18 @@ def get_jobs_for_dataset(dataset_id):
return [dict(j) for j in result.fetchall()]
+def get_jobs_in_challenge(challenge_id):
+ """Get jobs that were submitted for a specific challenge."""
+ with db.engine.connect() as connection:
+ result = connection.execute(sqlalchemy.text("""
+ SELECT dataset_eval_jobs.*
+ FROM dataset_eval_challenge
+ JOIN dataset_eval_jobs ON dataset_eval_jobs.id = dataset_eval_challenge.dataset_eval_job
+ WHERE dataset_eval_challenge.challenge_id = :challenge_id
+ """), {"challenge_id": challenge_id})
+ return [dict(j) for j in result.fetchall()]
+
+
def set_job_result(job_id, result):
with db.engine.begin() as connection:
connection.execute(
@@ -330,7 +346,7 @@ def add_dataset_eval_set(connection, data):
def _create_job(connection, dataset_id, normalize, eval_location, c_value,
- gamma_value, preprocessing_values, filter_type):
+ gamma_value, preprocessing_values, filter_type, challenge_id=None):
if not isinstance(normalize, bool):
raise ValueError("Argument 'normalize' must be a boolean.")
if filter_type is not None:
@@ -360,6 +376,14 @@ def _create_job(connection, dataset_id, normalize, eval_location, c_value,
"eval_location": eval_location
})
job_id = result.fetchone()[0]
+ if challenge_id:
+ _submit_for_challenge(
+ connection=connection,
+ challenge_id=challenge_id,
+ dataset_id=dataset_id,
+ job_id=job_id,
+ snapshot_id=snapshot_id,
+ )
return job_id
def get_remote_pending_jobs_for_user(user_id):
@@ -400,6 +424,40 @@ def get_remote_pending_jobs_for_user(user_id):
return jobs
+def _submit_for_challenge(connection, challenge_id, dataset_id, job_id, snapshot_id):
+ """Submit existing dataset for a challenge.
+
+ This function also performs recording filtering (removes recordings that are present in a
+ validation dataset from a submission. This is a mandatory step, which updates snapshot that
+ was created for evaluation job.
+ """
+ if not db.challenge.is_ongoing(challenge_id):
+ raise db.exceptions.DatabaseException("Can only submit dataset for an ongoing challenge.")
+ recordings_to_remove = set()
+ validation_snapshot = db.dataset.get_snapshot(db.challenge.get(challenge_id)["validation_snapshot"])["data"]
+ for cls in validation_snapshot["classes"]:
+ for rec in cls["recordings"]:
+ recordings_to_remove.add(rec)
+ filtered_ds = _filter_recordings(recordings_to_remove, db.dataset.get(dataset_id))
+ db.dataset.replace_snapshot(snapshot_id, filtered_ds)
+ db.challenge._submit_eval_job(connection, challenge_id, dataset_id, job_id)
+
+
+def _filter_recordings(recordings, dataset):
+ """This function performs recording filtering in a dataset.
+
+ Args:
+ recordings (set): Set of recording IDs (strings) that need to be removed.
+ dataset (dict): Dataset to be filtered.
+
+ Returns:
+ Dataset with recording filtering applied.
+ """
+ for cls in dataset["classes"]:
+ cls["recordings"] = [r for r in cls["recordings"] if r not in recordings]
+ return dataset
+
+
class IncompleteDatasetException(db.exceptions.DatabaseException):
pass
diff --git a/db/feedback.py b/db/feedback.py
new file mode 100644
index 000000000..208a1528b
--- /dev/null
+++ b/db/feedback.py
@@ -0,0 +1,30 @@
+import db
+import db.dataset
+import db.exceptions
+import sqlalchemy
+
+KEY_LENGTH = 40
+
+
+def submit(hl_model_id, user_id, is_correct, suggestion=None):
+ if hl_model_id is None or is_correct is None or user_id is None:
+ raise ValueError("Missing required data")
+ if type(hl_model_id) is not int:
+ raise ValueError("`hl_model_id` argument must be an integer")
+ if type(user_id) is not int:
+ raise ValueError("`user_id` argument must be an integer")
+ if type(is_correct) is not bool:
+ raise ValueError("`is_correct` argument must be a boolean")
+ with db.engine.connect() as connection:
+ connection.execute(sqlalchemy.text("""
+ INSERT INTO feedback (highlevel_model_id, user_id, correct, suggestion)
+ VALUES (:highlevel_model_id, :user_id, :correct, :suggestion)
+ ON CONFLICT ON CONSTRAINT feedback_pkey
+ DO UPDATE SET (correct, suggestion) = (:correct, :suggestion)
+ WHERE feedback.highlevel_model_id = :highlevel_model_id AND feedback.user_id = :user_id
+ """), {
+ "highlevel_model_id": hl_model_id,
+ "user_id": user_id,
+ "correct": is_correct,
+ "suggestion": suggestion,
+ })
diff --git a/hl_extractor/hl_calc.py b/hl_extractor/hl_calc.py
index 00b74e8d8..e15833306 100644
--- a/hl_extractor/hl_calc.py
+++ b/hl_extractor/hl_calc.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-import hashlib
import json
import logging
import os
@@ -11,11 +10,11 @@
import traceback
import concurrent.futures
-import yaml
from flask import current_app
import db
import db.data
+import utils.hl_calc
DEFAULT_NUM_THREADS = 2
@@ -36,11 +35,6 @@ class HighLevelExtractorError(Exception):
"""Indicates an error running the highlevel extractor"""
-class HighLevelConfigurationError(Exception):
- """Indicates an error configuring the highlevel extractor on startup,
- before processing items"""
-
-
def chunks(l, n):
"""Yield successive n-sized chunks from l."""
for i in xrange(0, len(l), n):
@@ -129,46 +123,6 @@ def process_lowlevel_data(data, logger_name=None):
return results
-def create_profile(in_file, out_file, sha1):
- """Prepare a profile file for use with essentia. Sanity check to make sure
- important values are present.
- """
-
- try:
- with open(in_file, 'r') as f:
- doc = yaml.load(f, Loader=yaml.SafeLoader)
- except IOError as e:
- raise HighLevelConfigurationError(u"Cannot read profile {}: {}".format(in_file, e))
-
- try:
- models_ver = doc['mergeValues']['metadata']['version']['highlevel']['models_essentia_git_sha']
- except KeyError:
- models_ver = None
-
- if not models_ver:
- raise HighLevelConfigurationError("{} needs to have mergeValues.metadata.version.highlevel."
- "models_essentia_git_sha defined".format(in_file))
-
- doc['mergeValues']['metadata']['version']['highlevel']['essentia_build_sha'] = sha1
-
- try:
- with open(out_file, 'w') as yaml_file:
- yaml.dump(doc, yaml_file, default_flow_style=False)
- except IOError as e:
- raise HighLevelConfigurationError(u"Cannot write profile {}: {}".format(out_file, e))
-
-
-def get_build_sha1(binary):
- """Calculate the SHA1 of the binary we're using."""
- try:
- with open(binary, "rb") as fp:
- contents = fp.read()
- except IOError as e:
- raise HighLevelConfigurationError("Cannot calculate the SHA1 of the high-level extractor binary: {}".format(e))
-
- return hashlib.sha1(contents).hexdigest()
-
-
def save_hl_documents(hl_data_list, build_sha1):
"""Save a list of highlevel documents to the database.
@@ -190,9 +144,9 @@ def main(num_threads=DEFAULT_NUM_THREADS):
))
try:
- build_sha1 = get_build_sha1(HIGH_LEVEL_EXTRACTOR_BINARY)
- create_profile(PROFILE_CONF_TEMPLATE, PROFILE_CONF, build_sha1)
- except HighLevelConfigurationError as e:
+ build_sha1 = utils.hl_calc.get_build_sha1(HIGH_LEVEL_EXTRACTOR_BINARY)
+ utils.hl_calc.create_profile(PROFILE_CONF_TEMPLATE, PROFILE_CONF, build_sha1)
+ except utils.hl_calc.HighLevelConfigurationError as e:
current_app.logger.error(u'{}'.format(e))
sys.exit(-1)
diff --git a/hl_extractor/test/test_hl_calc.py b/hl_extractor/test/test_hl_calc.py
index 712625cb2..cdfec5034 100644
--- a/hl_extractor/test/test_hl_calc.py
+++ b/hl_extractor/test/test_hl_calc.py
@@ -8,6 +8,7 @@
import yaml
from hl_extractor import hl_calc
+import utils.hl_calc
class HlCalcTest(unittest.TestCase):
@@ -118,7 +119,7 @@ def test_create_profile(self):
""".strip()
with open(inputname, "w") as fp:
fp.write(source)
- hl_calc.create_profile(inputname, outputname, 'this_value_to_interpolate')
+ utils.hl_calc.create_profile(inputname, outputname, 'this_value_to_interpolate')
expected = {'indent': 0, 'mergeValues': {
'metadata': {'version': {
@@ -137,9 +138,9 @@ def test_create_profile(self):
def test_get_build_sha1(self):
data_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), "test_data")
data_file = os.path.join(data_dir, "known_file")
- result_sha1 = hl_calc.get_build_sha1(data_file)
+ result_sha1 = utils.hl_calc.get_build_sha1(data_file)
self.assertEqual(result_sha1, "018507c3c54e655320feee0a87e7b56447a45258")
- with self.assertRaises(hl_calc.HighLevelConfigurationError):
+ with self.assertRaises(utils.hl_calc.HighLevelConfigurationError):
data_file = os.path.join(data_dir, "unknown_file")
- hl_calc.get_build_sha1(data_file)
+ utils.hl_calc.get_build_sha1(data_file)
diff --git a/utils/hl_calc.py b/utils/hl_calc.py
new file mode 100644
index 000000000..bded69f09
--- /dev/null
+++ b/utils/hl_calc.py
@@ -0,0 +1,48 @@
+import hashlib
+
+import yaml
+
+
+class HighLevelConfigurationError(Exception):
+ """Indicates an error configuring the highlevel extractor on startup,
+ before processing items"""
+
+
+def create_profile(in_file, out_file, sha1):
+ """Prepare a profile file for use with essentia. Sanity check to make sure
+ important values are present.
+ """
+
+ try:
+ with open(in_file, 'r') as f:
+ doc = yaml.load(f, Loader=yaml.SafeLoader)
+ except IOError as e:
+ raise HighLevelConfigurationError(u"Cannot read profile {}: {}".format(in_file, e))
+
+ try:
+ models_ver = doc['mergeValues']['metadata']['version']['highlevel']['models_essentia_git_sha']
+ except KeyError:
+ models_ver = None
+
+ if not models_ver:
+ raise HighLevelConfigurationError("{} needs to have mergeValues.metadata.version.highlevel."
+ "models_essentia_git_sha defined".format(in_file))
+
+ doc['mergeValues']['metadata']['version']['highlevel']['essentia_build_sha'] = sha1
+
+ try:
+ with open(out_file, 'w') as yaml_file:
+ yaml.dump(doc, yaml_file, default_flow_style=False)
+ except IOError as e:
+ raise HighLevelConfigurationError(u"Cannot write profile {}: {}".format(out_file, e))
+
+
+def get_build_sha1(binary):
+ """Calculate the SHA1 of the binary we're using."""
+ try:
+ with open(binary, "rb") as fp:
+ contents = fp.read()
+ except IOError as e:
+ raise HighLevelConfigurationError("Cannot calculate the SHA1 of the high-level extractor binary: {}".format(e))
+
+ return hashlib.sha1(contents).hexdigest()
diff --git a/utils/models.py b/utils/models.py
new file mode 100644
index 000000000..378fc8364
--- /dev/null
+++ b/utils/models.py
@@ -0,0 +1,20 @@
+import os
+import os.path
+import sys
+import utils.path
+
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), ".."))
+import config
+
+HISTORY_STORAGE_DIR = os.path.join(config.FILE_STORAGE_DIR, "history")
+
+
def get_model_dir_path(job_id, create=False):
    """Return the directory that stores the model file for an eval job.

    Models are sharded into subdirectories keyed by the first one and
    first two characters of the job ID so no single directory grows
    too large.

    Args:
        job_id: ID (string) of the evaluation job.
        create: if True, create the directory when it doesn't exist yet.
    """
    path = os.path.join(HISTORY_STORAGE_DIR, job_id[:1], job_id[:2])
    if create:
        utils.path.create_path(path)
    return path
+
+
def get_model_file_path(job_id, create_dir=False):
    """Return the full path of the model (history) file for an eval job.

    Args:
        job_id: ID (string) of the evaluation job.
        create_dir: if True, create the containing directory if missing.
    """
    file_name = "%s.history" % job_id
    return os.path.join(get_model_dir_path(job_id, create=create_dir), file_name)
diff --git a/webpack.config.js b/webpack.config.js
index 7e4531eea..7e4a3e724 100644
--- a/webpack.config.js
+++ b/webpack.config.js
@@ -16,7 +16,9 @@ module.exports = function (env) {
homepage: ['./scripts/homepage.js'],
profile: ['./scripts/profile.js'],
stats: ['./scripts/stats.js'],
- main: ['./styles/main.less']
+ main: ['./styles/main.less'],
+ challenges_add: ['./scripts/admin/challenges-add.js'],
+ eval_add: ['./scripts/eval-add.js']
},
output: {
chunkFilename: production ? '[name].[chunkhash].js' : '[name].js',
diff --git a/webserver/__init__.py b/webserver/__init__.py
index 131f80519..fca9546d9 100644
--- a/webserver/__init__.py
+++ b/webserver/__init__.py
@@ -128,9 +128,10 @@ def after_request_callbacks(response):
# Admin section
from flask_admin import Admin
- from webserver.admin import views as admin_views
- admin = Admin(app, index_view=admin_views.HomeView(name='Admin'))
- admin.add_view(admin_views.AdminsView(name='Admins'))
+ from webserver.admin.views import home, admins, challenges
+ admin = Admin(app, index_view=home.HomeView(name='Home'))
+ admin.add_view(admins.AdminsView(name='Admins'))
+ admin.add_view(challenges.ChallengesView(name='Challenges'))
@app.before_request
def prod_https_login_redirect():
@@ -176,18 +177,21 @@ def create_app_sphinx():
def _register_blueprints(app):
def register_ui(app):
+ # Blueprints
from webserver.views.index import index_bp
from webserver.views.data import data_bp
from webserver.views.stats import stats_bp
from webserver.views.login import login_bp
from webserver.views.user import user_bp
from webserver.views.datasets import datasets_bp
+ from webserver.views.challenges import challenges_bp
app.register_blueprint(index_bp)
app.register_blueprint(data_bp)
app.register_blueprint(stats_bp)
app.register_blueprint(login_bp, url_prefix='/login')
app.register_blueprint(user_bp)
app.register_blueprint(datasets_bp, url_prefix='/datasets')
+ app.register_blueprint(challenges_bp, url_prefix='/challenges')
def register_api(app):
v1_prefix = os.path.join(API_PREFIX, 'v1')
diff --git a/webserver/admin/forms.py b/webserver/admin/forms.py
index 77f1b1519..35be95b70 100644
--- a/webserver/admin/forms.py
+++ b/webserver/admin/forms.py
@@ -1,6 +1,8 @@
from flask_wtf import FlaskForm
from wtforms import StringField, BooleanField
-from wtforms.validators import DataRequired
+from wtforms.fields.html5 import DateField
+from wtforms.validators import DataRequired, UUID
+from webserver.forms import DynamicSelectField
class AddAdminForm(FlaskForm):
@@ -9,3 +11,30 @@ class AddAdminForm(FlaskForm):
validators=[DataRequired("MusicBrainz username is required!")],
)
force = BooleanField("Create user if doesn't exist")
+
+
class EditChallengeForm(FlaskForm):
    """Form for editing a challenge's basic info: name, start and end time."""

    name = StringField(
        "Name",
        validators=[DataRequired("Name of the challenge is required!")],
    )
    start_time = DateField(
        "Start time",
        validators=[DataRequired("Start time is required!")],
    )
    end_time = DateField(
        "End time",
        validators=[DataRequired("End time is required!")],
    )

    def __init__(self, default_name=None, default_start_time=None, default_end_time=None, **kwargs):
        # Prefill fields with the provided defaults, but only when the
        # caller hasn't passed explicit values for them.
        kwargs.setdefault('name', default_name)
        kwargs.setdefault('start_time', default_start_time)
        kwargs.setdefault('end_time', default_end_time)
        super(EditChallengeForm, self).__init__(**kwargs)
+
+
class AddChallengeForm(EditChallengeForm):
    """Challenge creation form: basic info plus a class list and a
    validation dataset reference."""

    # Comma-separated list of classes that submissions must provide.
    classes = StringField("Classes", validators=[DataRequired("List of classes is required!")])
    # Choices are populated dynamically on the client side.
    validation_dataset_id = DynamicSelectField(
        "Validation dataset",
        choices=[],
        validators=[UUID("Incorrect validation dataset ID!")],
    )
diff --git a/webserver/admin/views/__init__.py b/webserver/admin/views/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/webserver/admin/views.py b/webserver/admin/views/admins.py
similarity index 87%
rename from webserver/admin/views.py
rename to webserver/admin/views/admins.py
index eabe7e9bf..bc2f5351b 100644
--- a/webserver/admin/views.py
+++ b/webserver/admin/views/admins.py
@@ -1,18 +1,11 @@
from flask import request, url_for, redirect
from flask_admin import expose
-from webserver.admin import AdminIndexView, AdminBaseView, forms
+from webserver.admin import AdminBaseView, forms
from webserver import flash
import db.user
import db.exceptions
-class HomeView(AdminIndexView):
-
- @expose("/")
- def index(self):
- return self.render("admin/home.html")
-
-
class AdminsView(AdminBaseView):
@expose("/")
diff --git a/webserver/admin/views/challenges.py b/webserver/admin/views/challenges.py
new file mode 100644
index 000000000..58dac43b0
--- /dev/null
+++ b/webserver/admin/views/challenges.py
@@ -0,0 +1,88 @@
+from flask import url_for, redirect, request
+from flask_login import current_user
+from flask_admin import expose
+from webserver.admin import AdminBaseView, forms
+from webserver import flash
+from werkzeug.exceptions import BadRequest
+from math import ceil
+import db.user
+import db.challenge
+import db.exceptions
+
+
+class ChallengesView(AdminBaseView):
+
+ @expose("/")
+ def index(self):
+ content_filter = request.args.get("content_filter", default="all")
+ if content_filter not in ["all", "upcoming", "active", "ended"]:
+ raise BadRequest("Invalid filter.")
+ page = int(request.args.get("page", default=1))
+ if page < 1:
+ return redirect(url_for('.index'))
+ limit = 30
+ offset = (page - 1) * limit
+ challenges, total_count = db.challenge.list_all(
+ content_filter=content_filter,
+ limit=limit,
+ offset=offset
+ )
+ last_page = int(ceil(total_count / limit))
+ if last_page != 0 and page > last_page:
+ return redirect(url_for('.index', content_filter=content_filter, page=last_page))
+ return self.render("admin/challenges/index.html",
+ challenges=challenges,
+ content_filter=content_filter,
+ page=page,
+ last_page=last_page)
+
+ @expose("/create", methods=["GET", "POST"])
+ def create(self):
+ form = forms.AddChallengeForm()
+ if form.validate_on_submit():
+ try:
+ id = db.challenge.create(
+ user_id=current_user.id,
+ name=form.name.data,
+ start_time=form.start_time.data,
+ end_time=form.end_time.data,
+ classes=form.classes.data.split(","),
+ validation_dataset_id=form.validation_dataset_id.data,
+ )
+ except db.exceptions.DatabaseException as e:
+ flash.error("Error: %s" % e)
+ return self.render("admin/challenges/add.html", form=form)
+ flash.success('Challenge "%s" has been created. ID: %s.' %
+ (form.name.data, id))
+ return redirect(url_for(".index"))
+ return self.render("admin/challenges/add.html", form=form)
+
+ @expose("/ Dataset creation challenges.
+ Challenges
+ Create a new challenge
+
+ {% for field in form.errors %}
+ {% for error in form.errors[field] %}
+ Challenges
+
| ID | +Name | +Creator (User ID) | +Start time | +End time | +{# Controls #} | +
|---|---|---|---|---|---|
| {{ challenge["id"] }} | +{{ challenge["name"] }} | +{{ challenge["creator"] }} | +{{ challenge["start_time"] }} | +{{ challenge["end_time"] }} | ++ Modify + Delete + | +
+ This challenge has started on {{ challenge['start_time']|datetime }} and
+ {{ 'ended' if past_deadline else 'will end' }} on {{ challenge['end_time']|datetime }}.
+
Challenge ID: {{ challenge['id'] }}
+
Organizer ID: {{ challenge['creator'] }} (created on {{ challenge['created']|datetime }})
+
| # | +Submitted by | +Accuracy | +Submission time | +Snapshot ID | +Job ID | +
|---|---|---|---|---|---|
| {{ loop.index }} | ++ + {{ s["eval_job"]["dataset"]["author"]["musicbrainz_id"] }} + + | ++ + {% if s["challenge_result"] %} + {{ (s["challenge_result"]["correct"] * 100 / s["challenge_result"]["total"]) | round(2) }}% + {% else %} + - + {% endif %} + + | +{{ s["eval_job"]["created"]|datetime }} | ++ + {{ s["eval_job"]["snapshot_id"] }} + + | ++ + {{ s["eval_job"]["id"] }} + + | +
+ Challenge ID: {{ challenge['id'] }}
+
Organizer ID: {{ challenge['creator'] }} (created on {{ challenge['created']|datetime }})
+
+
+ + +| User | +Time | +Job ID | +Job status | +Accuracy | +
|---|---|---|---|---|
| + + {{ s["eval_job"]["dataset"]["author"]["musicbrainz_id"] }} + + | +{{ s["eval_job"]["created"]|datetime }} | +{{ s["eval_job"]["id"] }} | +{{ render_job_status(s["eval_job"]["status"]) }} | ++ + {% if s["challenge_result"] %} + {{ (s["challenge_result"]["correct"] * 100 / s["challenge_result"]["total"]) | round(2) }}% + {% else %} + - + {% endif %} + + | +
+ Challenge ID: {{ challenge['id'] }}
+
Organizer ID: {{ challenge['creator'] }} (created on {{ challenge['created']|datetime }})
+
+
+ + +| Name | +Start time | +End time | +
|---|---|---|
| + + {{ challenge['name'] }} + + | +{{ challenge['start_time'] }} | +{{ challenge['end_time'] }} | +