From e6c1eab938f8eb3db0ed7b4514b6c37aaddffc20 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Fri, 18 May 2018 15:44:18 +0530 Subject: [PATCH 001/125] AB-334: Setup to directly access Musicbrainz Database --- config.py.example | 3 +++ docker/docker-compose.dev.yml | 11 +++++++++++ requirements.txt | 2 +- webserver/__init__.py | 4 ++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/config.py.example b/config.py.example index 2028f4833..7fe6e30b8 100644 --- a/config.py.example +++ b/config.py.example @@ -13,6 +13,9 @@ SECRET_KEY = "CHANGE_ME" # Primary database SQLALCHEMY_DATABASE_URI = "postgresql://acousticbrainz@db/acousticbrainz" +# MusicBrainz Database +MB_DATABASE_URI = "postgresql://musicbrainz:musicbrainz@musicbrainz_database:5432/musicbrainz_database" + # URI to connect to an empty database as the superuser POSTGRES_ADMIN_URI = "postgresql://postgres@db/template1" # URI to connect to the acousticbrainz database as the superuser (to install extensions) diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 870b8e395..153426895 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -24,10 +24,21 @@ services: depends_on: - db - redis + - musicbrainz_database redis: image: redis:4.0-alpine + musicbrainz_database: + image: metabrainz/musicbrainz-test-database:schema-change-2017-q2 + volumes: + - ../data/mbdata:/var/lib/postgresql/data/pgdata + environment: + PGDATA: /var/lib/postgresql/data/pgdata + MB_IMPORT_DUMPS: "true" + ports: + - "5430:5432" + hl_extractor: build: context: .. 
diff --git a/requirements.txt b/requirements.txt index dd8e62957..f675183e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/metabrainz/brainzutils-python.git@v1.4.2 +git+https://github.com/rsh7/brainzutils-python.git@82e0ceab9ac19a253273e6185a7bcadc7b0f3248 click == 6.7 coverage == 4.5.1 Fabric == 1.14.0 diff --git a/webserver/__init__.py b/webserver/__init__.py index a72e973fc..ce521a520 100644 --- a/webserver/__init__.py +++ b/webserver/__init__.py @@ -77,6 +77,10 @@ def create_app(debug=None, config_path=None): from db import init_db_engine init_db_engine(app.config['SQLALCHEMY_DATABASE_URI']) + # MusicBrainz Database + from brainzutils import musicbrainz_db + musicbrainz_db.init_db_engine(app.config.get('MB_DATABASE_URI')) + # Cache if 'REDIS_HOST' in app.config and\ 'REDIS_PORT' in app.config and\ From cf9bb354e56e9ee53af1763307959cb9d96975b8 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sat, 19 May 2018 18:17:01 +0530 Subject: [PATCH 002/125] Database name as in brainzutils and update requirements.txt --- config.py.example | 2 +- docker/docker-compose.dev.yml | 4 ++-- requirements.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config.py.example b/config.py.example index 7fe6e30b8..407c584c1 100644 --- a/config.py.example +++ b/config.py.example @@ -14,7 +14,7 @@ SECRET_KEY = "CHANGE_ME" SQLALCHEMY_DATABASE_URI = "postgresql://acousticbrainz@db/acousticbrainz" # MusicBrainz Database -MB_DATABASE_URI = "postgresql://musicbrainz:musicbrainz@musicbrainz_database:5432/musicbrainz_database" +MB_DATABASE_URI = "postgresql://musicbrainz:musicbrainz@musicbrainz_db:5432/musicbrainz_db" # URI to connect to an empty database as the superuser POSTGRES_ADMIN_URI = "postgresql://postgres@db/template1" diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 153426895..9a964d593 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -24,12 +24,12 @@ 
services: depends_on: - db - redis - - musicbrainz_database + - musicbrainz_db redis: image: redis:4.0-alpine - musicbrainz_database: + musicbrainz_db: image: metabrainz/musicbrainz-test-database:schema-change-2017-q2 volumes: - ../data/mbdata:/var/lib/postgresql/data/pgdata diff --git a/requirements.txt b/requirements.txt index f675183e0..450d5e0b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/rsh7/brainzutils-python.git@82e0ceab9ac19a253273e6185a7bcadc7b0f3248 +git+https://github.com/rsh7/brainzutils-python.git@7c3366d8a432afe4b04438cfb86dc004310b70dd click == 6.7 coverage == 4.5.1 Fabric == 1.14.0 From 42e38021aaa8b1eaf448d37f42e192b73d91e682 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sun, 20 May 2018 23:28:33 +0530 Subject: [PATCH 003/125] AB-334: Update documentation for setting up MusicBrainz database --- README.md | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/README.md b/README.md index 3c91269d5..ca3447da5 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,71 @@ In order to load a psql session, use the following command: ./develop.sh run --rm db psql -U acousticbrainz -h db +### Setup the MusicBrainz Server + +MusicBrainz database containing all the MusicBrainz metadata is needed for +setting up your application. The ``mbdump.tar.bz2`` is the core MusicBrainz +archive which includes the tables for artist, release_group etc. +The ``mbdump-derived.tar.bz2`` archive contains annotations, user tags and search indexes. +These archives include all the data required for setting up an instance of +AcousticBrainz. + +You can import the database dump by downloading and importing the data in +a single command:: + + $ docker-compose -f docker/docker-compose.dev.yml run musicbrainz_db + +.. note:: + + One can also manually download the dumps and then import it:- + + i. 
For this, you have to download the dumps ``mbdump.tar.bz2`` and ``mbdump-derived.tar.bz2`` + from http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/. + + .. warning:: + + Make sure to get the latest dumps + + ii. Then the environment variable ``DUMPS_DIR`` must be set to the path of the + folders containing the dumps. This can be done by:: + + $ export DUMPS_DIR="Path of the folder containing the dumps" + + You can check that the variable ``DUMPS_DIR`` has been succesfully assigned or not by:: + + $ echo $DUMPS_DIR + + This must display the path of your folder containing the database dumps. The folder must contain at least the file ``mbdump.tar.bz2``. + + iii. Then import the database dumps by this command:: + + $ docker-compose -f docker/docker-compose.dev.yml run -v $DUMPS_DIR:/home/musicbrainz/dumps \ + -v $PWD/data/mbdata:/var/lib/postgresql/data/pgdata musicbrainz_db + +.. note:: + + You can also use the smaller sample dumps available at http://ftp.musicbrainz.org/pub/musicbrainz/data/sample/ + to set up the MusicBrainz database. However, note that these dumps are .tar.xz + dumps while AcousticBrainz currently only supports import of .tar.bz2 dumps. + So, a decompression of the sample dumps and recompression into .tar.bz2 dumps + will be needed. This can be done using the following command + + $ xzcat mbdump-sample.tar.xz | bzip2 > mbdump.tar.bz2 + +.. warning:: + + Keep in mind that this process is very time consuming, so make sure that you don't delete + the ``data/mbdata`` directory accidently. Also make sure that you have about 25GB of free + space to keep the MusicBrainz data. + +Initialization of AcousticBrainz database is also required: + +$ ./develop.sh run --rm webserver python2 manage.py init_db + +Then you can start all the services: + +$ ./develop.sh up --build + ### Manually Full installation instructions are available in [INSTALL.md](https://github.com/metabrainz/acousticbrainz-server/blob/master/INSTALL.md) file. 
After installing, continue the following steps. From 4b3b3d5cdaacfaf3f46419833eb886cd78902d89 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Mon, 21 May 2018 15:09:28 +0530 Subject: [PATCH 004/125] Convert documentation to markdown syntax --- README.md | 50 ++++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index ca3447da5..1a9fd0700 100644 --- a/README.md +++ b/README.md @@ -47,61 +47,59 @@ The ``mbdump-derived.tar.bz2`` archive contains annotations, user tags and searc These archives include all the data required for setting up an instance of AcousticBrainz. -You can import the database dump by downloading and importing the data in -a single command:: +You can import the database dumps by downloading and importing the data in +a single command: - $ docker-compose -f docker/docker-compose.dev.yml run musicbrainz_db + docker-compose -f docker/docker-compose.dev.yml run musicbrainz_db -.. note:: +**Note** - One can also manually download the dumps and then import it:- +One can also manually download the dumps and then import it:- - i. For this, you have to download the dumps ``mbdump.tar.bz2`` and ``mbdump-derived.tar.bz2`` + 1. For this, you have to download the dumps ``mbdump.tar.bz2`` and ``mbdump-derived.tar.bz2`` from http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/. - .. warning:: + **Warning** - Make sure to get the latest dumps + Make sure to get the latest dumps - ii. Then the environment variable ``DUMPS_DIR`` must be set to the path of the - folders containing the dumps. This can be done by:: + 2. Then the environment variable ``DUMPS_DIR`` must be set to the path of the + folders containing the dumps. 
This can be done by: - $ export DUMPS_DIR="Path of the folder containing the dumps" + export DUMPS_DIR="Path of the folder containing the dumps" - You can check that the variable ``DUMPS_DIR`` has been succesfully assigned or not by:: + You can check that the variable ``DUMPS_DIR`` has been succesfully assigned or not by: - $ echo $DUMPS_DIR + echo $DUMPS_DIR - This must display the path of your folder containing the database dumps. The folder must contain at least the file ``mbdump.tar.bz2``. + This must display the path of your folder containing the database dumps. The folder must contain at least the file ``mbdump.tar.bz2``. - iii. Then import the database dumps by this command:: + 3. Then import the database dumps by this command: - $ docker-compose -f docker/docker-compose.dev.yml run -v $DUMPS_DIR:/home/musicbrainz/dumps \ - -v $PWD/data/mbdata:/var/lib/postgresql/data/pgdata musicbrainz_db + docker-compose -f docker/docker-compose.dev.yml run -v $DUMPS_DIR:/home/musicbrainz/dumps \ + -v $PWD/data/mbdata:/var/lib/postgresql/data/pgdata musicbrainz_db -.. note:: +**Note** You can also use the smaller sample dumps available at http://ftp.musicbrainz.org/pub/musicbrainz/data/sample/ to set up the MusicBrainz database. However, note that these dumps are .tar.xz dumps while AcousticBrainz currently only supports import of .tar.bz2 dumps. So, a decompression of the sample dumps and recompression into .tar.bz2 dumps - will be needed. This can be done using the following command + will be needed. This can be done using the following command: - $ xzcat mbdump-sample.tar.xz | bzip2 > mbdump.tar.bz2 + xzcat mbdump-sample.tar.xz | bzip2 > mbdump.tar.bz2 -.. warning:: +**Warning** - Keep in mind that this process is very time consuming, so make sure that you don't delete - the ``data/mbdata`` directory accidently. Also make sure that you have about 25GB of free - space to keep the MusicBrainz data. 
+Keep in mind that this process is very time consuming, so make sure that you don't delete the ``data/mbdata`` directory accidently. Also make sure that you have about 25GB of free space to keep the MusicBrainz data. Initialization of AcousticBrainz database is also required: -$ ./develop.sh run --rm webserver python2 manage.py init_db + ./develop.sh run --rm webserver python2 manage.py init_db Then you can start all the services: -$ ./develop.sh up --build + ./develop.sh up --build ### Manually From 3a3e9c635d6c23a36182c849eea3d8ff04a80493 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Tue, 5 Jun 2018 23:28:50 +0530 Subject: [PATCH 005/125] Add recent BrainzUtils version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 450d5e0b5..e4883704f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/rsh7/brainzutils-python.git@7c3366d8a432afe4b04438cfb86dc004310b70dd +git+https://github.com/metabrainz/brainzutils-python.git@v1.5.0 click == 6.7 coverage == 4.5.1 Fabric == 1.14.0 From 6c0bfb97c4c16787d8c4cae74243857a84297117 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sat, 26 May 2018 21:31:20 +0530 Subject: [PATCH 006/125] AB-338: Create MusicBrainz schema in AB and add tables --- admin/sql/create_tables.sql | 264 ++++++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) diff --git a/admin/sql/create_tables.sql b/admin/sql/create_tables.sql index 0c2f926bb..5ef008c6d 100644 --- a/admin/sql/create_tables.sql +++ b/admin/sql/create_tables.sql @@ -156,3 +156,267 @@ CREATE TABLE feedback ( ); COMMIT; + + +BEGIN; + +CREATE SCHEMA musicbrainz; + +CREATE TABLE musicbrainz.artist ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + sort_name VARCHAR NOT NULL, + begin_date_year SMALLINT, + begin_date_month SMALLINT, + begin_date_day SMALLINT, + end_date_year SMALLINT, + end_date_month SMALLINT, + end_date_day SMALLINT, + type INTEGER, -- 
references artist_type.id + area INTEGER, -- references area.id + gender INTEGER, -- references gender.id + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + ended BOOLEAN NOT NULL DEFAULT FALSE + CONSTRAINT artist_ended_check CHECK ( + ( + -- If any end date fields are not null, then ended must be true + (end_date_year IS NOT NULL OR + end_date_month IS NOT NULL OR + end_date_day IS NOT NULL) AND + ended = TRUE + ) OR ( + -- Otherwise, all end date fields must be null + (end_date_year IS NULL AND + end_date_month IS NULL AND + end_date_day IS NULL) + ) + ), + begin_area INTEGER, -- references area.id + end_area INTEGER -- references area.id +); + +CREATE TABLE musicbrainz.artist_credit ( + id SERIAL, + name VARCHAR NOT NULL, + artist_count SMALLINT NOT NULL, + ref_count INTEGER DEFAULT 0, + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.artist_credit_name ( + artist_credit INTEGER NOT NULL, -- PK, references artist_credit.id CASCADE + position SMALLINT NOT NULL, -- PK + artist INTEGER NOT NULL, -- references artist.id CASCADE + name VARCHAR NOT NULL, + join_phrase TEXT NOT NULL DEFAULT '' +); + +CREATE TABLE musicbrainz.artist_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references artist.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.area ( + id SERIAL, -- PK + gid uuid NOT NULL, + name VARCHAR NOT NULL, + type INTEGER, -- references area_type.id + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >=0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + begin_date_year SMALLINT, + begin_date_month SMALLINT, + begin_date_day SMALLINT, + end_date_year SMALLINT, + end_date_month SMALLINT, + end_date_day SMALLINT, + ended BOOLEAN NOT NULL DEFAULT FALSE + CHECK ( + ( + -- If any end date fields are not null, then ended must be true + 
(end_date_year IS NOT NULL OR + end_date_month IS NOT NULL OR + end_date_day IS NOT NULL) AND + ended = TRUE + ) OR ( + -- Otherwise, all end date fields must be null + (end_date_year IS NULL AND + end_date_month IS NULL AND + end_date_day IS NULL) + ) + ), + comment VARCHAR(255) NOT NULL DEFAULT '' +); + +CREATE TABLE musicbrainz.recording ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + length INTEGER CHECK (length IS NULL OR length > 0), + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + video BOOLEAN NOT NULL DEFAULT FALSE +); + + +CREATE TABLE musicbrainz.recording_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references recording.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + release_group INTEGER NOT NULL, -- references release_group.id + status INTEGER, -- references release_status.id + packaging INTEGER, -- references release_packaging.id + language INTEGER, -- references language.id + script INTEGER, -- references script.id + barcode VARCHAR(255), + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + quality SMALLINT NOT NULL DEFAULT -1, + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references release.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.track ( + id SERIAL, + gid UUID NOT NULL, + recording INTEGER NOT NULL, -- references recording.id + medium INTEGER NOT NULL, -- references medium.id + position INTEGER NOT NULL, + number TEXT NOT NULL, + 
name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + length INTEGER CHECK (length IS NULL OR length > 0), + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + is_data_track BOOLEAN NOT NULL DEFAULT FALSE +); + +CREATE TABLE musicbrainz.track_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references track.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release_group ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + type INTEGER, -- references release_group_primary_type.id + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release_group_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references release_group.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.medium ( + id SERIAL, + release INTEGER NOT NULL, -- references release.id + position INTEGER NOT NULL, + format INTEGER, -- references medium_format.id + name VARCHAR NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + track_count INTEGER NOT NULL DEFAULT 0 +); + +CREATE TABLE musicbrainz.medium_format ( + id SERIAL, + name VARCHAR(100) NOT NULL, + parent INTEGER, -- references medium_format.id + child_order INTEGER NOT NULL DEFAULT 0, + year SMALLINT, + has_discids BOOLEAN NOT NULL DEFAULT FALSE, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.release_status ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references release_status.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + 
+CREATE TABLE musicbrainz.release_group_primary_type ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references release_group_primary_type.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.language ( + id SERIAL, + iso_code_2t CHAR(3), -- ISO 639-2 (T) + iso_code_2b CHAR(3), -- ISO 639-2 (B) + iso_code_1 CHAR(2), -- ISO 639 + name VARCHAR(100) NOT NULL, + frequency INTEGER NOT NULL DEFAULT 0, + iso_code_3 CHAR(3) -- ISO 639-3 +); +ALTER TABLE musicbrainz.language + ADD CONSTRAINT iso_code_check + CHECK (iso_code_2t IS NOT NULL OR iso_code_3 IS NOT NULL); + +CREATE TABLE musicbrainz.release_packaging ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references release_packaging.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.script ( + id SERIAL, + iso_code CHAR(4) NOT NULL, -- ISO 15924 + iso_number CHAR(3) NOT NULL, -- ISO 15924 + name VARCHAR(100) NOT NULL, + frequency INTEGER NOT NULL DEFAULT 0 +); + +CREATE TABLE musicbrainz.gender ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references gender.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.artist_type ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references artist_type.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +COMMIT; From d84c898309bd6409b40ee2dff77b4673e3a5705a Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sun, 27 May 2018 03:21:33 +0530 Subject: [PATCH 007/125] AB:338: Add sql commands for MB tables in db testing script and fix test errors --- admin/sql/create_tables.sql | 15 ++++++--- db/testing.py | 63 +++++++++++++++++++++++++------------ 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/admin/sql/create_tables.sql b/admin/sql/create_tables.sql index 
5ef008c6d..9c4857aec 100644 --- a/admin/sql/create_tables.sql +++ b/admin/sql/create_tables.sql @@ -155,12 +155,8 @@ CREATE TABLE feedback ( suggestion TEXT ); -COMMIT; - - -BEGIN; -CREATE SCHEMA musicbrainz; +CREATE SCHEMA IF NOT EXISTS musicbrainz; CREATE TABLE musicbrainz.artist ( id SERIAL, @@ -251,6 +247,15 @@ CREATE TABLE musicbrainz.area ( comment VARCHAR(255) NOT NULL DEFAULT '' ); +CREATE TABLE musicbrainz.area_type ( + id SERIAL, -- PK + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references area_type.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + CREATE TABLE musicbrainz.recording ( id SERIAL, gid UUID NOT NULL, diff --git a/db/testing.py b/db/testing.py index b1fb3a6f4..de5a990ea 100644 --- a/db/testing.py +++ b/db/testing.py @@ -41,26 +41,49 @@ def init_db(self): def drop_tables(self): with db.engine.connect() as connection: # TODO(roman): See if there's a better way to drop all tables. - connection.execute('DROP TABLE IF EXISTS highlevel_model CASCADE;') - connection.execute('DROP TABLE IF EXISTS highlevel_meta CASCADE;') - connection.execute('DROP TABLE IF EXISTS highlevel CASCADE;') - connection.execute('DROP TABLE IF EXISTS model CASCADE;') - connection.execute('DROP TABLE IF EXISTS lowlevel_json CASCADE;') - connection.execute('DROP TABLE IF EXISTS lowlevel CASCADE;') - connection.execute('DROP TABLE IF EXISTS version CASCADE;') - connection.execute('DROP TABLE IF EXISTS statistics CASCADE;') - connection.execute('DROP TABLE IF EXISTS incremental_dumps CASCADE;') - connection.execute('DROP TABLE IF EXISTS dataset_snapshot CASCADE;') - connection.execute('DROP TABLE IF EXISTS dataset_eval_jobs CASCADE;') - connection.execute('DROP TABLE IF EXISTS dataset_class_member CASCADE;') - connection.execute('DROP TABLE IF EXISTS dataset_class CASCADE;') - connection.execute('DROP TABLE IF EXISTS dataset CASCADE;') - connection.execute('DROP TABLE IF EXISTS dataset_eval_sets CASCADE;') - 
connection.execute('DROP TABLE IF EXISTS "user" CASCADE;') - connection.execute('DROP TABLE IF EXISTS api_key CASCADE;') - connection.execute('DROP TABLE IF EXISTS challenge CASCADE;') - connection.execute('DROP TABLE IF EXISTS dataset_eval_challenge CASCADE;') - connection.execute('DROP TABLE IF EXISTS feedback CASCADE;') + connection.execute('DROP TABLE IF EXISTS highlevel_model CASCADE;') + connection.execute('DROP TABLE IF EXISTS highlevel_meta CASCADE;') + connection.execute('DROP TABLE IF EXISTS highlevel CASCADE;') + connection.execute('DROP TABLE IF EXISTS model CASCADE;') + connection.execute('DROP TABLE IF EXISTS lowlevel_json CASCADE;') + connection.execute('DROP TABLE IF EXISTS lowlevel CASCADE;') + connection.execute('DROP TABLE IF EXISTS version CASCADE;') + connection.execute('DROP TABLE IF EXISTS statistics CASCADE;') + connection.execute('DROP TABLE IF EXISTS incremental_dumps CASCADE;') + connection.execute('DROP TABLE IF EXISTS dataset_snapshot CASCADE;') + connection.execute('DROP TABLE IF EXISTS dataset_eval_jobs CASCADE;') + connection.execute('DROP TABLE IF EXISTS dataset_class_member CASCADE;') + connection.execute('DROP TABLE IF EXISTS dataset_class CASCADE;') + connection.execute('DROP TABLE IF EXISTS dataset CASCADE;') + connection.execute('DROP TABLE IF EXISTS dataset_eval_sets CASCADE;') + connection.execute('DROP TABLE IF EXISTS "user" CASCADE;') + connection.execute('DROP TABLE IF EXISTS api_key CASCADE;') + connection.execute('DROP TABLE IF EXISTS challenge CASCADE;') + connection.execute('DROP TABLE IF EXISTS dataset_eval_challenge CASCADE;') + connection.execute('DROP TABLE IF EXISTS feedback CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.artist CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.artist_credit CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.artist_credit_name CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.artist_gid_redirect CASCADE;') + 
connection.execute('DROP TABLE IF EXISTS musicbrainz.area CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.area_type CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.recording CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.recording_gid_redirect CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.release CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.release_gid_redirect CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.track CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.track_gid_redirect CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.release_group CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.release_group_gid_redirect CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.medium CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.medium_format CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.release_group_primary_type CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.release_status CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.release_packaging CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.language CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.script CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.gender CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.artist_type CASCADE;') def drop_types(self): with db.engine.connect() as connection: From 3a979cd174ec0339c4ffcf13b801f63640940951 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sun, 27 May 2018 03:23:42 +0530 Subject: [PATCH 008/125] AB-338: Add primary and foreign keys for MusicBrainz tables --- admin/sql/create_foreign_keys.sql | 173 ++++++++++++++++++++++++++++++ admin/sql/create_primary_keys.sql | 25 +++++ 2 files changed, 198 insertions(+) diff --git a/admin/sql/create_foreign_keys.sql 
b/admin/sql/create_foreign_keys.sql index adea39e0a..78880e8df 100644 --- a/admin/sql/create_foreign_keys.sql +++ b/admin/sql/create_foreign_keys.sql @@ -115,4 +115,177 @@ ALTER TABLE feedback FOREIGN KEY (user_id) REFERENCES "user" (id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_type + FOREIGN KEY (type) + REFERENCES musicbrainz.artist_type(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_area + FOREIGN KEY (area) + REFERENCES musicbrainz.area(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_gender + FOREIGN KEY (gender) + REFERENCES musicbrainz.gender(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_begin_area + FOREIGN KEY (begin_area) + REFERENCES musicbrainz.area(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_end_area + FOREIGN KEY (end_area) + REFERENCES musicbrainz.area(id); + +ALTER TABLE musicbrainz.artist_credit_name + ADD CONSTRAINT artist_credit_name_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id) + ON DELETE CASCADE; + +ALTER TABLE musicbrainz.artist_credit_name + ADD CONSTRAINT artist_credit_name_fk_artist + FOREIGN KEY (artist) + REFERENCES musicbrainz.artist(id) + ON DELETE CASCADE; + +ALTER TABLE musicbrainz.artist_gid_redirect + ADD CONSTRAINT artist_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.artist(id); + +ALTER TABLE musicbrainz.area + ADD CONSTRAINT area_fk_type + FOREIGN KEY (type) + REFERENCES musicbrainz.area_type(id); + +ALTER TABLE musicbrainz.area_type + ADD CONSTRAINT area_type_fk_parent + FOREIGN KEY (parent) +REFERENCES musicbrainz.area_type(id); + +ALTER TABLE musicbrainz.recording + ADD CONSTRAINT recording_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.recording_gid_redirect + ADD CONSTRAINT recording_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.recording(id); + +ALTER TABLE 
musicbrainz.release + ADD CONSTRAINT release_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_release_group + FOREIGN KEY (release_group) + REFERENCES musicbrainz.release_group(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_status + FOREIGN KEY (status) + REFERENCES musicbrainz.release_status(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_packaging + FOREIGN KEY (packaging) + REFERENCES musicbrainz.release_packaging(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_language + FOREIGN KEY (language) + REFERENCES musicbrainz.language(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_script + FOREIGN KEY (script) + REFERENCES musicbrainz.script(id); + +ALTER TABLE musicbrainz.release_gid_redirect + ADD CONSTRAINT release_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.release(id); + +ALTER TABLE musicbrainz.track + ADD CONSTRAINT track_fk_recording + FOREIGN KEY (recording) + REFERENCES musicbrainz.recording(id); + +ALTER TABLE musicbrainz.track + ADD CONSTRAINT track_fk_medium + FOREIGN KEY (medium) + REFERENCES musicbrainz.medium(id); + +ALTER TABLE musicbrainz.track + ADD CONSTRAINT track_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.track_gid_redirect + ADD CONSTRAINT track_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.track(id); + +ALTER TABLE musicbrainz.release_group + ADD CONSTRAINT release_group_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.release_group + ADD CONSTRAINT release_group_fk_type + FOREIGN KEY (type) + REFERENCES musicbrainz.release_group_primary_type(id); + +ALTER TABLE musicbrainz.release_group_primary_type + ADD CONSTRAINT release_group_primary_type_fk_parent + FOREIGN KEY 
(parent) + REFERENCES musicbrainz.release_group_primary_type; + +ALTER TABLE musicbrainz.release_group_gid_redirect + ADD CONSTRAINT release_group_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.release_group(id); + +ALTER TABLE musicbrainz.medium + ADD CONSTRAINT medium_fk_release + FOREIGN KEY (release) + REFERENCES musicbrainz.release(id); + +ALTER TABLE musicbrainz.medium + ADD CONSTRAINT medium_fk_format + FOREIGN KEY (format) + REFERENCES musicbrainz.medium_format(id); + +ALTER TABLE musicbrainz.medium_format + ADD CONSTRAINT medium_format_fk_parent + FOREIGN KEY (parent) +REFERENCES musicbrainz.medium_format(id); + +ALTER TABLE musicbrainz.release_status + ADD CONSTRAINT release_status_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.release_status(id); + +ALTER TABLE musicbrainz.release_packaging + ADD CONSTRAINT release_packaging_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.release_packaging(id); + +ALTER TABLE musicbrainz.gender + ADD CONSTRAINT gender_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.gender(id); + +ALTER TABLE musicbrainz.artist_type + ADD CONSTRAINT artist_type_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.artist_type(id); + COMMIT; diff --git a/admin/sql/create_primary_keys.sql b/admin/sql/create_primary_keys.sql index fed96ccba..e27ff3ccb 100644 --- a/admin/sql/create_primary_keys.sql +++ b/admin/sql/create_primary_keys.sql @@ -21,4 +21,29 @@ ALTER TABLE dataset_eval_challenge ADD CONSTRAINT dataset_eval_challenge_pkey PR ALTER TABLE api_key ADD CONSTRAINT api_key_pkey PRIMARY KEY (value); ALTER TABLE feedback ADD CONSTRAINT feedback_pkey PRIMARY KEY (user_id, highlevel_model_id); + +ALTER TABLE musicbrainz.artist ADD CONSTRAINT artist_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.artist_credit ADD CONSTRAINT artist_credit_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.artist_credit_name ADD CONSTRAINT artist_credit_name_pkey PRIMARY KEY (artist_credit, position); 
+ALTER TABLE musicbrainz.artist_gid_redirect ADD CONSTRAINT artist_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.area ADD CONSTRAINT area_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.area_type ADD CONSTRAINT area_type_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.recording ADD CONSTRAINT recording_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.recording_gid_redirect ADD CONSTRAINT recording_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.release ADD CONSTRAINT release_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_gid_redirect ADD CONSTRAINT release_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.track ADD CONSTRAINT track_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.track_gid_redirect ADD CONSTRAINT track_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.release_group ADD CONSTRAINT release_group_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_group_gid_redirect ADD CONSTRAINT release_group_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.medium ADD CONSTRAINT medium_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.medium_format ADD CONSTRAINT medium_format_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_status ADD CONSTRAINT release_status_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_group_primary_type ADD CONSTRAINT release_group_primary_type_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.language ADD CONSTRAINT language_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_packaging ADD CONSTRAINT release_packaging_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.script ADD CONSTRAINT script_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.gender ADD CONSTRAINT gender_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.artist_type ADD CONSTRAINT artist_type_pkey PRIMARY KEY (id); + COMMIT; From 67ff7446da719d4c433d96fa26d468be4c4f24b9 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sun, 27 May 2018 03:29:11 +0530 Subject: [PATCH 009/125] Create indexes 
for columns of MB tables and add update script for schema change --- admin/sql/create_indexes.sql | 45 +- admin/updates/20180525-musicbrainz-schema.sql | 509 ++++++++++++++++++ 2 files changed, 553 insertions(+), 1 deletion(-) create mode 100644 admin/updates/20180525-musicbrainz-schema.sql diff --git a/admin/sql/create_indexes.sql b/admin/sql/create_indexes.sql index 67937583d..bd64b2ae5 100644 --- a/admin/sql/create_indexes.sql +++ b/admin/sql/create_indexes.sql @@ -23,4 +23,47 @@ CREATE INDEX highlevel_ndx_highlevel_model ON highlevel_model (highlevel); CREATE UNIQUE INDEX lower_musicbrainz_id_ndx_user ON "user" (lower(musicbrainz_id)); -COMMIT; + +CREATE UNIQUE INDEX artist_idx_gid ON musicbrainz.artist (gid); +CREATE INDEX artist_idx_name ON musicbrainz.artist (name); +CREATE INDEX artist_idx_sort_name ON musicbrainz.artist (sort_name); +CREATE INDEX artist_idx_area ON musicbrainz.artist (area); +CREATE UNIQUE INDEX artist_idx_null_comment ON musicbrainz.artist (name) WHERE comment IS NULL; +CREATE UNIQUE INDEX artist_idx_uniq_name_comment ON musicbrainz.artist (name, comment) WHERE comment IS NOT NULL; + +CREATE UNIQUE INDEX area_idx_gid ON musicbrainz.area (gid); +CREATE INDEX area_idx_name ON musicbrainz.area (name); + +CREATE INDEX artist_credit_name_idx_artist ON musicbrainz.artist_credit_name (artist); + +CREATE UNIQUE INDEX recording_idx_gid ON musicbrainz.recording (gid); +CREATE INDEX recording_idx_name ON musicbrainz.recording (name); +CREATE INDEX recording_idx_artist_credit ON musicbrainz.recording (artist_credit); + +CREATE UNIQUE INDEX release_idx_gid ON musicbrainz.release (gid); +CREATE INDEX release_idx_name ON musicbrainz.release (name); +CREATE INDEX release_idx_release_group ON musicbrainz.release (release_group); +CREATE INDEX release_idx_artist_credit ON musicbrainz.release (artist_credit); + +CREATE UNIQUE INDEX track_idx_gid ON musicbrainz.track (gid); +CREATE INDEX track_idx_recording ON musicbrainz.track (recording); +CREATE INDEX 
track_idx_name ON musicbrainz.track (name); +CREATE INDEX track_idx_artist_credit ON musicbrainz.track (artist_credit); + +CREATE INDEX artist_gid_redirect_idx_new_id ON musicbrainz.artist_gid_redirect (new_id); + +CREATE INDEX recording_gid_redirect_idx_new_id ON musicbrainz.recording_gid_redirect (new_id); + +CREATE INDEX release_gid_redirect_idx_new_id ON musicbrainz.release_gid_redirect (new_id); + +CREATE INDEX release_group_gid_redirect_idx_new_id ON musicbrainz.release_group_gid_redirect (new_id); + +CREATE INDEX track_gid_redirect_idx_new_id ON musicbrainz.track_gid_redirect (new_id); + +CREATE UNIQUE INDEX release_group_idx_gid ON musicbrainz.release_group (gid); +CREATE INDEX release_group_idx_name ON musicbrainz.release_group (name); +CREATE INDEX release_group_idx_artist_credit ON musicbrainz.release_group (artist_credit); + +CREATE INDEX medium_idx_track_count ON musicbrainz.medium (track_count); + +COMMIT; \ No newline at end of file diff --git a/admin/updates/20180525-musicbrainz-schema.sql b/admin/updates/20180525-musicbrainz-schema.sql new file mode 100644 index 000000000..0b48b0aaf --- /dev/null +++ b/admin/updates/20180525-musicbrainz-schema.sql @@ -0,0 +1,509 @@ +BEGIN; + +CREATE SCHEMA musicbrainz; + +CREATE TABLE musicbrainz.artist ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + sort_name VARCHAR NOT NULL, + begin_date_year SMALLINT, + begin_date_month SMALLINT, + begin_date_day SMALLINT, + end_date_year SMALLINT, + end_date_month SMALLINT, + end_date_day SMALLINT, + type INTEGER, -- references artist_type.id + area INTEGER, -- references area.id + gender INTEGER, -- references gender.id + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + ended BOOLEAN NOT NULL DEFAULT FALSE + CONSTRAINT artist_ended_check CHECK ( + ( + -- If any end date fields are not null, then ended must be true + (end_date_year IS NOT NULL 
OR + end_date_month IS NOT NULL OR + end_date_day IS NOT NULL) AND + ended = TRUE + ) OR ( + -- Otherwise, all end date fields must be null + (end_date_year IS NULL AND + end_date_month IS NULL AND + end_date_day IS NULL) + ) + ), + begin_area INTEGER, -- references area.id + end_area INTEGER -- references area.id +); + +CREATE TABLE musicbrainz.artist_credit ( + id SERIAL, + name VARCHAR NOT NULL, + artist_count SMALLINT NOT NULL, + ref_count INTEGER DEFAULT 0, + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.artist_credit_name ( + artist_credit INTEGER NOT NULL, -- PK, references artist_credit.id CASCADE + position SMALLINT NOT NULL, -- PK + artist INTEGER NOT NULL, -- references artist.id CASCADE + name VARCHAR NOT NULL, + join_phrase TEXT NOT NULL DEFAULT '' +); + +CREATE TABLE musicbrainz.artist_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references artist.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.area ( + id SERIAL, -- PK + gid uuid NOT NULL, + name VARCHAR NOT NULL, + type INTEGER, -- references area_type.id + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >=0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + begin_date_year SMALLINT, + begin_date_month SMALLINT, + begin_date_day SMALLINT, + end_date_year SMALLINT, + end_date_month SMALLINT, + end_date_day SMALLINT, + ended BOOLEAN NOT NULL DEFAULT FALSE + CHECK ( + ( + -- If any end date fields are not null, then ended must be true + (end_date_year IS NOT NULL OR + end_date_month IS NOT NULL OR + end_date_day IS NOT NULL) AND + ended = TRUE + ) OR ( + -- Otherwise, all end date fields must be null + (end_date_year IS NULL AND + end_date_month IS NULL AND + end_date_day IS NULL) + ) + ), + comment VARCHAR(255) NOT NULL DEFAULT '' +); + +CREATE TABLE musicbrainz.area_type ( + id SERIAL, -- PK + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references area_type.id + child_order 
INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.recording ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + length INTEGER CHECK (length IS NULL OR length > 0), + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + video BOOLEAN NOT NULL DEFAULT FALSE +); + + +CREATE TABLE musicbrainz.recording_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references recording.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + release_group INTEGER NOT NULL, -- references release_group.id + status INTEGER, -- references release_status.id + packaging INTEGER, -- references release_packaging.id + language INTEGER, -- references language.id + script INTEGER, -- references script.id + barcode VARCHAR(255), + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + quality SMALLINT NOT NULL DEFAULT -1, + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references release.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.track ( + id SERIAL, + gid UUID NOT NULL, + recording INTEGER NOT NULL, -- references recording.id + medium INTEGER NOT NULL, -- references medium.id + position INTEGER NOT NULL, + number TEXT NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + length INTEGER CHECK (length IS NULL OR length > 0), + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated 
TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + is_data_track BOOLEAN NOT NULL DEFAULT FALSE +); + +CREATE TABLE musicbrainz.track_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references track.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release_group ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + type INTEGER, -- references release_group_primary_type.id + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release_group_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references release_group.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.medium ( + id SERIAL, + release INTEGER NOT NULL, -- references release.id + position INTEGER NOT NULL, + format INTEGER, -- references medium_format.id + name VARCHAR NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + track_count INTEGER NOT NULL DEFAULT 0 +); + +CREATE TABLE musicbrainz.medium_format ( + id SERIAL, + name VARCHAR(100) NOT NULL, + parent INTEGER, -- references medium_format.id + child_order INTEGER NOT NULL DEFAULT 0, + year SMALLINT, + has_discids BOOLEAN NOT NULL DEFAULT FALSE, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.release_status ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references release_status.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.release_group_primary_type ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references release_group_primary_type.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid 
NOT NULL +); + +CREATE TABLE musicbrainz.language ( + id SERIAL, + iso_code_2t CHAR(3), -- ISO 639-2 (T) + iso_code_2b CHAR(3), -- ISO 639-2 (B) + iso_code_1 CHAR(2), -- ISO 639 + name VARCHAR(100) NOT NULL, + frequency INTEGER NOT NULL DEFAULT 0, + iso_code_3 CHAR(3) -- ISO 639-3 +); +ALTER TABLE musicbrainz.language + ADD CONSTRAINT iso_code_check + CHECK (iso_code_2t IS NOT NULL OR iso_code_3 IS NOT NULL); + +CREATE TABLE musicbrainz.release_packaging ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references release_packaging.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.script ( + id SERIAL, + iso_code CHAR(4) NOT NULL, -- ISO 15924 + iso_number CHAR(3) NOT NULL, -- ISO 15924 + name VARCHAR(100) NOT NULL, + frequency INTEGER NOT NULL DEFAULT 0 +); + +CREATE TABLE musicbrainz.gender ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references gender.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.artist_type ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references artist_type.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +ALTER TABLE musicbrainz.artist ADD CONSTRAINT artist_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.artist_credit ADD CONSTRAINT artist_credit_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.artist_credit_name ADD CONSTRAINT artist_credit_name_pkey PRIMARY KEY (artist_credit, position); +ALTER TABLE musicbrainz.artist_gid_redirect ADD CONSTRAINT artist_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.area ADD CONSTRAINT area_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.area_type ADD CONSTRAINT area_type_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.recording ADD CONSTRAINT recording_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.recording_gid_redirect ADD CONSTRAINT 
recording_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.release ADD CONSTRAINT release_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_gid_redirect ADD CONSTRAINT release_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.track ADD CONSTRAINT track_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.track_gid_redirect ADD CONSTRAINT track_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.release_group ADD CONSTRAINT release_group_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_group_gid_redirect ADD CONSTRAINT release_group_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.medium ADD CONSTRAINT medium_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.medium_format ADD CONSTRAINT medium_format_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_status ADD CONSTRAINT release_status_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_group_primary_type ADD CONSTRAINT release_group_primary_type_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.language ADD CONSTRAINT language_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_packaging ADD CONSTRAINT release_packaging_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.script ADD CONSTRAINT script_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.gender ADD CONSTRAINT gender_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.artist_type ADD CONSTRAINT artist_type_pkey PRIMARY KEY (id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_type + FOREIGN KEY (type) + REFERENCES musicbrainz.artist_type(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_area + FOREIGN KEY (area) + REFERENCES musicbrainz.area(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_gender + FOREIGN KEY (gender) + REFERENCES musicbrainz.gender(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_begin_area + FOREIGN KEY (begin_area) + REFERENCES musicbrainz.area(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_end_area 
+ FOREIGN KEY (end_area) + REFERENCES musicbrainz.area(id); + +ALTER TABLE musicbrainz.artist_credit_name + ADD CONSTRAINT artist_credit_name_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id) + ON DELETE CASCADE; + +ALTER TABLE musicbrainz.artist_credit_name + ADD CONSTRAINT artist_credit_name_fk_artist + FOREIGN KEY (artist) + REFERENCES musicbrainz.artist(id) + ON DELETE CASCADE; + +ALTER TABLE musicbrainz.artist_gid_redirect + ADD CONSTRAINT artist_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.artist(id); + +ALTER TABLE musicbrainz.area + ADD CONSTRAINT area_fk_type + FOREIGN KEY (type) + REFERENCES musicbrainz.area_type(id); + +ALTER TABLE musicbrainz.area_type + ADD CONSTRAINT area_type_fk_parent + FOREIGN KEY (parent) +REFERENCES musicbrainz.area_type(id); + +ALTER TABLE musicbrainz.recording + ADD CONSTRAINT recording_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.recording_gid_redirect + ADD CONSTRAINT recording_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.recording(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_release_group + FOREIGN KEY (release_group) + REFERENCES musicbrainz.release_group(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_status + FOREIGN KEY (status) + REFERENCES musicbrainz.release_status(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_packaging + FOREIGN KEY (packaging) + REFERENCES musicbrainz.release_packaging(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_language + FOREIGN KEY (language) + REFERENCES musicbrainz.language(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_script + FOREIGN KEY (script) + REFERENCES 
musicbrainz.script(id); + +ALTER TABLE musicbrainz.release_gid_redirect + ADD CONSTRAINT release_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.release(id); + +ALTER TABLE musicbrainz.track + ADD CONSTRAINT track_fk_recording + FOREIGN KEY (recording) + REFERENCES musicbrainz.recording(id); + +ALTER TABLE musicbrainz.track + ADD CONSTRAINT track_fk_medium + FOREIGN KEY (medium) + REFERENCES musicbrainz.medium(id); + +ALTER TABLE musicbrainz.track + ADD CONSTRAINT track_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.track_gid_redirect + ADD CONSTRAINT track_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.track(id); + +ALTER TABLE musicbrainz.release_group + ADD CONSTRAINT release_group_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.release_group + ADD CONSTRAINT release_group_fk_type + FOREIGN KEY (type) + REFERENCES musicbrainz.release_group_primary_type(id); + +ALTER TABLE musicbrainz.release_group_primary_type + ADD CONSTRAINT release_group_primary_type_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.release_group_primary_type; + +ALTER TABLE musicbrainz.release_group_gid_redirect + ADD CONSTRAINT release_group_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.release_group(id); + +ALTER TABLE musicbrainz.medium + ADD CONSTRAINT medium_fk_release + FOREIGN KEY (release) + REFERENCES musicbrainz.release(id); + +ALTER TABLE musicbrainz.medium + ADD CONSTRAINT medium_fk_format + FOREIGN KEY (format) + REFERENCES musicbrainz.medium_format(id); + +ALTER TABLE musicbrainz.medium_format + ADD CONSTRAINT medium_format_fk_parent + FOREIGN KEY (parent) +REFERENCES musicbrainz.medium_format(id); + +ALTER TABLE musicbrainz.release_status + ADD CONSTRAINT release_status_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.release_status(id); + +ALTER 
TABLE musicbrainz.release_packaging + ADD CONSTRAINT release_packaging_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.release_packaging(id); + +ALTER TABLE musicbrainz.gender + ADD CONSTRAINT gender_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.gender(id); + +ALTER TABLE musicbrainz.artist_type + ADD CONSTRAINT artist_type_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.artist_type(id); + +CREATE UNIQUE INDEX artist_idx_gid ON musicbrainz.artist (gid); +CREATE INDEX artist_idx_name ON musicbrainz.artist (name); +CREATE INDEX artist_idx_sort_name ON musicbrainz.artist (sort_name); +CREATE INDEX artist_idx_area ON musicbrainz.artist (area); +CREATE UNIQUE INDEX artist_idx_null_comment ON musicbrainz.artist (name) WHERE comment IS NULL; +CREATE UNIQUE INDEX artist_idx_uniq_name_comment ON musicbrainz.artist (name, comment) WHERE comment IS NOT NULL; + +CREATE UNIQUE INDEX area_idx_gid ON musicbrainz.area (gid); +CREATE INDEX area_idx_name ON musicbrainz.area (name) + +CREATE INDEX artist_credit_name_idx_artist ON musicbrainz.artist_credit_name (artist); + +CREATE UNIQUE INDEX recording_idx_gid ON musicbrainz.recording (gid); +CREATE INDEX recording_idx_name ON musicbrainz.recording (name); +CREATE INDEX recording_idx_artist_credit ON musicbrainz.recording (artist_credit); + +CREATE UNIQUE INDEX release_idx_gid ON musicbrainz.release (gid); +CREATE INDEX release_idx_name ON musicbrainz.release (name); +CREATE INDEX release_idx_release_group ON musicbrainz.release (release_group); +CREATE INDEX release_idx_artist_credit ON musicbrainz.release (artist_credit); + +CREATE UNIQUE INDEX track_idx_gid ON musicbrainz.track (gid); +CREATE INDEX track_idx_recording ON musicbrainz.track (recording); +CREATE INDEX track_idx_name ON musicbrainz.track (name); +CREATE INDEX track_idx_artist_credit ON musicbrainz.track (artist_credit); + +CREATE INDEX artist_gid_redirect_idx_new_id ON musicbrainz.artist_gid_redirect (new_id); + +CREATE INDEX 
recording_gid_redirect_idx_new_id ON musicbrainz.recording_gid_redirect (new_id); + +CREATE INDEX release_gid_redirect_idx_new_id ON musicbrainz.release_gid_redirect (new_id); + +CREATE INDEX release_group_gid_redirect_idx_new_id ON musicbrainz.release_group_gid_redirect (new_id); + +CREATE INDEX track_gid_redirect_idx_new_id ON musicbrainz.track_gid_redirect (new_id); + +CREATE UNIQUE INDEX release_group_idx_gid ON musicbrainz.release_group (gid); +CREATE INDEX release_group_idx_name ON musicbrainz.release_group (name); +CREATE INDEX release_group_idx_artist_credit ON musicbrainz.release_group (artist_credit); + +CREATE INDEX medium_idx_track_count ON musicbrainz.medium (track_count); + +COMMIT; From fe1186aea85f2685a71bc3c91940e7b21a40d388 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sun, 27 May 2018 03:34:36 +0530 Subject: [PATCH 010/125] Add a condition to create schema command --- admin/updates/20180525-musicbrainz-schema.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/admin/updates/20180525-musicbrainz-schema.sql b/admin/updates/20180525-musicbrainz-schema.sql index 0b48b0aaf..054a6b09b 100644 --- a/admin/updates/20180525-musicbrainz-schema.sql +++ b/admin/updates/20180525-musicbrainz-schema.sql @@ -1,6 +1,6 @@ BEGIN; -CREATE SCHEMA musicbrainz; +CREATE SCHEMA IF NOT EXISTS musicbrainz; CREATE TABLE musicbrainz.artist ( id SERIAL, From ec7e8ca4920c3a620f9a05e8482ddd7717e0fbe6 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Thu, 7 Jun 2018 15:28:56 +0530 Subject: [PATCH 011/125] Add separate sql files for musicbrainz schema, table, indexes, primary & foreign keys --- admin/sql/create_foreign_keys.sql | 173 ----------- admin/sql/create_indexes.sql | 45 +-- admin/sql/create_musicbrainz_foreign_keys.sql | 118 ++++++++ admin/sql/create_musicbrainz_indexes.sql | 45 +++ admin/sql/create_musicbrainz_primary_keys.sql | 27 ++ admin/sql/create_musicbrainz_schema.sql | 7 + admin/sql/create_musicbrainz_tables.sql | 269 ++++++++++++++++++ 
admin/sql/create_primary_keys.sql | 25 -- admin/sql/create_tables.sql | 269 ------------------ 9 files changed, 467 insertions(+), 511 deletions(-) create mode 100644 admin/sql/create_musicbrainz_foreign_keys.sql create mode 100644 admin/sql/create_musicbrainz_indexes.sql create mode 100644 admin/sql/create_musicbrainz_primary_keys.sql create mode 100644 admin/sql/create_musicbrainz_schema.sql create mode 100644 admin/sql/create_musicbrainz_tables.sql diff --git a/admin/sql/create_foreign_keys.sql b/admin/sql/create_foreign_keys.sql index 78880e8df..adea39e0a 100644 --- a/admin/sql/create_foreign_keys.sql +++ b/admin/sql/create_foreign_keys.sql @@ -115,177 +115,4 @@ ALTER TABLE feedback FOREIGN KEY (user_id) REFERENCES "user" (id); - -ALTER TABLE musicbrainz.artist - ADD CONSTRAINT artist_fk_type - FOREIGN KEY (type) - REFERENCES musicbrainz.artist_type(id); - -ALTER TABLE musicbrainz.artist - ADD CONSTRAINT artist_fk_area - FOREIGN KEY (area) - REFERENCES musicbrainz.area(id); - -ALTER TABLE musicbrainz.artist - ADD CONSTRAINT artist_fk_gender - FOREIGN KEY (gender) - REFERENCES musicbrainz.gender(id); - -ALTER TABLE musicbrainz.artist - ADD CONSTRAINT artist_fk_begin_area - FOREIGN KEY (begin_area) - REFERENCES musicbrainz.area(id); - -ALTER TABLE musicbrainz.artist - ADD CONSTRAINT artist_fk_end_area - FOREIGN KEY (end_area) - REFERENCES musicbrainz.area(id); - -ALTER TABLE musicbrainz.artist_credit_name - ADD CONSTRAINT artist_credit_name_fk_artist_credit - FOREIGN KEY (artist_credit) - REFERENCES musicbrainz.artist_credit(id) - ON DELETE CASCADE; - -ALTER TABLE musicbrainz.artist_credit_name - ADD CONSTRAINT artist_credit_name_fk_artist - FOREIGN KEY (artist) - REFERENCES musicbrainz.artist(id) - ON DELETE CASCADE; - -ALTER TABLE musicbrainz.artist_gid_redirect - ADD CONSTRAINT artist_gid_redirect_fk_new_id - FOREIGN KEY (new_id) - REFERENCES musicbrainz.artist(id); - -ALTER TABLE musicbrainz.area - ADD CONSTRAINT area_fk_type - FOREIGN KEY (type) - REFERENCES 
musicbrainz.area_type(id); - -ALTER TABLE musicbrainz.area_type - ADD CONSTRAINT area_type_fk_parent - FOREIGN KEY (parent) -REFERENCES musicbrainz.area_type(id); - -ALTER TABLE musicbrainz.recording - ADD CONSTRAINT recording_fk_artist_credit - FOREIGN KEY (artist_credit) - REFERENCES musicbrainz.artist_credit(id); - -ALTER TABLE musicbrainz.recording_gid_redirect - ADD CONSTRAINT recording_gid_redirect_fk_new_id - FOREIGN KEY (new_id) - REFERENCES musicbrainz.recording(id); - -ALTER TABLE musicbrainz.release - ADD CONSTRAINT release_fk_artist_credit - FOREIGN KEY (artist_credit) - REFERENCES musicbrainz.artist_credit(id); - -ALTER TABLE musicbrainz.release - ADD CONSTRAINT release_fk_release_group - FOREIGN KEY (release_group) - REFERENCES musicbrainz.release_group(id); - -ALTER TABLE musicbrainz.release - ADD CONSTRAINT release_fk_status - FOREIGN KEY (status) - REFERENCES musicbrainz.release_status(id); - -ALTER TABLE musicbrainz.release - ADD CONSTRAINT release_fk_packaging - FOREIGN KEY (packaging) - REFERENCES musicbrainz.release_packaging(id); - -ALTER TABLE musicbrainz.release - ADD CONSTRAINT release_fk_language - FOREIGN KEY (language) - REFERENCES musicbrainz.language(id); - -ALTER TABLE musicbrainz.release - ADD CONSTRAINT release_fk_script - FOREIGN KEY (script) - REFERENCES musicbrainz.script(id); - -ALTER TABLE musicbrainz.release_gid_redirect - ADD CONSTRAINT release_gid_redirect_fk_new_id - FOREIGN KEY (new_id) - REFERENCES musicbrainz.release(id); - -ALTER TABLE musicbrainz.track - ADD CONSTRAINT track_fk_recording - FOREIGN KEY (recording) - REFERENCES musicbrainz.recording(id); - -ALTER TABLE musicbrainz.track - ADD CONSTRAINT track_fk_medium - FOREIGN KEY (medium) - REFERENCES musicbrainz.medium(id); - -ALTER TABLE musicbrainz.track - ADD CONSTRAINT track_fk_artist_credit - FOREIGN KEY (artist_credit) - REFERENCES musicbrainz.artist_credit(id); - -ALTER TABLE musicbrainz.track_gid_redirect - ADD CONSTRAINT track_gid_redirect_fk_new_id - 
FOREIGN KEY (new_id) - REFERENCES musicbrainz.track(id); - -ALTER TABLE musicbrainz.release_group - ADD CONSTRAINT release_group_fk_artist_credit - FOREIGN KEY (artist_credit) - REFERENCES musicbrainz.artist_credit(id); - -ALTER TABLE musicbrainz.release_group - ADD CONSTRAINT release_group_fk_type - FOREIGN KEY (type) - REFERENCES musicbrainz.release_group_primary_type(id); - -ALTER TABLE musicbrainz.release_group_primary_type - ADD CONSTRAINT release_group_primary_type_fk_parent - FOREIGN KEY (parent) - REFERENCES musicbrainz.release_group_primary_type; - -ALTER TABLE musicbrainz.release_group_gid_redirect - ADD CONSTRAINT release_group_gid_redirect_fk_new_id - FOREIGN KEY (new_id) - REFERENCES musicbrainz.release_group(id); - -ALTER TABLE musicbrainz.medium - ADD CONSTRAINT medium_fk_release - FOREIGN KEY (release) - REFERENCES musicbrainz.release(id); - -ALTER TABLE musicbrainz.medium - ADD CONSTRAINT medium_fk_format - FOREIGN KEY (format) - REFERENCES musicbrainz.medium_format(id); - -ALTER TABLE musicbrainz.medium_format - ADD CONSTRAINT medium_format_fk_parent - FOREIGN KEY (parent) -REFERENCES musicbrainz.medium_format(id); - -ALTER TABLE musicbrainz.release_status - ADD CONSTRAINT release_status_fk_parent - FOREIGN KEY (parent) - REFERENCES musicbrainz.release_status(id); - -ALTER TABLE musicbrainz.release_packaging - ADD CONSTRAINT release_packaging_fk_parent - FOREIGN KEY (parent) - REFERENCES musicbrainz.release_packaging(id); - -ALTER TABLE musicbrainz.gender - ADD CONSTRAINT gender_fk_parent - FOREIGN KEY (parent) - REFERENCES musicbrainz.gender(id); - -ALTER TABLE musicbrainz.artist_type - ADD CONSTRAINT artist_type_fk_parent - FOREIGN KEY (parent) - REFERENCES musicbrainz.artist_type(id); - COMMIT; diff --git a/admin/sql/create_indexes.sql b/admin/sql/create_indexes.sql index bd64b2ae5..67937583d 100644 --- a/admin/sql/create_indexes.sql +++ b/admin/sql/create_indexes.sql @@ -23,47 +23,4 @@ CREATE INDEX highlevel_ndx_highlevel_model ON 
highlevel_model (highlevel); CREATE UNIQUE INDEX lower_musicbrainz_id_ndx_user ON "user" (lower(musicbrainz_id)); - -CREATE UNIQUE INDEX artist_idx_gid ON musicbrainz.artist (gid); -CREATE INDEX artist_idx_name ON musicbrainz.artist (name); -CREATE INDEX artist_idx_sort_name ON musicbrainz.artist (sort_name); -CREATE INDEX artist_idx_area ON musicbrainz.artist (area); -CREATE UNIQUE INDEX artist_idx_null_comment ON musicbrainz.artist (name) WHERE comment IS NULL; -CREATE UNIQUE INDEX artist_idx_uniq_name_comment ON musicbrainz.artist (name, comment) WHERE comment IS NOT NULL; - -CREATE UNIQUE INDEX area_idx_gid ON musicbrainz.area (gid); -CREATE INDEX area_idx_name ON musicbrainz.area (name); - -CREATE INDEX artist_credit_name_idx_artist ON musicbrainz.artist_credit_name (artist); - -CREATE UNIQUE INDEX recording_idx_gid ON musicbrainz.recording (gid); -CREATE INDEX recording_idx_name ON musicbrainz.recording (name); -CREATE INDEX recording_idx_artist_credit ON musicbrainz.recording (artist_credit); - -CREATE UNIQUE INDEX release_idx_gid ON musicbrainz.release (gid); -CREATE INDEX release_idx_name ON musicbrainz.release (name); -CREATE INDEX release_idx_release_group ON musicbrainz.release (release_group); -CREATE INDEX release_idx_artist_credit ON musicbrainz.release (artist_credit); - -CREATE UNIQUE INDEX track_idx_gid ON musicbrainz.track (gid); -CREATE INDEX track_idx_recording ON musicbrainz.track (recording); -CREATE INDEX track_idx_name ON musicbrainz.track (name); -CREATE INDEX track_idx_artist_credit ON musicbrainz.track (artist_credit); - -CREATE INDEX artist_gid_redirect_idx_new_id ON musicbrainz.artist_gid_redirect (new_id); - -CREATE INDEX recording_gid_redirect_idx_new_id ON musicbrainz.recording_gid_redirect (new_id); - -CREATE INDEX release_gid_redirect_idx_new_id ON musicbrainz.release_gid_redirect (new_id); - -CREATE INDEX release_group_gid_redirect_idx_new_id ON musicbrainz.release_group_gid_redirect (new_id); - -CREATE INDEX 
track_gid_redirect_idx_new_id ON musicbrainz.track_gid_redirect (new_id); - -CREATE UNIQUE INDEX release_group_idx_gid ON musicbrainz.release_group (gid); -CREATE INDEX release_group_idx_name ON musicbrainz.release_group (name); -CREATE INDEX release_group_idx_artist_credit ON musicbrainz.release_group (artist_credit); - -CREATE INDEX medium_idx_track_count ON musicbrainz.medium (track_count); - -COMMIT; \ No newline at end of file +COMMIT; diff --git a/admin/sql/create_musicbrainz_foreign_keys.sql b/admin/sql/create_musicbrainz_foreign_keys.sql new file mode 100644 index 000000000..adea39e0a --- /dev/null +++ b/admin/sql/create_musicbrainz_foreign_keys.sql @@ -0,0 +1,118 @@ +BEGIN; + +ALTER TABLE lowlevel_json + ADD CONSTRAINT lowlevel_json_fk_lowlevel + FOREIGN KEY (id) + REFERENCES lowlevel (id); + +ALTER TABLE lowlevel_json + ADD CONSTRAINT lowlevel_json_fk_version + FOREIGN KEY (version) + REFERENCES version (id); + +ALTER TABLE highlevel + ADD CONSTRAINT highlevel_fk_lowlevel + FOREIGN KEY (id) + REFERENCES lowlevel (id); + +ALTER TABLE highlevel_meta + ADD CONSTRAINT highlevel_meta_fk_highlevel + FOREIGN KEY (id) + REFERENCES highlevel (id); + +ALTER TABLE highlevel_model + ADD CONSTRAINT highlevel_model_fk_highlevel + FOREIGN KEY (highlevel) + REFERENCES highlevel (id); + +ALTER TABLE highlevel_model + ADD CONSTRAINT highlevel_model_fk_version + FOREIGN KEY (version) + REFERENCES version (id); + +ALTER TABLE highlevel_model + ADD CONSTRAINT highlevel_model_fk_model + FOREIGN KEY (model) + REFERENCES model (id); + +ALTER TABLE dataset + ADD CONSTRAINT dataset_fk_user + FOREIGN KEY (author) + REFERENCES "user" (id) + ON UPDATE CASCADE + ON DELETE CASCADE; + +ALTER TABLE dataset_class + ADD CONSTRAINT class_fk_dataset + FOREIGN KEY (dataset) + REFERENCES dataset (id) + ON UPDATE CASCADE + ON DELETE CASCADE; + +ALTER TABLE dataset_class_member + ADD CONSTRAINT class_member_fk_class + FOREIGN KEY (class) + REFERENCES dataset_class (id) + ON UPDATE CASCADE + ON 
DELETE CASCADE; + +ALTER TABLE dataset_eval_jobs + ADD CONSTRAINT dataset_eval_jobs_fk_dataset_snapshot + FOREIGN KEY (snapshot_id) + REFERENCES dataset_snapshot (id); + +ALTER TABLE dataset_eval_jobs + ADD CONSTRAINT dataset_eval_jobs_fk_training_snapshot + FOREIGN KEY (training_snapshot) + REFERENCES dataset_eval_sets (id) + ON UPDATE CASCADE + ON DELETE CASCADE; + +ALTER TABLE dataset_eval_jobs + ADD CONSTRAINT dataset_eval_jobs_fk_testing_snapshot + FOREIGN KEY (testing_snapshot) + REFERENCES dataset_eval_sets (id) + ON UPDATE CASCADE + ON DELETE CASCADE; + +ALTER TABLE dataset_snapshot + ADD CONSTRAINT dataset_id_fk_dataset + FOREIGN KEY (dataset_id) + REFERENCES dataset (id); + +ALTER TABLE challenge + ADD CONSTRAINT challenge_fk_dataset_snapshot + FOREIGN KEY (validation_snapshot) + REFERENCES dataset_snapshot (id); + +ALTER TABLE challenge + ADD CONSTRAINT challenge_fk_user + FOREIGN KEY (creator) + REFERENCES "user" (id); + +ALTER TABLE dataset_eval_challenge + ADD CONSTRAINT dataset_eval_challenge_fk_dataset_eval_job + FOREIGN KEY (dataset_eval_job) + REFERENCES dataset_eval_jobs (id); + +ALTER TABLE dataset_eval_challenge + ADD CONSTRAINT dataset_eval_challenge_fk_challenge + FOREIGN KEY (challenge_id) + REFERENCES challenge (id); + +ALTER TABLE api_key + ADD CONSTRAINT api_key_fk_user + FOREIGN KEY (owner) + REFERENCES "user" (id); + +ALTER TABLE feedback + ADD CONSTRAINT feedback_fk_highlevel_model + FOREIGN KEY (highlevel_model_id) + REFERENCES highlevel_model (id); + +ALTER TABLE feedback + ADD CONSTRAINT feedback_fk_user + FOREIGN KEY (user_id) + REFERENCES "user" (id); + +COMMIT; diff --git a/admin/sql/create_musicbrainz_indexes.sql b/admin/sql/create_musicbrainz_indexes.sql new file mode 100644 index 000000000..4b90bf402 --- /dev/null +++ b/admin/sql/create_musicbrainz_indexes.sql @@ -0,0 +1,45 @@ +BEGIN; + +CREATE UNIQUE INDEX artist_idx_gid ON musicbrainz.artist (gid); +CREATE INDEX artist_idx_name ON musicbrainz.artist (name); +CREATE INDEX 
artist_idx_sort_name ON musicbrainz.artist (sort_name); +CREATE INDEX artist_idx_area ON musicbrainz.artist (area); +CREATE UNIQUE INDEX artist_idx_null_comment ON musicbrainz.artist (name) WHERE comment IS NULL; +CREATE UNIQUE INDEX artist_idx_uniq_name_comment ON musicbrainz.artist (name, comment) WHERE comment IS NOT NULL; + +CREATE UNIQUE INDEX area_idx_gid ON musicbrainz.area (gid); +CREATE INDEX area_idx_name ON musicbrainz.area (name); + +CREATE INDEX artist_credit_name_idx_artist ON musicbrainz.artist_credit_name (artist); + +CREATE UNIQUE INDEX recording_idx_gid ON musicbrainz.recording (gid); +CREATE INDEX recording_idx_name ON musicbrainz.recording (name); +CREATE INDEX recording_idx_artist_credit ON musicbrainz.recording (artist_credit); + +CREATE UNIQUE INDEX release_idx_gid ON musicbrainz.release (gid); +CREATE INDEX release_idx_name ON musicbrainz.release (name); +CREATE INDEX release_idx_release_group ON musicbrainz.release (release_group); +CREATE INDEX release_idx_artist_credit ON musicbrainz.release (artist_credit); + +CREATE UNIQUE INDEX track_idx_gid ON musicbrainz.track (gid); +CREATE INDEX track_idx_recording ON musicbrainz.track (recording); +CREATE INDEX track_idx_name ON musicbrainz.track (name); +CREATE INDEX track_idx_artist_credit ON musicbrainz.track (artist_credit); + +CREATE INDEX artist_gid_redirect_idx_new_id ON musicbrainz.artist_gid_redirect (new_id); + +CREATE INDEX recording_gid_redirect_idx_new_id ON musicbrainz.recording_gid_redirect (new_id); + +CREATE INDEX release_gid_redirect_idx_new_id ON musicbrainz.release_gid_redirect (new_id); + +CREATE INDEX release_group_gid_redirect_idx_new_id ON musicbrainz.release_group_gid_redirect (new_id); + +CREATE INDEX track_gid_redirect_idx_new_id ON musicbrainz.track_gid_redirect (new_id); + +CREATE UNIQUE INDEX release_group_idx_gid ON musicbrainz.release_group (gid); +CREATE INDEX release_group_idx_name ON musicbrainz.release_group (name); +CREATE INDEX release_group_idx_artist_credit 
ON musicbrainz.release_group (artist_credit); + +CREATE INDEX medium_idx_track_count ON musicbrainz.medium (track_count); + +COMMIT; diff --git a/admin/sql/create_musicbrainz_primary_keys.sql b/admin/sql/create_musicbrainz_primary_keys.sql new file mode 100644 index 000000000..ec595879e --- /dev/null +++ b/admin/sql/create_musicbrainz_primary_keys.sql @@ -0,0 +1,27 @@ +BEGIN; + +ALTER TABLE musicbrainz.artist ADD CONSTRAINT artist_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.artist_credit ADD CONSTRAINT artist_credit_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.artist_credit_name ADD CONSTRAINT artist_credit_name_pkey PRIMARY KEY (artist_credit, position); +ALTER TABLE musicbrainz.artist_gid_redirect ADD CONSTRAINT artist_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.area ADD CONSTRAINT area_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.area_type ADD CONSTRAINT area_type_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.recording ADD CONSTRAINT recording_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.recording_gid_redirect ADD CONSTRAINT recording_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.release ADD CONSTRAINT release_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_gid_redirect ADD CONSTRAINT release_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.track ADD CONSTRAINT track_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.track_gid_redirect ADD CONSTRAINT track_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.release_group ADD CONSTRAINT release_group_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_group_gid_redirect ADD CONSTRAINT release_group_gid_redirect_pkey PRIMARY KEY (gid); +ALTER TABLE musicbrainz.medium ADD CONSTRAINT medium_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.medium_format ADD CONSTRAINT medium_format_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_status ADD CONSTRAINT release_status_pkey PRIMARY KEY (id); +ALTER TABLE 
musicbrainz.release_group_primary_type ADD CONSTRAINT release_group_primary_type_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.language ADD CONSTRAINT language_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.release_packaging ADD CONSTRAINT release_packaging_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.script ADD CONSTRAINT script_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.gender ADD CONSTRAINT gender_pkey PRIMARY KEY (id); +ALTER TABLE musicbrainz.artist_type ADD CONSTRAINT artist_type_pkey PRIMARY KEY (id); + +COMMIT; diff --git a/admin/sql/create_musicbrainz_schema.sql b/admin/sql/create_musicbrainz_schema.sql new file mode 100644 index 000000000..2bbfa59fc --- /dev/null +++ b/admin/sql/create_musicbrainz_schema.sql @@ -0,0 +1,7 @@ +-- Create the musicbrainz schema. + +BEGIN; + +CREATE SCHEMA IF NOT EXISTS musicbrainz; + +COMMIT; diff --git a/admin/sql/create_musicbrainz_tables.sql b/admin/sql/create_musicbrainz_tables.sql new file mode 100644 index 000000000..fdf187422 --- /dev/null +++ b/admin/sql/create_musicbrainz_tables.sql @@ -0,0 +1,269 @@ +BEGIN; + +CREATE TABLE musicbrainz.artist ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + sort_name VARCHAR NOT NULL, + begin_date_year SMALLINT, + begin_date_month SMALLINT, + begin_date_day SMALLINT, + end_date_year SMALLINT, + end_date_month SMALLINT, + end_date_day SMALLINT, + type INTEGER, -- references artist_type.id + area INTEGER, -- references area.id + gender INTEGER, -- references gender.id + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + ended BOOLEAN NOT NULL DEFAULT FALSE + CONSTRAINT artist_ended_check CHECK ( + ( + -- If any end date fields are not null, then ended must be true + (end_date_year IS NOT NULL OR + end_date_month IS NOT NULL OR + end_date_day IS NOT NULL) AND + ended = TRUE + ) OR ( + -- Otherwise, all end date fields must be null + 
(end_date_year IS NULL AND + end_date_month IS NULL AND + end_date_day IS NULL) + ) + ), + begin_area INTEGER, -- references area.id + end_area INTEGER -- references area.id +); + +CREATE TABLE musicbrainz.artist_credit ( + id SERIAL, + name VARCHAR NOT NULL, + artist_count SMALLINT NOT NULL, + ref_count INTEGER DEFAULT 0, + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.artist_credit_name ( + artist_credit INTEGER NOT NULL, -- PK, references artist_credit.id CASCADE + position SMALLINT NOT NULL, -- PK + artist INTEGER NOT NULL, -- references artist.id CASCADE + name VARCHAR NOT NULL, + join_phrase TEXT NOT NULL DEFAULT '' +); + +CREATE TABLE musicbrainz.artist_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references artist.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.area ( + id SERIAL, -- PK + gid uuid NOT NULL, + name VARCHAR NOT NULL, + type INTEGER, -- references area_type.id + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >=0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + begin_date_year SMALLINT, + begin_date_month SMALLINT, + begin_date_day SMALLINT, + end_date_year SMALLINT, + end_date_month SMALLINT, + end_date_day SMALLINT, + ended BOOLEAN NOT NULL DEFAULT FALSE + CHECK ( + ( + -- If any end date fields are not null, then ended must be true + (end_date_year IS NOT NULL OR + end_date_month IS NOT NULL OR + end_date_day IS NOT NULL) AND + ended = TRUE + ) OR ( + -- Otherwise, all end date fields must be null + (end_date_year IS NULL AND + end_date_month IS NULL AND + end_date_day IS NULL) + ) + ), + comment VARCHAR(255) NOT NULL DEFAULT '' +); + +CREATE TABLE musicbrainz.area_type ( + id SERIAL, -- PK + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references area_type.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.recording ( + id SERIAL, + gid UUID NOT NULL, + 
name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + length INTEGER CHECK (length IS NULL OR length > 0), + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + video BOOLEAN NOT NULL DEFAULT FALSE +); + + +CREATE TABLE musicbrainz.recording_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references recording.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + release_group INTEGER NOT NULL, -- references release_group.id + status INTEGER, -- references release_status.id + packaging INTEGER, -- references release_packaging.id + language INTEGER, -- references language.id + script INTEGER, -- references script.id + barcode VARCHAR(255), + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + quality SMALLINT NOT NULL DEFAULT -1, + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references release.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.track ( + id SERIAL, + gid UUID NOT NULL, + recording INTEGER NOT NULL, -- references recording.id + medium INTEGER NOT NULL, -- references medium.id + position INTEGER NOT NULL, + number TEXT NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + length INTEGER CHECK (length IS NULL OR length > 0), + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + is_data_track BOOLEAN NOT NULL DEFAULT FALSE +); + +CREATE TABLE musicbrainz.track_gid_redirect ( + gid UUID 
NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references track.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release_group ( + id SERIAL, + gid UUID NOT NULL, + name VARCHAR NOT NULL, + artist_credit INTEGER NOT NULL, -- references artist_credit.id + type INTEGER, -- references release_group_primary_type.id + comment VARCHAR(255) NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.release_group_gid_redirect ( + gid UUID NOT NULL, -- PK + new_id INTEGER NOT NULL, -- references release_group.id + created TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE TABLE musicbrainz.medium ( + id SERIAL, + release INTEGER NOT NULL, -- references release.id + position INTEGER NOT NULL, + format INTEGER, -- references medium_format.id + name VARCHAR NOT NULL DEFAULT '', + edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), + last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + track_count INTEGER NOT NULL DEFAULT 0 +); + +CREATE TABLE musicbrainz.medium_format ( + id SERIAL, + name VARCHAR(100) NOT NULL, + parent INTEGER, -- references medium_format.id + child_order INTEGER NOT NULL DEFAULT 0, + year SMALLINT, + has_discids BOOLEAN NOT NULL DEFAULT FALSE, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.release_status ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references release_status.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.release_group_primary_type ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references release_group_primary_type.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.language ( + id SERIAL, + iso_code_2t CHAR(3), -- ISO 639-2 (T) + iso_code_2b CHAR(3), -- ISO 639-2 (B) + 
iso_code_1 CHAR(2), -- ISO 639 + name VARCHAR(100) NOT NULL, + frequency INTEGER NOT NULL DEFAULT 0, + iso_code_3 CHAR(3) -- ISO 639-3 +); +ALTER TABLE musicbrainz.language + ADD CONSTRAINT iso_code_check + CHECK (iso_code_2t IS NOT NULL OR iso_code_3 IS NOT NULL); + +CREATE TABLE musicbrainz.release_packaging ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references release_packaging.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.script ( + id SERIAL, + iso_code CHAR(4) NOT NULL, -- ISO 15924 + iso_number CHAR(3) NOT NULL, -- ISO 15924 + name VARCHAR(100) NOT NULL, + frequency INTEGER NOT NULL DEFAULT 0 +); + +CREATE TABLE musicbrainz.gender ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references gender.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +CREATE TABLE musicbrainz.artist_type ( + id SERIAL, + name VARCHAR(255) NOT NULL, + parent INTEGER, -- references artist_type.id + child_order INTEGER NOT NULL DEFAULT 0, + description TEXT, + gid uuid NOT NULL +); + +COMMIT; diff --git a/admin/sql/create_primary_keys.sql b/admin/sql/create_primary_keys.sql index e27ff3ccb..fed96ccba 100644 --- a/admin/sql/create_primary_keys.sql +++ b/admin/sql/create_primary_keys.sql @@ -21,29 +21,4 @@ ALTER TABLE dataset_eval_challenge ADD CONSTRAINT dataset_eval_challenge_pkey PR ALTER TABLE api_key ADD CONSTRAINT api_key_pkey PRIMARY KEY (value); ALTER TABLE feedback ADD CONSTRAINT feedback_pkey PRIMARY KEY (user_id, highlevel_model_id); - -ALTER TABLE musicbrainz.artist ADD CONSTRAINT artist_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.artist_credit ADD CONSTRAINT artist_credit_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.artist_credit_name ADD CONSTRAINT artist_credit_name_pkey PRIMARY KEY (artist_credit, position); -ALTER TABLE musicbrainz.artist_gid_redirect ADD CONSTRAINT artist_gid_redirect_pkey PRIMARY KEY (gid); 
-ALTER TABLE musicbrainz.area ADD CONSTRAINT area_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.area_type ADD CONSTRAINT area_type_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.recording ADD CONSTRAINT recording_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.recording_gid_redirect ADD CONSTRAINT recording_gid_redirect_pkey PRIMARY KEY (gid); -ALTER TABLE musicbrainz.release ADD CONSTRAINT release_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.release_gid_redirect ADD CONSTRAINT release_gid_redirect_pkey PRIMARY KEY (gid); -ALTER TABLE musicbrainz.track ADD CONSTRAINT track_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.track_gid_redirect ADD CONSTRAINT track_gid_redirect_pkey PRIMARY KEY (gid); -ALTER TABLE musicbrainz.release_group ADD CONSTRAINT release_group_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.release_group_gid_redirect ADD CONSTRAINT release_group_gid_redirect_pkey PRIMARY KEY (gid); -ALTER TABLE musicbrainz.medium ADD CONSTRAINT medium_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.medium_format ADD CONSTRAINT medium_format_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.release_status ADD CONSTRAINT release_status_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.release_group_primary_type ADD CONSTRAINT release_group_primary_type_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.language ADD CONSTRAINT language_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.release_packaging ADD CONSTRAINT release_packaging_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.script ADD CONSTRAINT script_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.gender ADD CONSTRAINT gender_pkey PRIMARY KEY (id); -ALTER TABLE musicbrainz.artist_type ADD CONSTRAINT artist_type_pkey PRIMARY KEY (id); - COMMIT; diff --git a/admin/sql/create_tables.sql b/admin/sql/create_tables.sql index 9c4857aec..0c2f926bb 100644 --- a/admin/sql/create_tables.sql +++ b/admin/sql/create_tables.sql @@ -155,273 +155,4 @@ CREATE TABLE feedback ( suggestion TEXT ); - -CREATE SCHEMA IF NOT EXISTS 
musicbrainz; - -CREATE TABLE musicbrainz.artist ( - id SERIAL, - gid UUID NOT NULL, - name VARCHAR NOT NULL, - sort_name VARCHAR NOT NULL, - begin_date_year SMALLINT, - begin_date_month SMALLINT, - begin_date_day SMALLINT, - end_date_year SMALLINT, - end_date_month SMALLINT, - end_date_day SMALLINT, - type INTEGER, -- references artist_type.id - area INTEGER, -- references area.id - gender INTEGER, -- references gender.id - comment VARCHAR(255) NOT NULL DEFAULT '', - edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), - last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), - ended BOOLEAN NOT NULL DEFAULT FALSE - CONSTRAINT artist_ended_check CHECK ( - ( - -- If any end date fields are not null, then ended must be true - (end_date_year IS NOT NULL OR - end_date_month IS NOT NULL OR - end_date_day IS NOT NULL) AND - ended = TRUE - ) OR ( - -- Otherwise, all end date fields must be null - (end_date_year IS NULL AND - end_date_month IS NULL AND - end_date_day IS NULL) - ) - ), - begin_area INTEGER, -- references area.id - end_area INTEGER -- references area.id -); - -CREATE TABLE musicbrainz.artist_credit ( - id SERIAL, - name VARCHAR NOT NULL, - artist_count SMALLINT NOT NULL, - ref_count INTEGER DEFAULT 0, - created TIMESTAMP WITH TIME ZONE DEFAULT NOW() -); - -CREATE TABLE musicbrainz.artist_credit_name ( - artist_credit INTEGER NOT NULL, -- PK, references artist_credit.id CASCADE - position SMALLINT NOT NULL, -- PK - artist INTEGER NOT NULL, -- references artist.id CASCADE - name VARCHAR NOT NULL, - join_phrase TEXT NOT NULL DEFAULT '' -); - -CREATE TABLE musicbrainz.artist_gid_redirect ( - gid UUID NOT NULL, -- PK - new_id INTEGER NOT NULL, -- references artist.id - created TIMESTAMP WITH TIME ZONE DEFAULT NOW() -); - -CREATE TABLE musicbrainz.area ( - id SERIAL, -- PK - gid uuid NOT NULL, - name VARCHAR NOT NULL, - type INTEGER, -- references area_type.id - edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >=0), - last_updated 
TIMESTAMP WITH TIME ZONE DEFAULT NOW(), - begin_date_year SMALLINT, - begin_date_month SMALLINT, - begin_date_day SMALLINT, - end_date_year SMALLINT, - end_date_month SMALLINT, - end_date_day SMALLINT, - ended BOOLEAN NOT NULL DEFAULT FALSE - CHECK ( - ( - -- If any end date fields are not null, then ended must be true - (end_date_year IS NOT NULL OR - end_date_month IS NOT NULL OR - end_date_day IS NOT NULL) AND - ended = TRUE - ) OR ( - -- Otherwise, all end date fields must be null - (end_date_year IS NULL AND - end_date_month IS NULL AND - end_date_day IS NULL) - ) - ), - comment VARCHAR(255) NOT NULL DEFAULT '' -); - -CREATE TABLE musicbrainz.area_type ( - id SERIAL, -- PK - name VARCHAR(255) NOT NULL, - parent INTEGER, -- references area_type.id - child_order INTEGER NOT NULL DEFAULT 0, - description TEXT, - gid uuid NOT NULL -); - -CREATE TABLE musicbrainz.recording ( - id SERIAL, - gid UUID NOT NULL, - name VARCHAR NOT NULL, - artist_credit INTEGER NOT NULL, -- references artist_credit.id - length INTEGER CHECK (length IS NULL OR length > 0), - comment VARCHAR(255) NOT NULL DEFAULT '', - edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), - last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), - video BOOLEAN NOT NULL DEFAULT FALSE -); - - -CREATE TABLE musicbrainz.recording_gid_redirect ( - gid UUID NOT NULL, -- PK - new_id INTEGER NOT NULL, -- references recording.id - created TIMESTAMP WITH TIME ZONE DEFAULT NOW() -); - -CREATE TABLE musicbrainz.release ( - id SERIAL, - gid UUID NOT NULL, - name VARCHAR NOT NULL, - artist_credit INTEGER NOT NULL, -- references artist_credit.id - release_group INTEGER NOT NULL, -- references release_group.id - status INTEGER, -- references release_status.id - packaging INTEGER, -- references release_packaging.id - language INTEGER, -- references language.id - script INTEGER, -- references script.id - barcode VARCHAR(255), - comment VARCHAR(255) NOT NULL DEFAULT '', - edits_pending INTEGER NOT NULL DEFAULT 
0 CHECK (edits_pending >= 0), - quality SMALLINT NOT NULL DEFAULT -1, - last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW() -); - -CREATE TABLE musicbrainz.release_gid_redirect ( - gid UUID NOT NULL, -- PK - new_id INTEGER NOT NULL, -- references release.id - created TIMESTAMP WITH TIME ZONE DEFAULT NOW() -); - -CREATE TABLE musicbrainz.track ( - id SERIAL, - gid UUID NOT NULL, - recording INTEGER NOT NULL, -- references recording.id - medium INTEGER NOT NULL, -- references medium.id - position INTEGER NOT NULL, - number TEXT NOT NULL, - name VARCHAR NOT NULL, - artist_credit INTEGER NOT NULL, -- references artist_credit.id - length INTEGER CHECK (length IS NULL OR length > 0), - edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), - last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), - is_data_track BOOLEAN NOT NULL DEFAULT FALSE -); - -CREATE TABLE musicbrainz.track_gid_redirect ( - gid UUID NOT NULL, -- PK - new_id INTEGER NOT NULL, -- references track.id - created TIMESTAMP WITH TIME ZONE DEFAULT NOW() -); - -CREATE TABLE musicbrainz.release_group ( - id SERIAL, - gid UUID NOT NULL, - name VARCHAR NOT NULL, - artist_credit INTEGER NOT NULL, -- references artist_credit.id - type INTEGER, -- references release_group_primary_type.id - comment VARCHAR(255) NOT NULL DEFAULT '', - edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), - last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW() -); - -CREATE TABLE musicbrainz.release_group_gid_redirect ( - gid UUID NOT NULL, -- PK - new_id INTEGER NOT NULL, -- references release_group.id - created TIMESTAMP WITH TIME ZONE DEFAULT NOW() -); - -CREATE TABLE musicbrainz.medium ( - id SERIAL, - release INTEGER NOT NULL, -- references release.id - position INTEGER NOT NULL, - format INTEGER, -- references medium_format.id - name VARCHAR NOT NULL DEFAULT '', - edits_pending INTEGER NOT NULL DEFAULT 0 CHECK (edits_pending >= 0), - last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(), - track_count 
INTEGER NOT NULL DEFAULT 0 -); - -CREATE TABLE musicbrainz.medium_format ( - id SERIAL, - name VARCHAR(100) NOT NULL, - parent INTEGER, -- references medium_format.id - child_order INTEGER NOT NULL DEFAULT 0, - year SMALLINT, - has_discids BOOLEAN NOT NULL DEFAULT FALSE, - description TEXT, - gid uuid NOT NULL -); - -CREATE TABLE musicbrainz.release_status ( - id SERIAL, - name VARCHAR(255) NOT NULL, - parent INTEGER, -- references release_status.id - child_order INTEGER NOT NULL DEFAULT 0, - description TEXT, - gid uuid NOT NULL -); - -CREATE TABLE musicbrainz.release_group_primary_type ( - id SERIAL, - name VARCHAR(255) NOT NULL, - parent INTEGER, -- references release_group_primary_type.id - child_order INTEGER NOT NULL DEFAULT 0, - description TEXT, - gid uuid NOT NULL -); - -CREATE TABLE musicbrainz.language ( - id SERIAL, - iso_code_2t CHAR(3), -- ISO 639-2 (T) - iso_code_2b CHAR(3), -- ISO 639-2 (B) - iso_code_1 CHAR(2), -- ISO 639 - name VARCHAR(100) NOT NULL, - frequency INTEGER NOT NULL DEFAULT 0, - iso_code_3 CHAR(3) -- ISO 639-3 -); -ALTER TABLE musicbrainz.language - ADD CONSTRAINT iso_code_check - CHECK (iso_code_2t IS NOT NULL OR iso_code_3 IS NOT NULL); - -CREATE TABLE musicbrainz.release_packaging ( - id SERIAL, - name VARCHAR(255) NOT NULL, - parent INTEGER, -- references release_packaging.id - child_order INTEGER NOT NULL DEFAULT 0, - description TEXT, - gid uuid NOT NULL -); - -CREATE TABLE musicbrainz.script ( - id SERIAL, - iso_code CHAR(4) NOT NULL, -- ISO 15924 - iso_number CHAR(3) NOT NULL, -- ISO 15924 - name VARCHAR(100) NOT NULL, - frequency INTEGER NOT NULL DEFAULT 0 -); - -CREATE TABLE musicbrainz.gender ( - id SERIAL, - name VARCHAR(255) NOT NULL, - parent INTEGER, -- references gender.id - child_order INTEGER NOT NULL DEFAULT 0, - description TEXT, - gid uuid NOT NULL -); - -CREATE TABLE musicbrainz.artist_type ( - id SERIAL, - name VARCHAR(255) NOT NULL, - parent INTEGER, -- references artist_type.id - child_order INTEGER NOT NULL 
DEFAULT 0, - description TEXT, - gid uuid NOT NULL -); - COMMIT; From ae72050d2041529bbabdea1c09cd7942e92ec61c Mon Sep 17 00:00:00 2001 From: RashiSah Date: Thu, 7 Jun 2018 15:32:54 +0530 Subject: [PATCH 012/125] Add a function to initiate MB db and a command to initialize & create table structure --- README.md | 4 ++++ db/testing.py | 5 +++++ manage.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/README.md b/README.md index 1a9fd0700..579ef7f0c 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,10 @@ Then you can start all the services: ./develop.sh up --build +## Initialize the MusicBrainz database: + + ./develop.sh run --rm webserver python2 manage.py init_mb_db + ### Manually Full installation instructions are available in [INSTALL.md](https://github.com/metabrainz/acousticbrainz-server/blob/master/INSTALL.md) file. After installing, continue the following steps. diff --git a/db/testing.py b/db/testing.py index de5a990ea..27eb3549f 100644 --- a/db/testing.py +++ b/db/testing.py @@ -37,6 +37,11 @@ def init_db(self): db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_primary_keys.sql')) db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_foreign_keys.sql')) db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_indexes.sql')) + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_schema.sql')) + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_tables.sql')) + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_primary_keys.sql')) + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_foreign_keys.sql')) + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_indexes.sql')) def drop_tables(self): with db.engine.connect() as connection: diff --git a/manage.py b/manage.py index d1ba41d80..a0708633e 100644 --- a/manage.py +++ b/manage.py @@ -15,6 +15,7 @@ import db.stats import db.user import webserver +from brainzutils import musicbrainz_db from 
db.testing import DatabaseTestCase ADMIN_SQL_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'admin', 'sql') @@ -95,6 +96,35 @@ def init_db(archive, force, skip_create_db=False): print("Done!") +@cli.command() +def init_mb_db(): + """Initialize the MusicBrainz database. + + This process involves several steps: + 1. MusicBrainz schema is created. + 2. MusicBrainz Table structure is created. + 3. Primary keys and foreign keys are created. + 4. Indexes are created. + """ + + musicbrainz_db.init_db_engine(current_app.config['MB_DATABASE_URI']) + + print('Creating MusicBrainz schema...') + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_schema.sql')) + + print('Creating MusicBrainz tables...') + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_tables.sql')) + + print('Creating MusicBrainz primary and foreign keys...') + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_primary_keys.sql')) + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_foreign_keys.sql')) + + print('Creating MusicBrainz indexes...') + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_indexes.sql')) + + print("Done!") + + @cli.command() @click.argument("archive", type=click.Path(exists=True)) def import_data(archive): From dd80e11b2d874dd857d2d919919b438049df1600 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Thu, 7 Jun 2018 17:08:01 +0530 Subject: [PATCH 013/125] Correct the create musicbrainz foreign keys file --- admin/sql/create_musicbrainz_foreign_keys.sql | 277 +++++++++++------- 1 file changed, 167 insertions(+), 110 deletions(-) diff --git a/admin/sql/create_musicbrainz_foreign_keys.sql b/admin/sql/create_musicbrainz_foreign_keys.sql index adea39e0a..d5280aadc 100644 --- a/admin/sql/create_musicbrainz_foreign_keys.sql +++ b/admin/sql/create_musicbrainz_foreign_keys.sql @@ -1,118 +1,175 @@ BEGIN; -ALTER TABLE lowlevel_json - ADD CONSTRAINT lowlevel_json_fk_lowlevel - FOREIGN KEY (id) - REFERENCES 
lowlevel (id); - -ALTER TABLE lowlevel_json - ADD CONSTRAINT lowlevel_json_fk_version - FOREIGN KEY (version) - REFERENCES version (id); - -ALTER TABLE highlevel - ADD CONSTRAINT highlevel_fk_lowlevel - FOREIGN KEY (id) - REFERENCES lowlevel (id); - -ALTER TABLE highlevel_meta - ADD CONSTRAINT highlevel_meta_fk_highlevel - FOREIGN KEY (id) - REFERENCES highlevel (id); - -ALTER TABLE highlevel_model - ADD CONSTRAINT highlevel_model_fk_highlevel - FOREIGN KEY (highlevel) - REFERENCES highlevel (id); - -ALTER TABLE highlevel_model - ADD CONSTRAINT highlevel_model_fk_version - FOREIGN KEY (version) - REFERENCES version (id); - -ALTER TABLE highlevel_model - ADD CONSTRAINT highlevel_model_fk_model - FOREIGN KEY (model) - REFERENCES model (id); - -ALTER TABLE dataset - ADD CONSTRAINT dataset_fk_user - FOREIGN KEY (author) - REFERENCES "user" (id) - ON UPDATE CASCADE +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_type + FOREIGN KEY (type) + REFERENCES musicbrainz.artist_type(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_area + FOREIGN KEY (area) + REFERENCES musicbrainz.area(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_gender + FOREIGN KEY (gender) + REFERENCES musicbrainz.gender(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_begin_area + FOREIGN KEY (begin_area) + REFERENCES musicbrainz.area(id); + +ALTER TABLE musicbrainz.artist + ADD CONSTRAINT artist_fk_end_area + FOREIGN KEY (end_area) + REFERENCES musicbrainz.area(id); + +ALTER TABLE musicbrainz.artist_credit_name + ADD CONSTRAINT artist_credit_name_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id) ON DELETE CASCADE; -ALTER TABLE dataset_class - ADD CONSTRAINT class_fk_dataset - FOREIGN KEY (dataset) - REFERENCES dataset (id) - ON UPDATE CASCADE +ALTER TABLE musicbrainz.artist_credit_name + ADD CONSTRAINT artist_credit_name_fk_artist + FOREIGN KEY (artist) + REFERENCES musicbrainz.artist(id) ON DELETE 
CASCADE; -ALTER TABLE dataset_class_member - ADD CONSTRAINT class_member_fk_class - FOREIGN KEY (class) - REFERENCES dataset_class (id) - ON UPDATE CASCADE - ON DELETE CASCADE; - -ALTER TABLE dataset_eval_jobs - ADD CONSTRAINT dataset_eval_jobs_fk_dataset_snapshot - FOREIGN KEY (snapshot_id) - REFERENCES dataset_snapshot (id); - -ALTER TABLE dataset_eval_jobs - ADD CONSTRAINT dataset_eval_jobs_fk_training_snapshot - FOREIGN KEY (training_snapshot) - REFERENCES dataset_eval_sets (id) - ON UPDATE CASCADE - ON DELETE CASCADE; - -ALTER TABLE dataset_eval_jobs - ADD CONSTRAINT dataset_eval_jobs_fk_testing_snapshot - FOREIGN KEY (testing_snapshot) - REFERENCES dataset_eval_sets (id) - ON UPDATE CASCADE - ON DELETE CASCADE; - -ALTER TABLE dataset_snapshot - ADD CONSTRAINT dataset_id_fk_dataset - FOREIGN KEY (dataset_id) - REFERENCES dataset (id); - -ALTER TABLE challenge - ADD CONSTRAINT challenge_fk_dataset_snapshot - FOREIGN KEY (validation_snapshot) - REFERENCES dataset_snapshot (id); - -ALTER TABLE challenge - ADD CONSTRAINT challenge_fk_user - FOREIGN KEY (creator) - REFERENCES "user" (id); - -ALTER TABLE dataset_eval_challenge - ADD CONSTRAINT dataset_eval_challenge_fk_dataset_eval_job - FOREIGN KEY (dataset_eval_job) - REFERENCES dataset_eval_jobs (id); - -ALTER TABLE dataset_eval_challenge - ADD CONSTRAINT dataset_eval_challenge_fk_challenge - FOREIGN KEY (challenge_id) - REFERENCES challenge (id); - -ALTER TABLE api_key - ADD CONSTRAINT api_key_fk_user - FOREIGN KEY (owner) - REFERENCES "user" (id); - -ALTER TABLE feedback - ADD CONSTRAINT feedback_fk_highlevel_model - FOREIGN KEY (highlevel_model_id) - REFERENCES highlevel_model (id); - -ALTER TABLE feedback - ADD CONSTRAINT feedback_fk_user - FOREIGN KEY (user_id) - REFERENCES "user" (id); +ALTER TABLE musicbrainz.artist_gid_redirect + ADD CONSTRAINT artist_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.artist(id); + +ALTER TABLE musicbrainz.area + ADD CONSTRAINT area_fk_type + FOREIGN 
KEY (type) + REFERENCES musicbrainz.area_type(id); + +ALTER TABLE musicbrainz.area_type + ADD CONSTRAINT area_type_fk_parent + FOREIGN KEY (parent) +REFERENCES musicbrainz.area_type(id); + +ALTER TABLE musicbrainz.recording + ADD CONSTRAINT recording_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.recording_gid_redirect + ADD CONSTRAINT recording_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.recording(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_release_group + FOREIGN KEY (release_group) + REFERENCES musicbrainz.release_group(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_status + FOREIGN KEY (status) + REFERENCES musicbrainz.release_status(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_packaging + FOREIGN KEY (packaging) + REFERENCES musicbrainz.release_packaging(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_language + FOREIGN KEY (language) + REFERENCES musicbrainz.language(id); + +ALTER TABLE musicbrainz.release + ADD CONSTRAINT release_fk_script + FOREIGN KEY (script) + REFERENCES musicbrainz.script(id); + +ALTER TABLE musicbrainz.release_gid_redirect + ADD CONSTRAINT release_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.release(id); + +ALTER TABLE musicbrainz.track + ADD CONSTRAINT track_fk_recording + FOREIGN KEY (recording) + REFERENCES musicbrainz.recording(id); + +ALTER TABLE musicbrainz.track + ADD CONSTRAINT track_fk_medium + FOREIGN KEY (medium) + REFERENCES musicbrainz.medium(id); + +ALTER TABLE musicbrainz.track + ADD CONSTRAINT track_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.track_gid_redirect + ADD CONSTRAINT 
track_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.track(id); + +ALTER TABLE musicbrainz.release_group + ADD CONSTRAINT release_group_fk_artist_credit + FOREIGN KEY (artist_credit) + REFERENCES musicbrainz.artist_credit(id); + +ALTER TABLE musicbrainz.release_group + ADD CONSTRAINT release_group_fk_type + FOREIGN KEY (type) + REFERENCES musicbrainz.release_group_primary_type(id); + +ALTER TABLE musicbrainz.release_group_primary_type + ADD CONSTRAINT release_group_primary_type_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.release_group_primary_type; + +ALTER TABLE musicbrainz.release_group_gid_redirect + ADD CONSTRAINT release_group_gid_redirect_fk_new_id + FOREIGN KEY (new_id) + REFERENCES musicbrainz.release_group(id); + +ALTER TABLE musicbrainz.medium + ADD CONSTRAINT medium_fk_release + FOREIGN KEY (release) + REFERENCES musicbrainz.release(id); + +ALTER TABLE musicbrainz.medium + ADD CONSTRAINT medium_fk_format + FOREIGN KEY (format) + REFERENCES musicbrainz.medium_format(id); + +ALTER TABLE musicbrainz.medium_format + ADD CONSTRAINT medium_format_fk_parent + FOREIGN KEY (parent) +REFERENCES musicbrainz.medium_format(id); + +ALTER TABLE musicbrainz.release_status + ADD CONSTRAINT release_status_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.release_status(id); + +ALTER TABLE musicbrainz.release_packaging + ADD CONSTRAINT release_packaging_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.release_packaging(id); + +ALTER TABLE musicbrainz.gender + ADD CONSTRAINT gender_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.gender(id); + +ALTER TABLE musicbrainz.artist_type + ADD CONSTRAINT artist_type_fk_parent + FOREIGN KEY (parent) + REFERENCES musicbrainz.artist_type(id); COMMIT; From 98ef544f0ae69823875ef67747d05a79c5665aea Mon Sep 17 00:00:00 2001 From: RashiSah Date: Wed, 13 Jun 2018 19:33:37 +0530 Subject: [PATCH 014/125] Add more indexes to musicbrainz indexes file and add an update file --- 
admin/sql/create_musicbrainz_indexes.sql | 22 ++++++++++++++++ .../20180613-update-musicbrainz-indexes.sql | 25 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 admin/updates/20180613-update-musicbrainz-indexes.sql diff --git a/admin/sql/create_musicbrainz_indexes.sql b/admin/sql/create_musicbrainz_indexes.sql index 4b90bf402..8b5707655 100644 --- a/admin/sql/create_musicbrainz_indexes.sql +++ b/admin/sql/create_musicbrainz_indexes.sql @@ -4,9 +4,14 @@ CREATE UNIQUE INDEX artist_idx_gid ON musicbrainz.artist (gid); CREATE INDEX artist_idx_name ON musicbrainz.artist (name); CREATE INDEX artist_idx_sort_name ON musicbrainz.artist (sort_name); CREATE INDEX artist_idx_area ON musicbrainz.artist (area); +CREATE INDEX artist_idx_begin_area ON musicbrainz.artist (begin_area); +CREATE INDEX artist_idx_end_area ON musicbrainz.artist (end_area); + CREATE UNIQUE INDEX artist_idx_null_comment ON musicbrainz.artist (name) WHERE comment IS NULL; CREATE UNIQUE INDEX artist_idx_uniq_name_comment ON musicbrainz.artist (name, comment) WHERE comment IS NOT NULL; +CREATE UNIQUE INDEX area_type_idx_gid ON musicbrainz.area_type (gid); + CREATE UNIQUE INDEX area_idx_gid ON musicbrainz.area (gid); CREATE INDEX area_idx_name ON musicbrainz.area (name); @@ -42,4 +47,21 @@ CREATE INDEX release_group_idx_artist_credit ON musicbrainz.release_group (artis CREATE INDEX medium_idx_track_count ON musicbrainz.medium (track_count); +CREATE UNIQUE INDEX medium_format_idx_gid ON musicbrainz.medium_format (gid); + +CREATE UNIQUE INDEX release_status_idx_gid ON musicbrainz.release_status (gid); + +CREATE UNIQUE INDEX language_idx_iso_code_2b ON musicbrainz.language (iso_code_2b); +CREATE UNIQUE INDEX language_idx_iso_code_2t ON musicbrainz.language (iso_code_2t); +CREATE UNIQUE INDEX language_idx_iso_code_1 ON musicbrainz.language (iso_code_1); +CREATE UNIQUE INDEX language_idx_iso_code_3 ON musicbrainz.language (iso_code_3); + +CREATE UNIQUE INDEX release_packaging_idx_gid ON 
musicbrainz.release_packaging (gid); + +CREATE UNIQUE INDEX script_idx_iso_code ON musicbrainz.script (iso_code); + +CREATE UNIQUE INDEX gender_idx_gid ON musicbrainz.gender (gid); + +CREATE UNIQUE INDEX artist_type_idx_gid ON musicbrainz.artist_type (gid); + COMMIT; diff --git a/admin/updates/20180613-update-musicbrainz-indexes.sql b/admin/updates/20180613-update-musicbrainz-indexes.sql new file mode 100644 index 000000000..620771335 --- /dev/null +++ b/admin/updates/20180613-update-musicbrainz-indexes.sql @@ -0,0 +1,25 @@ +BEGIN; + +CREATE INDEX artist_idx_begin_area ON musicbrainz.artist (begin_area); +CREATE INDEX artist_idx_end_area ON musicbrainz.artist (end_area); + +CREATE UNIQUE INDEX area_type_idx_gid ON musicbrainz.area_type (gid); + +CREATE UNIQUE INDEX medium_format_idx_gid ON musicbrainz.medium_format (gid); + +CREATE UNIQUE INDEX release_status_idx_gid ON musicbrainz.release_status (gid); + +CREATE UNIQUE INDEX language_idx_iso_code_2b ON musicbrainz.language (iso_code_2b); +CREATE UNIQUE INDEX language_idx_iso_code_2t ON musicbrainz.language (iso_code_2t); +CREATE UNIQUE INDEX language_idx_iso_code_1 ON musicbrainz.language (iso_code_1); +CREATE UNIQUE INDEX language_idx_iso_code_3 ON musicbrainz.language (iso_code_3); + +CREATE UNIQUE INDEX release_packaging_idx_gid ON musicbrainz.release_packaging (gid); + +CREATE UNIQUE INDEX script_idx_iso_code ON musicbrainz.script (iso_code); + +CREATE UNIQUE INDEX gender_idx_gid ON musicbrainz.gender (gid); + +CREATE UNIQUE INDEX artist_type_idx_gid ON musicbrainz.artist_type (gid); + +COMMIT; From 8d9b43ac438869124cf6e773dd9d7e7e4c5ad148 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Wed, 13 Jun 2018 22:51:20 +0530 Subject: [PATCH 015/125] Update the musicbrainz schema update sql file and delete the separate one --- admin/updates/20180525-musicbrainz-schema.sql | 22 ++++++++++++++++ .../20180613-update-musicbrainz-indexes.sql | 25 ------------------- 2 files changed, 22 insertions(+), 25 deletions(-) delete 
mode 100644 admin/updates/20180613-update-musicbrainz-indexes.sql diff --git a/admin/updates/20180525-musicbrainz-schema.sql b/admin/updates/20180525-musicbrainz-schema.sql index 054a6b09b..cc1bb328a 100644 --- a/admin/updates/20180525-musicbrainz-schema.sql +++ b/admin/updates/20180525-musicbrainz-schema.sql @@ -506,4 +506,26 @@ CREATE INDEX release_group_idx_artist_credit ON musicbrainz.release_group (artis CREATE INDEX medium_idx_track_count ON musicbrainz.medium (track_count); +CREATE INDEX artist_idx_begin_area ON musicbrainz.artist (begin_area); +CREATE INDEX artist_idx_end_area ON musicbrainz.artist (end_area); + +CREATE UNIQUE INDEX area_type_idx_gid ON musicbrainz.area_type (gid); + +CREATE UNIQUE INDEX medium_format_idx_gid ON musicbrainz.medium_format (gid); + +CREATE UNIQUE INDEX release_status_idx_gid ON musicbrainz.release_status (gid); + +CREATE UNIQUE INDEX language_idx_iso_code_2b ON musicbrainz.language (iso_code_2b); +CREATE UNIQUE INDEX language_idx_iso_code_2t ON musicbrainz.language (iso_code_2t); +CREATE UNIQUE INDEX language_idx_iso_code_1 ON musicbrainz.language (iso_code_1); +CREATE UNIQUE INDEX language_idx_iso_code_3 ON musicbrainz.language (iso_code_3); + +CREATE UNIQUE INDEX release_packaging_idx_gid ON musicbrainz.release_packaging (gid); + +CREATE UNIQUE INDEX script_idx_iso_code ON musicbrainz.script (iso_code); + +CREATE UNIQUE INDEX gender_idx_gid ON musicbrainz.gender (gid); + +CREATE UNIQUE INDEX artist_type_idx_gid ON musicbrainz.artist_type (gid); + COMMIT; diff --git a/admin/updates/20180613-update-musicbrainz-indexes.sql b/admin/updates/20180613-update-musicbrainz-indexes.sql deleted file mode 100644 index 620771335..000000000 --- a/admin/updates/20180613-update-musicbrainz-indexes.sql +++ /dev/null @@ -1,25 +0,0 @@ -BEGIN; - -CREATE INDEX artist_idx_begin_area ON musicbrainz.artist (begin_area); -CREATE INDEX artist_idx_end_area ON musicbrainz.artist (end_area); - -CREATE UNIQUE INDEX area_type_idx_gid ON 
musicbrainz.area_type (gid); - -CREATE UNIQUE INDEX medium_format_idx_gid ON musicbrainz.medium_format (gid); - -CREATE UNIQUE INDEX release_status_idx_gid ON musicbrainz.release_status (gid); - -CREATE UNIQUE INDEX language_idx_iso_code_2b ON musicbrainz.language (iso_code_2b); -CREATE UNIQUE INDEX language_idx_iso_code_2t ON musicbrainz.language (iso_code_2t); -CREATE UNIQUE INDEX language_idx_iso_code_1 ON musicbrainz.language (iso_code_1); -CREATE UNIQUE INDEX language_idx_iso_code_3 ON musicbrainz.language (iso_code_3); - -CREATE UNIQUE INDEX release_packaging_idx_gid ON musicbrainz.release_packaging (gid); - -CREATE UNIQUE INDEX script_idx_iso_code ON musicbrainz.script (iso_code); - -CREATE UNIQUE INDEX gender_idx_gid ON musicbrainz.gender (gid); - -CREATE UNIQUE INDEX artist_type_idx_gid ON musicbrainz.artist_type (gid); - -COMMIT; From dc1c999cf09413b4fedb29c52bd7de6b1b71356e Mon Sep 17 00:00:00 2001 From: RashiSah Date: Tue, 5 Jun 2018 18:35:21 +0530 Subject: [PATCH 016/125] AB-340: Import MusicBrainz data into separate schema tables in AB database --- db/import_mb_data.py | 1130 ++++++++++++++++++++++++++++++++++++++++++ manage.py | 11 + 2 files changed, 1141 insertions(+) create mode 100644 db/import_mb_data.py diff --git a/db/import_mb_data.py b/db/import_mb_data.py new file mode 100644 index 000000000..47e7fe58a --- /dev/null +++ b/db/import_mb_data.py @@ -0,0 +1,1130 @@ +import db +from brainzutils import musicbrainz_db +from sqlalchemy import text +from sqlalchemy.exc import IntegrityError + +def start_import(): + with db.engine.begin() as conn: + lowlevel_query = text("""SELECT gid from lowlevel""") + gids = conn.execute(lowlevel_query) + gids_in_AB = gids.fetchall() + for recording_gid in gids_in_AB: + MB_artist_credit_data, MB_recording_data, MB_artist_data, MB_artist_type_data, MB_area_data, \ + MB_script_data, MB_release_data, MB_release_group_primary_type_data, MB_medium_data, \ + MB_track_data, MB_gender_data, MB_language_data, 
MB_medium_format_data, MB_release_group_data, \ + MB_release_status_data, MB_artist_gid_redirect_data, MB_recording_gid_redirect_data, \ + MB_release_group_gid_redirect_data, MB_release_gid_redirect_data, MB_artist_credit_name_data, \ + MB_area_type_data, MB_release_packaging_data = (0,)*22 + + # FROM MUSICBRAINZ + with musicbrainz_db.engine.begin() as connection: + # ARTIST CREDIT + try: + artist_credit_query = text("""SELECT artist_credit.id, artist_credit.name, artist_credit.artist_count, + artist_credit.ref_count, artist_credit.created + FROM artist_credit + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid= :recording_gid + """) + result = connection.execute(artist_credit_query, {"recording_gid" : recording_gid[0]}) + MB_artist_credit_data = result.fetchall() + except ValueError: + pass + # ARTIST: artist_credit.id is a credit id, not an artist id, so artists are reached through artist_credit_name + try: + artist_query = text(""" + SELECT artist.id, artist.gid, artist.name, artist.sort_name, artist.begin_date_year, + artist.begin_date_month, artist.begin_date_day, artist.end_date_year, artist.end_date_month, + artist.end_date_day, artist.type, artist.area, artist.gender, artist.comment, artist.edits_pending, + artist.last_updated, artist.ended, artist.begin_area, artist.end_area + FROM artist + INNER JOIN artist_credit_name + ON artist_credit_name.artist = artist.id + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(artist_query, {"recording_gid": recording_gid[0]}) + MB_artist_data = result.fetchall() + except ValueError: + pass + + # ARTIST TYPE (types of the recording's artists, matched through artist_credit_name) + try: + artist_type_query = text("""SELECT artist_type.id, + artist_type.name, + artist_type.parent, + artist_type.child_order, + artist_type.description, + artist_type.gid + FROM artist_type + INNER JOIN artist + ON artist.type = artist_type.id + INNER JOIN artist_credit_name + ON artist.id = artist_credit_name.artist + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid =
:recording_gid + """) + result = connection.execute(artist_type_query, {"recording_gid": recording_gid[0]}) + MB_artist_type_data = result.fetchall() + except ValueError: + pass + + # RECORDING + try: + recording_query = text("""SELECT recording.id, recording.gid, recording.name, recording.artist_credit, + recording.length, recording.comment, recording.edits_pending, recording.last_updated, + recording.video + FROM recording + WHERE recording.gid = :recording_gid + """) + result = connection.execute(recording_query, {"recording_gid": recording_gid[0]}) + MB_recording_data = result.fetchall() + except ValueError: + pass + + # AREA (artists are matched through artist_credit_name; artist_credit.id is not an artist id) + try: + area_query = text(""" + SELECT area.id, + area.gid, + area.name, + area.type, + area.edits_pending, + area.last_updated, + area.begin_date_year, + area.begin_date_month, + area.begin_date_day, + area.end_date_year, + area.end_date_month, + area.end_date_day, + area.ended, + area.comment + FROM area + INNER JOIN artist + ON area.id = artist.area + INNER JOIN artist_credit_name + ON artist.id = artist_credit_name.artist + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(area_query, {"recording_gid": recording_gid[0]}) + MB_area_data = result.fetchall() + except ValueError: + pass + + # BEGIN AREA (same artist_credit_name join as AREA) + try: + begin_area_query = text(""" + SELECT area.id, + area.gid, + area.name, + area.type, + area.edits_pending, + area.last_updated, + area.begin_date_year, + area.begin_date_month, + area.begin_date_day, + area.end_date_year, + area.end_date_month, + area.end_date_day, + area.ended, + area.comment + FROM area + INNER JOIN artist + ON area.id = artist.begin_area + INNER JOIN artist_credit_name + ON artist.id = artist_credit_name.artist + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(begin_area_query, {"recording_gid": recording_gid[0]}) + MB_begin_area_data =
result.fetchall() + except ValueError: + pass + + # END AREA (artists are matched through artist_credit_name; artist_credit.id is not an artist id) + try: + end_area_query = text(""" + SELECT area.id, + area.gid, + area.name, + area.type, + area.edits_pending, + area.last_updated, + area.begin_date_year, + area.begin_date_month, + area.begin_date_day, + area.end_date_year, + area.end_date_month, + area.end_date_day, + area.ended, + area.comment + FROM area + INNER JOIN artist + ON area.id = artist.end_area + INNER JOIN artist_credit_name + ON artist.id = artist_credit_name.artist + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(end_area_query, {"recording_gid": recording_gid[0]}) + MB_end_area_data = result.fetchall() + except ValueError: + pass + + # AREA TYPE (same artist_credit_name join as END AREA) + try: + area_type_query = text("""SELECT area_type.id, + area_type.name, + area_type.parent, + area_type.child_order, + area_type.description, + area_type.gid + FROM area_type + INNER JOIN area + ON area.type = area_type.id + INNER JOIN artist + ON area.id = artist.area + INNER JOIN artist_credit_name + ON artist.id = artist_credit_name.artist + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(area_type_query, {"recording_gid": recording_gid[0]}) + MB_area_type_data = result.fetchall() + except ValueError: + pass + + # BEGIN AREA TYPE (same artist_credit_name join as END AREA) + try: + begin_area_type_query = text("""SELECT area_type.id, + area_type.name, + area_type.parent, + area_type.child_order, + area_type.description, + area_type.gid + FROM area_type + INNER JOIN area + ON area.type = area_type.id + INNER JOIN artist + ON area.id = artist.begin_area + INNER JOIN artist_credit_name + ON artist.id = artist_credit_name.artist + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(begin_area_type_query, {"recording_gid": recording_gid[0]}) + MB_begin_area_type_data = result.fetchall() + except
ValueError: + pass + + # END AREA TYPE (artists are matched through artist_credit_name; artist_credit.id is not an artist id) + try: + end_area_type_query = text("""SELECT area_type.id, + area_type.name, + area_type.parent, + area_type.child_order, + area_type.description, + area_type.gid + FROM area_type + INNER JOIN area + ON area.type = area_type.id + INNER JOIN artist + ON area.id = artist.end_area + INNER JOIN artist_credit_name + ON artist.id = artist_credit_name.artist + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(end_area_type_query, {"recording_gid": recording_gid[0]}) + MB_end_area_type_data = result.fetchall() + except ValueError: + pass + + # ARTIST CREDIT NAME + try: + artist_credit_name_query = text("""SELECT artist_credit_name.artist_credit, + artist_credit_name.position, + artist_credit_name.artist, + artist_credit_name.name, + artist_credit_name.join_phrase + FROM artist_credit_name + INNER JOIN artist_credit + ON artist_credit_name.artist_credit = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(artist_credit_name_query, {"recording_gid": recording_gid[0]}) + MB_artist_credit_name_data = result.fetchall() + except ValueError: + pass + + # ARTIST GID REDIRECT (same artist_credit_name join as END AREA TYPE) + try: + artist_gid_redirect_query = text("""SELECT artist_gid_redirect.gid, + artist_gid_redirect.new_id, + artist_gid_redirect.created + FROM artist_gid_redirect + INNER JOIN artist + ON artist.id = artist_gid_redirect.new_id + INNER JOIN artist_credit_name + ON artist.id = artist_credit_name.artist + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(artist_gid_redirect_query, {"recording_gid": recording_gid[0]}) + MB_artist_gid_redirect_data = result.fetchall() + except ValueError: + pass + + + # GENDER + try: + gender_query = text("""SELECT gender.id, + gender.name, + gender.parent, +
gender.child_order, + gender.description, + gender.gid + FROM gender + INNER JOIN artist + ON artist.gender = gender.id + INNER JOIN artist_credit_name + ON artist.id = artist_credit_name.artist + INNER JOIN recording + ON artist_credit_name.artist_credit = recording.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(gender_query, {"recording_gid": recording_gid[0]}) # artists are matched through artist_credit_name; artist_credit.id is not an artist id + MB_gender_data = result.fetchall() + except ValueError: + pass + + # RELEASE + try: + release_query = text("""SELECT release.id, + release.gid, + release.name, + release.artist_credit, + release.release_group, + release.status, + release.packaging, + release.language, + release.script, + release.barcode, + release.comment, + release.edits_pending, + release.quality, + release.last_updated + FROM release + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(release_query, {"recording_gid": recording_gid[0]}) + MB_release_data = result.fetchall() + except ValueError: + pass + + # LANGUAGE + try: + language_query = text("""SELECT language.id, + language.iso_code_2t, + language.iso_code_2b, + language.iso_code_1, + language.name, + language.frequency, + language.iso_code_3 + FROM language + INNER JOIN release + ON release.language = language.id + INNER JOIN recording + ON recording.artist_credit=release.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(language_query, {"recording_gid": recording_gid[0]}) + MB_language_data = result.fetchall() + except ValueError: + pass + + # MEDIUM + try: + medium_query = text("""SELECT medium.id, + medium.release, + medium.position, + medium.format, + medium.name, + medium.edits_pending, + medium.last_updated, + medium.track_count + FROM medium + INNER JOIN release + ON release.id = medium.release + INNER JOIN recording + ON recording.artist_credit=release.artist_credit + WHERE recording.gid = :recording_gid + """) + result
= connection.execute(medium_query, {"recording_gid": recording_gid[0]}) + MB_medium_data = result.fetchall() + except ValueError: + pass + + # MEDIUM FORMAT + try: + medium_format_query = text("""SELECT medium_format.id, + medium_format.name, + medium_format.parent, + medium_format.child_order, + medium_format.year, + medium_format.has_discids, + medium_format.description, + medium_format.gid + FROM medium_format + INNER JOIN medium + ON medium_format.id = medium.format + INNER JOIN release + ON release.id = medium.release + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(medium_format_query, {"recording_gid": recording_gid[0]}) + MB_medium_format_data = result.fetchall() + except ValueError: + pass + + # RECORDING GID REDIRECT + try: + recording_gid_redirect_query = text("""SELECT recording_gid_redirect.gid, + recording_gid_redirect.new_id, + recording_gid_redirect.created + FROM recording_gid_redirect + INNER JOIN recording + ON recording.id = recording_gid_redirect.new_id + WHERE recording.gid = :recording_gid + """) + result = connection.execute(recording_gid_redirect_query, {"recording_gid": recording_gid[0]}) + MB_recording_gid_redirect_data = result.fetchall() + except ValueError: + pass + + # release_gid_redirect + try: + release_gid_redirect_query = text("""SELECT release_gid_redirect.gid, + release_gid_redirect.new_id, + release_gid_redirect.created + FROM release_gid_redirect + INNER JOIN release + ON release.id = release_gid_redirect.new_id + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(release_gid_redirect_query, {"recording_gid": recording_gid[0]}) + MB_release_gid_redirect_data = result.fetchall() + except ValueError: + pass + + # release_group + try: + release_group_query = text("""SELECT release_group.id, + release_group.gid, + 
release_group.name, + release_group.artist_credit, + release_group.type, + release_group.comment, + release_group.edits_pending, + release_group.last_updated + FROM release_group + INNER JOIN recording + ON recording.artist_credit = release_group.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(release_group_query, {"recording_gid": recording_gid[0]}) + MB_release_group_data = result.fetchall() + except ValueError: + pass + + # release_group gid redirect + try: + release_group_gid_redirect_query = text("""SELECT release_group_gid_redirect.gid, + release_group_gid_redirect.new_id, + release_group_gid_redirect.created + FROM release_group_gid_redirect + INNER JOIN release_group + ON release_group.id = release_group_gid_redirect.new_id + INNER JOIN recording + ON recording.artist_credit = release_group.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(release_group_gid_redirect_query, {"recording_gid": recording_gid[0]}) + MB_release_group_gid_redirect_data = result.fetchall() + except ValueError: + pass + + # release_group_primary_type + try: + release_group_primary_type_query = text("""SELECT release_group_primary_type.id, release_group_primary_type.name, + release_group_primary_type.parent, release_group_primary_type.child_order, + release_group_primary_type.description, release_group_primary_type.gid + FROM release_group_primary_type INNER JOIN release_group + ON release_group_primary_type.id = release_group.type + INNER JOIN recording + ON recording.artist_credit = release_group.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(release_group_primary_type_query, {"recording_gid": recording_gid[0]}) + MB_release_group_primary_type_data = result.fetchall() + except ValueError: + pass + + # release_packaging + try: + release_packaging_query = text("""SELECT release_packaging.id, + release_packaging.name, + release_packaging.parent, + 
release_packaging.child_order, + release_packaging.description, + release_packaging.gid + FROM release_packaging + INNER JOIN release + ON release.packaging = release_packaging.id + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(release_packaging_query, {"recording_gid": recording_gid[0]}) + MB_release_packaging_data = result.fetchall() + except ValueError: + pass + + # release_status + try: + release_status_query = text("""SELECT release_status.id, + release_status.name, + release_status.parent, + release_status.child_order, + release_status.description, + release_status.gid + FROM release_status + INNER JOIN release + ON release.status = release_status.id + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(release_status_query, {"recording_gid": recording_gid[0]}) + MB_release_status_data = result.fetchall() + except ValueError: + pass + + # script + try: + script_query = text("""SELECT script.id, + script.iso_code, + script.iso_number, + script.name, + script.frequency + FROM script + INNER JOIN release + ON release.script = script.id + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid = :recording_gid + """) + result = connection.execute(script_query, {"recording_gid": recording_gid[0]}) + MB_script_data = result.fetchall() + except ValueError: + pass + + # track + try: + track_query = text("""SELECT track.id, + track.gid, + track.recording, + track.medium, + track.position, + track.number, + track.name, + track.artist_credit, + track.length, + track.edits_pending, + track.last_updated, + track.is_data_track + FROM track + INNER JOIN recording + ON track.recording = recording.id + WHERE recording.gid = :recording_gid + """) + result = connection.execute(track_query, {"recording_gid": recording_gid[0]}) + 
MB_track_data = result.fetchall() + except ValueError: + pass + + + # TO ACOUSTICBRAINZ + with db.engine.connect() as connection: + if MB_artist_credit_data: + for value in MB_artist_credit_data: + transaction = connection.begin() + try: + artist_credit_query = text(""" + INSERT INTO musicbrainz.artist_credit + VALUES (:id, :name, :artist_count, :ref_count, :created)""") + connection.execute(artist_credit_query, {"id" : value[0], + "name" : value[1], + "artist_count" : value[2], + "ref_count" : value[3], + "created" : value[4] + }) + transaction.commit() + print("INSERTED artist_credit data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_artist_type_data: + for value in MB_artist_type_data: + transaction = connection.begin() + try: + artist_type_query = text(""" + INSERT INTO musicbrainz.artist_type + VALUES (:id, :name, :parent, :child_order, :description, :gid)""") + connection.execute(artist_type_query, {"id":value[0], + "name":value[1], + "parent":value[2], + "child_order":value[3], + "description":value[4], + "gid":value[5] + }) + transaction.commit() + print("INSERTED artist_type data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_area_type_data: + for value in MB_area_type_data: + transaction = connection.begin() + try: + area_type_query = text(""" + INSERT INTO musicbrainz.area_type + VALUES (:id, :name, :parent, :child_order, :description, :gid)""") + connection.execute(area_type_query, {"id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]}) + transaction.commit() + print("INSERTED area_type data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_begin_area_type_data: + for value in MB_begin_area_type_data: + transaction = connection.begin() + try: + begin_area_type_query = text(""" + INSERT INTO musicbrainz.area_type + VALUES (:id, :name, :parent, :child_order, 
:description, :gid)""") + connection.execute(begin_area_type_query, {"id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]}) + transaction.commit() + print("INSERTED begin_area_type data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_end_area_type_data: + for value in MB_end_area_type_data: + transaction = connection.begin() + try: + end_area_type_query = text(""" + INSERT INTO musicbrainz.area_type + VALUES (:id, :name, :parent, :child_order, :description, :gid)""") + connection.execute(end_area_type_query, {"id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]}) + transaction.commit() + print("INSERTED end_area_type data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_status_data: + for value in MB_release_status_data: + transaction = connection.begin() + try: + release_status_query = text(""" + INSERT INTO musicbrainz.release_status + VALUES (:id, :name, :parent, :child_order, :description, :gid)""") + result = connection.execute(release_status_query, {"id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]}) + transaction.commit() + print("INSERTED release_status data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_group_primary_type_data: + for value in MB_release_group_primary_type_data: + transaction = connection.begin() + try: + release_group_primary_type_query = text(""" + INSERT INTO musicbrainz.release_group_primary_type + VALUES (:id, :name, :parent, :child_order, :description, :gid)""") + connection.execute(release_group_primary_type_query, {"id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]}) + transaction.commit() + 
print("INSERTED release_group_primary_type data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_medium_format_data: + for value in MB_medium_format_data: + transaction = connection.begin() + try: + medium_format_query = text(""" + INSERT INTO musicbrainz.medium_format + VALUES (:id, :name, :parent, :child_order, :year, :has_discids, :description, :gid)""") + connection.execute(medium_format_query, {"id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "year": value[4], + "has_discids": value[5], + "description": value[6], + "gid": value[7]}) + transaction.commit() + print("INSERTED medium_format data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_packaging_data: + for value in MB_release_packaging_data: + transaction = connection.begin() + try: + release_packaging_query = text(""" + INSERT INTO musicbrainz.release_packaging + VALUES (:id, :name, :parent, :child_order, :description, :gid)""") + connection.execute(release_packaging_query, {"id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]}) + transaction.commit() + print("INSERTED release_packaging data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_language_data: + for value in MB_language_data: # row layout: id, iso_code_2t, iso_code_2b, iso_code_1, name, frequency, iso_code_3 + transaction = connection.begin() + try: + language_query = text(""" + INSERT INTO musicbrainz.language + VALUES (:id, :iso_code_2t, :iso_code_2b, :iso_code_1, :name, :frequency, :iso_code_3)""") + connection.execute(language_query, {"id": value[0], "iso_code_2t": value[1], + "iso_code_2b": value[2], + "iso_code_1": value[3], + "name": value[4], + "frequency": value[5], + "iso_code_3": value[6]}) + transaction.commit() + print("INSERTED language data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_script_data: + for value in MB_script_data: + transaction =
connection.begin() + try: + script_query = text(""" + INSERT INTO musicbrainz.script + VALUES (:id, :iso_code, :iso_number, :name, :frequency)""") + connection.execute(script_query, {"id": value[0], + "iso_code": value[1], + "iso_number": value[2], + "name": value[3], + "frequency": value[4]}) + transaction.commit() + print("INSERTED script data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_gender_data: + for value in MB_gender_data: + transaction = connection.begin() + try: + gender_query = text(""" + INSERT INTO musicbrainz.gender + VALUES (:id, :name, :parent, :child_order, :description, :gid)""") + connection.execute(gender_query, {"id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]}) + transaction.commit() + print("INSERTED gender data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_area_data: + for value in MB_area_data: + transaction = connection.begin() + try: + area_query = text(""" + INSERT INTO musicbrainz.area + VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, + :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, + :ended, :comment)""") + connection.execute(area_query, {"id": value[0], + "gid": value[1], + "name": value[2], + "type": value[3], + "edits_pending": value[4], + "last_updated": value[5], + "begin_date_year": value[6], + "begin_date_month": value[7], + "begin_date_day": value[8], + "end_date_year": value[9], + "end_date_month": value[10], + "end_date_day": value[11], + "ended": value[12], + "comment": value[13]}) + transaction.commit() + print("INSERTED area data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_begin_area_data: + for value in MB_begin_area_data: + transaction = connection.begin() + try: + begin_area_query = text(""" + INSERT INTO musicbrainz.area + VALUES (:id, :gid, 
:name, :type, :edits_pending, :last_updated, :begin_date_year, + :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, + :ended, :comment)""") + connection.execute(begin_area_query, {"id": value[0], + "gid": value[1], + "name": value[2], + "type": value[3], + "edits_pending": value[4], + "last_updated": value[5], + "begin_date_year": value[6], + "begin_date_month": value[7], + "begin_date_day": value[8], + "end_date_year": value[9], + "end_date_month": value[10], + "end_date_day": value[11], + "ended": value[12], + "comment": value[13]}) + transaction.commit() + print("INSERTED begin_area data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_end_area_data: + for value in MB_end_area_data: + transaction = connection.begin() + try: + end_area_query = text(""" + INSERT INTO musicbrainz.area + VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, + :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, + :ended, :comment)""") + connection.execute(end_area_query, {"id": value[0], + "gid": value[1], + "name": value[2], + "type": value[3], + "edits_pending": value[4], + "last_updated": value[5], + "begin_date_year": value[6], + "begin_date_month": value[7], + "begin_date_day": value[8], + "end_date_year": value[9], + "end_date_month": value[10], + "end_date_day": value[11], + "ended": value[12], + "comment": value[13]}) + transaction.commit() + print("INSERTED end_area data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_artist_data: + for value in MB_artist_data: + transaction = connection.begin() + try: + artist_query = text(""" + INSERT INTO musicbrainz.artist + VALUES (:id, :gid, :name, :sort_name, :begin_date_year, :begin_date_month, :begin_date_day, + :end_date_year, :end_date_month, :end_date_day, :type, :area, :gender, :comment, :edits_pending, + :last_updated, :ended, :begin_area, :end_area)""") + 
connection.execute(artist_query, {"id": value[0], + "gid": value[1], + "name": value[2], + "sort_name": value[3], + "begin_date_year": value[4], + "begin_date_month": value[5], + "begin_date_day": value[6], + "end_date_year": value[7], + "end_date_month": value[8], + "end_date_day": value[9], + "type": value[10], + "area": value[11], + "gender": value[12], + "comment": value[13], + "edits_pending": value[14], + "last_updated": value[15], + "ended": value[16], + "begin_area": value[17], + "end_area": value[18]}) + transaction.commit() + print("INSERTED artist data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_artist_credit_name_data: + for value in MB_artist_credit_name_data: + transaction = connection.begin() + try: + artist_credit_name_query = text(""" + INSERT INTO musicbrainz.artist_credit_name + VALUES (:artist_credit, :position, :artist, :name, :join_phrase)""") + connection.execute(artist_credit_name_query, {"artist_credit": value[0], + "position": value[1], + "artist": value[2], + "name": value[3], + "join_phrase": value[4]}) + transaction.commit() + print("INSERTED artist_credit_name data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_artist_gid_redirect_data: + for value in MB_artist_gid_redirect_data: + transaction = connection.begin() + try: + artist_gid_redirect_query = text(""" + INSERT INTO musicbrainz.artist_gid_redirect + VALUES (:gid, :new_id, :created)""") + connection.execute(artist_gid_redirect_query, {"gid": value[0], + "new_id": value[1], + "created": value[2]}) + transaction.commit() + print("INSERTED artist_gid_redirect data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + + if MB_recording_data: + for value in MB_recording_data: + transaction = connection.begin() + try: + recording_query = text(""" + INSERT INTO musicbrainz.recording + VALUES (:id, :gid, :name, :artist_credit, :length, :comment, :edits_pending, :last_updated, 
:video)""") + connection.execute(recording_query, {"id": value[0], + "gid": value[1], + "name": value[2], + "artist_credit": value[3], + "length": value[4], + "comment": value[5], + "edits_pending": value[6], + "last_updated": value[7], + "video": value[8]}) + transaction.commit() + print("INSERTED recording data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_recording_gid_redirect_data: + for value in MB_recording_gid_redirect_data: + transaction = connection.begin() + try: + recording_gid_redirect_query = text(""" + INSERT INTO musicbrainz.recording_gid_redirect + VALUES (:gid, :new_id, :created)""") + connection.execute(recording_gid_redirect_query, {"gid": value[0], + "new_id": value[1], + "created": value[2]}) + transaction.commit() + print("INSERTED recording_gid_redirect data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_group_data: + for value in MB_release_group_data: + transaction = connection.begin() + try: + release_group_query = text(""" + INSERT INTO musicbrainz.release_group + VALUES (:id, :gid, :name, :artist_credit, :type, :comment, :edits_pending, :last_updated)""") + connection.execute(release_group_query, {"id": value[0], + "gid": value[1], + "name": value[2], + "artist_credit": value[3], + "type": value[4], + "comment": value[5], + "edits_pending": value[6], + "last_updated": value[7]}) + transaction.commit() + print("INSERTED release_group data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_group_gid_redirect_data: + for value in MB_release_group_gid_redirect_data: + transaction = connection.begin() + try: + release_group_gid_redirect_query = text(""" + INSERT INTO musicbrainz.release_group_gid_redirect + VALUES (:gid, :new_id, :created)""") + connection.execute(release_group_gid_redirect_query, {"gid": value[0], + "new_id": value[1], + "created": value[2]}) + transaction.commit() + print("INSERTED 
release_gid_redirect data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_data: + for value in MB_release_data: + transaction = connection.begin() + try: + release_query = text(""" + INSERT INTO musicbrainz.release + VALUES (:id, :gid, :name, :artist_credit, :release_group, :status, :packaging, :language, + :script, :barcode, :comment, :edits_pending, :quality, :last_updated)""") + connection.execute(release_query, {"id": value[0], + "gid": value[1], + "name": value[2], + "artist_credit": value[3], + "release_group": value[4], + "status": value[5], + "packaging": value[6], + "language": value[7], + "script": value[8], + "barcode": value[9], + "comment": value[10], + "edits_pending": value[11], + "quality": value[12], + "last_updated": value[13]}) + transaction.commit() + print("INSERTED release data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_gid_redirect_data: + for value in MB_release_gid_redirect_data: + transaction = connection.begin() + try: + release_gid_redirect_query = text(""" + INSERT INTO musicbrainz.release_gid_redirect + VALUES (:gid, :new_id, :created)""") + connection.execute(release_gid_redirect_query, {"gid": value[0], + "new_id": value[1], + "created": value[2]}) + transaction.commit() + print("INSERTED release_gid_redirect data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_medium_data: + for value in MB_medium_data: + transaction = connection.begin() + try: + medium_query = text(""" + INSERT INTO musicbrainz.medium + VALUES (:id, :release, :position, :format, :name, :edits_pending, :last_updated, :track_count)""") + connection.execute(medium_query, {"id": value[0], + "release": value[1], + "position": value[2], + "format": value[3], + "name": value[4], + "edits_pending": value[5], + "last_updated": value[6], + "track_count": value[7]}) + transaction.commit() + print("INSERTED medium data\n") + except 
IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_track_data: + for value in MB_track_data: + transaction = connection.begin() + try: + track_query = text(""" + INSERT INTO musicbrainz.track + VALUES (:id, :gid, :recording, :medium, :position, :number, :name, :artist_credit, :length, + :edits_pending, :last_updated, :is_data_track)""") + connection.execute(track_query, {"id": value[0], + "gid": value[1], + "recording": value[2], + "medium": value[3], + "position": value[4], + "number": value[5], + "name": value[6], + "artist_credit": value[7], + "length": value[8], + "edits_pending": value[9], + "last_updated": value[10], + "is_data_track": value[11]}) + transaction.commit() + print("INSERTED track data\n") + except IntegrityError as e: + print(e.message) + transaction.rollback() + + print("--------------------------------DONE-----------------------------------") diff --git a/manage.py b/manage.py index a0708633e..e582ce5ab 100644 --- a/manage.py +++ b/manage.py @@ -14,6 +14,8 @@ import db.exceptions import db.stats import db.user +from brainzutils import musicbrainz_db +import db.import_mb_data import webserver from brainzutils import musicbrainz_db from db.testing import DatabaseTestCase @@ -179,6 +181,15 @@ def remove_admin(username): sys.exit(1) +@cli.command() +def init_mb_db(): + musicbrainz_db.init_db_engine(current_app.config['MB_DATABASE_URI']) + +@cli.command() +def import_musicbrainz_db(): + print("Importing MusicBrainz data...") + db.import_mb_data.start_import() + # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") From 41e093789cdcb91ccbf6cea98a5c895c5589fe55 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Tue, 5 Jun 2018 18:48:35 +0530 Subject: [PATCH 017/125] Add commands to start the import in readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 579ef7f0c..cadf02ad3 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,10 @@ 
Then you can start all the services: ./develop.sh run --rm webserver python2 manage.py init_mb_db +## Import the MusicBrainz database in AcousticBrainz database: + + ./develop.sh run --rm webserver python2 manage.py import_musicbrainz_db + ### Manually Full installation instructions are available in [INSTALL.md](https://github.com/metabrainz/acousticbrainz-server/blob/master/INSTALL.md) file. After installing, continue the following steps. From 131b4c4c08db29ab2065a363518c47ae2e4ea1fd Mon Sep 17 00:00:00 2001 From: RashiSah Date: Tue, 12 Jun 2018 03:39:09 +0530 Subject: [PATCH 018/125] Get data in batches, 10000 recordings data at a time and make code modular --- db/import_mb_data.py | 2628 ++++++++++++++++++++++++------------------ 1 file changed, 1505 insertions(+), 1123 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 47e7fe58a..feedc9fc0 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -3,1128 +3,1510 @@ from sqlalchemy import text from sqlalchemy.exc import IntegrityError + +def load_artist_credit(connection, gids_in_AB): + artist_credit_query = text(""" + SELECT DISTINCT artist_credit.id, artist_credit.name, artist_credit.artist_count, + artist_credit.ref_count, artist_credit.created + FROM artist_credit + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids OR artist_credit.id in :data + """) + MB_release_fk_artist_credit = [] + for value in MB_release_data: + MB_release_fk_artist_credit.append(value[3]) + MB_release_fk_artist_credit = list(set(MB_release_fk_artist_credit)) + + result = connection.execute(artist_credit_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_release_fk_artist_credit)}) + global MB_artist_credit_data + MB_artist_credit_data = result.fetchall() + + +def load_artist_type(connection, gids_in_AB): + artist_type_query = text(""" + SELECT DISTINCT artist_type.id, + artist_type.name, + artist_type.parent, + artist_type.child_order, + 
artist_type.description, + artist_type.gid + FROM artist_type + INNER JOIN artist + ON artist.type = artist_type.id + INNER JOIN artist_credit + ON artist.id = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(artist_type_query, {'gids': tuple(gids_in_AB)}) + global MB_artist_type_data + MB_artist_type_data = result.fetchall() + + +def load_area_type(connection, gids_in_AB): + area_type_query = text(""" + SELECT DISTINCT area_type.id, + area_type.name, + area_type.parent, + area_type.child_order, + area_type.description, + area_type.gid + FROM area_type + INNER JOIN area + ON area.type = area_type.id + INNER JOIN artist + ON area.id = artist.area + INNER JOIN artist_credit + ON artist.id = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(area_type_query, {'gids': tuple(gids_in_AB)}) + global MB_area_type_data + MB_area_type_data = result.fetchall() + + +def load_begin_area_type(connection, gids_in_AB): + begin_area_type_query = text(""" + SELECT DISTINCT area_type.id, + area_type.name, + area_type.parent, + area_type.child_order, + area_type.description, + area_type.gid + FROM area_type + INNER JOIN area + ON area.type = area_type.id + INNER JOIN artist + ON area.id = artist.begin_area + INNER JOIN artist_credit + ON artist.id = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(begin_area_type_query, {'gids': tuple(gids_in_AB)}) + global MB_begin_area_type_data + MB_begin_area_type_data = result.fetchall() + + +def load_end_area_type(connection, gids_in_AB): + end_area_type_query = text(""" + SELECT DISTINCT area_type.id, + area_type.name, + area_type.parent, + area_type.child_order, + area_type.description, + area_type.gid + FROM area_type + INNER 
JOIN area + ON area.type = area_type.id + INNER JOIN artist + ON area.id = artist.end_area + INNER JOIN artist_credit + ON artist.id = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(end_area_type_query, {'gids': tuple(gids_in_AB)}) + global MB_end_area_type_data + MB_end_area_type_data = result.fetchall() + + +def load_release_status(connection, gids_in_AB): + release_status_query = text(""" + SELECT DISTINCT release_status.id, + release_status.name, + release_status.parent, + release_status.child_order, + release_status.description, + release_status.gid + FROM release_status + INNER JOIN release + ON release.status = release_status.id + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(release_status_query, {'gids': tuple(gids_in_AB)}) + global MB_release_status_data + MB_release_status_data = result.fetchall() + + +def load_release_group_primary_type(connection, gids_in_AB): + release_group_primary_type_query = text(""" + SELECT DISTINCT release_group_primary_type.id, release_group_primary_type.name, + release_group_primary_type.parent, release_group_primary_type.child_order, + release_group_primary_type.description, release_group_primary_type.gid + FROM release_group_primary_type INNER JOIN release_group + ON release_group_primary_type.id = release_group.type + INNER JOIN recording + ON recording.artist_credit = release_group.artist_credit + WHERE recording.gid in :gids OR release_group_primary_type.id in :data + """) + MB_release_group_fk_type = [] + for value in MB_release_group_data: + MB_release_group_fk_type.append(value[4]) + MB_release_group_fk_type = list(set(MB_release_group_fk_type)) + + result = connection.execute(release_group_primary_type_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_release_group_fk_type)}) + global 
MB_release_group_primary_type_data + MB_release_group_primary_type_data = result.fetchall() + + +def load_medium_format(connection, gids_in_AB): + medium_format_query = text(""" + SELECT * FROM medium_format + ORDER BY id + """) + result = connection.execute(medium_format_query) + global MB_medium_format_data + MB_medium_format_data = result.fetchall() + + +def load_release_packaging(connection, gids_in_AB): + release_packaging_query = text(""" + SELECT DISTINCT release_packaging.id, + release_packaging.name, + release_packaging.parent, + release_packaging.child_order, + release_packaging.description, + release_packaging.gid + FROM release_packaging + INNER JOIN release + ON release.packaging = release_packaging.id + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid in :gids OR release_packaging.id in :data + """) + MB_release_fk_packaging = [] + for value in MB_release_data: + MB_release_fk_packaging.append(value[6]) + MB_release_fk_packaging = list(set(MB_release_fk_packaging)) + + result = connection.execute(release_packaging_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_release_fk_packaging)}) + global MB_release_packaging_data + MB_release_packaging_data = result.fetchall() + + +def load_language(connection, gids_in_AB): + language_query = text(""" + SELECT DISTINCT language.id, + language.iso_code_2t, + language.iso_code_2b, + language.iso_code_1, + language.name, + language.frequency, + language.iso_code_3 + FROM language + INNER JOIN release + ON release.language = language.id + INNER JOIN recording + ON recording.artist_credit=release.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(language_query, {'gids': tuple(gids_in_AB)}) + global MB_language_data + MB_language_data = result.fetchall() + + +def load_script(connection, gids_in_AB): + script_query = text(""" + SELECT DISTINCT script.id, + script.iso_code, + script.iso_number, + script.name, + script.frequency + FROM 
script + INNER JOIN release + ON release.script = script.id + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(script_query, {'gids': tuple(gids_in_AB)}) + global MB_script_data + MB_script_data = result.fetchall() + + +def load_gender(connection, gids_in_AB): + gender_query = text(""" + SELECT DISTINCT gender.id, + gender.name, + gender.parent, + gender.child_order, + gender.description, + gender.gid + FROM gender + INNER JOIN artist + ON artist.gender = gender.id + INNER JOIN artist_credit + ON artist.id = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(gender_query, {'gids': tuple(gids_in_AB)}) + global MB_gender_data + MB_gender_data = result.fetchall() + + +def load_area(connection, gids_in_AB): + area_query = text(""" + SELECT DISTINCT area.id, + area.gid, + area.name, + area.type, + area.edits_pending, + area.last_updated, + area.begin_date_year, + area.begin_date_month, + area.begin_date_day, + area.end_date_year, + area.end_date_month, + area.end_date_day, + area.ended, + area.comment + FROM area + INNER JOIN artist + ON area.id = artist.area + INNER JOIN artist_credit + ON artist.id = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(area_query, {'gids': tuple(gids_in_AB)}) + global MB_area_data + MB_area_data = result.fetchall() + + +def load_begin_area(connection, gids_in_AB): + begin_area_query = text(""" + SELECT DISTINCT area.id, + area.gid, + area.name, + area.type, + area.edits_pending, + area.last_updated, + area.begin_date_year, + area.begin_date_month, + area.begin_date_day, + area.end_date_year, + area.end_date_month, + area.end_date_day, + area.ended, + area.comment + FROM area + INNER JOIN artist + ON area.id = artist.begin_area + INNER JOIN 
artist_credit + ON artist.id = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids OR area.id in :data + """) + MB_artist_fk_begin_area = [] + for value in MB_artist_data: + MB_artist_fk_begin_area.append(value[17]) + + result = connection.execute(begin_area_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_artist_fk_begin_area)}) + global MB_begin_area_data + MB_begin_area_data = result.fetchall() + + +def load_end_area(connection, gids_in_AB): + end_area_query = text(""" + SELECT DISTINCT area.id, + area.gid, + area.name, + area.type, + area.edits_pending, + area.last_updated, + area.begin_date_year, + area.begin_date_month, + area.begin_date_day, + area.end_date_year, + area.end_date_month, + area.end_date_day, + area.ended, + area.comment + FROM area + INNER JOIN artist + ON area.id = artist.end_area + INNER JOIN artist_credit + ON artist.id = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(end_area_query, {'gids': tuple(gids_in_AB)}) + global MB_end_area_data + MB_end_area_data = result.fetchall() + + +def load_artist_credit_name(connection, gids_in_AB): + artist_credit_name_query = text(""" + SELECT DISTINCT artist_credit_name.artist_credit, + artist_credit_name.position, + artist_credit_name.artist, + artist_credit_name.name, + artist_credit_name.join_phrase + FROM artist_credit_name + INNER JOIN artist_credit + ON artist_credit_name.artist_credit = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(artist_credit_name_query, {'gids': tuple(gids_in_AB)}) + global MB_artist_credit_name_data + MB_artist_credit_name_data = result.fetchall() + + +def load_artist(connection, gids_in_AB): + artist_query = text(""" + SELECT DISTINCT artist.id, artist.gid, artist.name, artist.sort_name, 
artist.begin_date_year, + artist.begin_date_month, artist.begin_date_day, artist.end_date_year, artist.end_date_month, + artist.end_date_day, artist.type, artist.area, artist.gender, artist.comment, artist.edits_pending, + artist.last_updated, artist.ended, artist.begin_area, artist.end_area + FROM artist + INNER JOIN artist_credit + ON artist_credit.id = artist.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids OR artist.id in :data + """) + MB_artist_credit_name_fk_artist = [] + for value in MB_artist_credit_name_data: + MB_artist_credit_name_fk_artist.append(value[2]) + + result = connection.execute(artist_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_artist_credit_name_fk_artist)}) + global MB_artist_data + MB_artist_data = result.fetchall() + + +def load_artist_gid_redirect(connection, gids_in_AB): + artist_gid_redirect_query = text(""" + SELECT DISTINCT artist_gid_redirect.gid, + artist_gid_redirect.new_id, + artist_gid_redirect.created + FROM artist_gid_redirect + INNER JOIN artist + ON artist.id = artist_gid_redirect.new_id + INNER JOIN artist_credit + ON artist.id = artist_credit.id + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(artist_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + global MB_artist_gid_redirect_data + MB_artist_gid_redirect_data = result.fetchall() + + +def load_recording(connection, gids_in_AB): + recording_query = text(""" + SELECT DISTINCT recording.id, recording.gid, recording.name, recording.artist_credit, + recording.length, recording.comment, recording.edits_pending, recording.last_updated, + recording.video + FROM recording + WHERE recording.gid in :gids + """) + result = connection.execute(recording_query, {'gids': tuple(gids_in_AB)}) + global MB_recording_data + MB_recording_data = result.fetchall() + + +def load_recording_gid_redirect(connection, gids_in_AB): + 
recording_gid_redirect_query = text(""" + SELECT DISTINCT recording_gid_redirect.gid, + recording_gid_redirect.new_id, + recording_gid_redirect.created + FROM recording_gid_redirect + INNER JOIN recording + ON recording.id = recording_gid_redirect.new_id + WHERE recording.gid in :gids + """) + result = connection.execute(recording_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + global MB_recording_gid_redirect_data + MB_recording_gid_redirect_data = result.fetchall() + + +def load_release_group(connection, gids_in_AB): + release_group_query = text(""" + SELECT DISTINCT release_group.id, + release_group.gid, + release_group.name, + release_group.artist_credit, + release_group.type, + release_group.comment, + release_group.edits_pending, + release_group.last_updated + FROM release_group + INNER JOIN recording + ON recording.artist_credit = release_group.artist_credit + WHERE recording.gid in :gids OR release_group.id in :redirect_data OR release_group.id in :release_data + """) + MB_release_group_gid_redirect_fk_release_group = [] + for value in MB_release_group_gid_redirect_data: + MB_release_group_gid_redirect_fk_release_group.append(value[1]) + MB_release_group_gid_redirect_fk_release_group = list(set(MB_release_group_gid_redirect_fk_release_group)) + + MB_release_fk_release_group = [] + for value in MB_release_data: + MB_release_fk_release_group.append(value[4]) + MB_release_fk_release_group = list(set(MB_release_fk_release_group)) + + result = connection.execute(release_group_query, {'gids': tuple(gids_in_AB), + 'redirect_data': tuple(MB_release_group_gid_redirect_fk_release_group), + 'release_data': tuple(MB_release_fk_release_group)}) + global MB_release_group_data + MB_release_group_data = result.fetchall() + + +def load_release_group_gid_redirect(connection, gids_in_AB): + release_group_gid_redirect_query = text(""" + SELECT DISTINCT release_group_gid_redirect.gid, + release_group_gid_redirect.new_id, + release_group_gid_redirect.created + FROM 
release_group_gid_redirect + INNER JOIN release_group + ON release_group.id = release_group_gid_redirect.new_id + INNER JOIN recording + ON recording.artist_credit = release_group.artist_credit + WHERE recording.gid in :gids + """) + result = connection.execute(release_group_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + global MB_release_group_gid_redirect_data + MB_release_group_gid_redirect_data = result.fetchall() + + +def load_release(connection, gids_in_AB): + release_query = text(""" + SELECT DISTINCT release.id, + release.gid, + release.name, + release.artist_credit, + release.release_group, + release.status, + release.packaging, + release.language, + release.script, + release.barcode, + release.comment, + release.edits_pending, + release.quality, + release.last_updated + FROM release + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + WHERE recording.gid in :gids OR release.id in :medium_data OR release.id in :redirect_data + """) + MB_medium_fk_release = [] + for value in MB_medium_data: + MB_medium_fk_release.append(value[1]) + MB_medium_fk_release = list(set(MB_medium_fk_release)) + + MB_release_gid_redirect_fk_release = [] + for value in MB_release_gid_redirect_data: + MB_release_gid_redirect_fk_release.append(value[1]) + MB_release_gid_redirect_fk_release = list(set(MB_release_gid_redirect_fk_release)) + + result = connection.execute(release_query, {'gids': tuple(gids_in_AB), + 'medium_data': tuple(MB_medium_fk_release), + 'redirect_data': tuple(MB_release_gid_redirect_fk_release) + }) + global MB_release_data + MB_release_data = result.fetchall() + + +def load_release_gid_redirect(connection, gids_in_AB): + release_gid_redirect_query = text(""" + SELECT DISTINCT release_gid_redirect.gid, + release_gid_redirect.new_id, + release_gid_redirect.created + FROM release_gid_redirect + INNER JOIN release + ON release.id = release_gid_redirect.new_id + INNER JOIN recording + ON recording.artist_credit = release.artist_credit + 
WHERE recording.gid in :gids + """) + result = connection.execute(release_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + global MB_release_gid_redirect_data + MB_release_gid_redirect_data = result.fetchall() + + +def load_medium(connection, gids_in_AB): + medium_query = text(""" + SELECT DISTINCT medium.id, + medium.release, + medium.position, + medium.format, + medium.name, + medium.edits_pending, + medium.last_updated, + medium.track_count + FROM medium + INNER JOIN release + ON release.id = medium.release + INNER JOIN recording + ON recording.artist_credit=release.artist_credit + WHERE recording.gid in :gids OR medium.id in :data + """) + MB_track_fk_medium = [] + for value in MB_track_data: + MB_track_fk_medium.append(value[3]) + + result = connection.execute(medium_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_track_fk_medium)}) + global MB_medium_data + MB_medium_data = result.fetchall() + + +def load_track(connection, gids_in_AB): + track_query = text(""" + SELECT DISTINCT track.id, + track.gid, + track.recording, + track.medium, + track.position, + track.number, + track.name, + track.artist_credit, + track.length, + track.edits_pending, + track.last_updated, + track.is_data_track + FROM track + INNER JOIN recording + ON track.recording = recording.id + WHERE recording.gid in :gids + """) + result = connection.execute(track_query, {'gids': tuple(gids_in_AB)}) + global MB_track_data + MB_track_data = result.fetchall() + + +print("--------------------------------------------------------------------------------------------------") + + +# TO ACOUSTICBRAINZ +def write_artist_credit(transaction, connection): + artist_credit_query = text(""" + INSERT INTO musicbrainz.artist_credit + VALUES (:id, :name, :artist_count, :ref_count, :created) + """) + values = [{ + "id" : value[0], + "name" : value[1], + "artist_count" : value[2], + "ref_count" : value[3], + "created" : value[4]} for value in MB_artist_credit_data + ] + connection.execute(artist_credit_query, 
values) + transaction.commit() + print("INSERTED artist_credit data\n") + + +def write_artist_type(transaction, connection): + artist_type_query = text(""" + INSERT INTO musicbrainz.artist_type + VALUES (:id, :name, :parent, :child_order, :description, :gid) + """) + values = [{ + "id": value[0], + "name" : value[01], + "parent" : value[2], + "child_order" : value[3], + "description" : value[4], + "gid" : value[5]} for value in MB_artist_type_data + ] + connection.execute(artist_type_query, values) + transaction.commit() + print("INSERTED artist_type data\n") + + +def write_area_type(transaction, connection): + area_type_query = text(""" + INSERT INTO musicbrainz.area_type + VALUES (:id, :name, :parent, :child_order, :description, :gid) + """) + values = [{ + "id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]} for value in MB_area_type_data + ] + connection.execute(area_type_query, values) + transaction.commit() + print("INSERTED area_type data\n") + + +def write_begin_area_type(transaction, connection): + begin_area_type_query = text(""" + INSERT INTO musicbrainz.area_type + VALUES (:id, :name, :parent, :child_order, :description, :gid) + """) + values = [{ + "id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]} for value in MB_begin_area_type_data + ] + connection.execute(begin_area_type_query, values) + transaction.commit() + print("INSERTED begin_area_type data\n") + + +def write_end_area_type(transaction, connection): + end_area_type_query = text(""" + INSERT INTO musicbrainz.area_type + VALUES (:id, :name, :parent, :child_order, :description, :gid) + """) + values = [{ + "id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]} for value in MB_end_area_type_data + ] + connection.execute(end_area_type_query, values) + 
transaction.commit() + print("INSERTED end_area_type data\n") + + +def write_release_status(transaction, connection): + release_status_query = text(""" + INSERT INTO musicbrainz.release_status + VALUES (:id, :name, :parent, :child_order, :description, :gid) + """) + values= [{ + "id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]} for value in MB_release_status_data + ] + result = connection.execute(release_status_query, values) + transaction.commit() + print("INSERTED release_status data\n") + + +def write_release_group_primary_type(transaction, connection): + release_group_primary_type_query = text(""" + INSERT INTO musicbrainz.release_group_primary_type + VALUES (:id, :name, :parent, :child_order, :description, :gid) + """) + values = [{ + "id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]} for value in MB_release_group_primary_type_data + ] + connection.execute(release_group_primary_type_query, values) + transaction.commit() + print("INSERTED release_group_primary_type data\n") + + +def write_medium_format(transaction, connection): + medium_format_query = text(""" + INSERT INTO musicbrainz.medium_format + VALUES (:id, :name, :parent, :child_order, :year, :has_discids, :description, :gid) + """) + values = [{ + "id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "year": value[4], + "has_discids": value[5], + "description": value[6], + "gid": value[7]} for value in MB_medium_format_data + ] + connection.execute(text("""ALTER TABLE musicbrainz.medium_format DROP CONSTRAINT IF EXISTS medium_format_fk_parent""")) + connection.execute(medium_format_query, values) + transaction.commit() + print("INSERTED medium_format data\n") + + +def write_release_packaging(transaction, connection): + release_packaging_query = text(""" + INSERT INTO musicbrainz.release_packaging + VALUES (:id, 
:name, :parent, :child_order, :description, :gid) + """) + values = [{ + "id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]} for value in MB_release_packaging_data + ] + connection.execute(release_packaging_query, values) + transaction.commit() + print("INSERTED release_packaging data\n") + + +def write_language(transaction, connection): + language_query = text(""" + INSERT INTO musicbrainz.language + VALUES (:iso_code_2t, :iso_code_2b, :iso_code_1, :name, :frequency, :iso_code_3) + """) + values = [{ + "iso_code_2t": value[0], + "iso_code_2b": value[1], + "iso_code_1": value[2], + "name": value[3], + "frequency": value[4], + "iso_code_3": value[5]} for value in MB_language_data + ] + connection.execute(language_query, values) + transaction.commit() + print("INSERTED language data\n") + + +def write_script(transaction, connection): + script_query = text(""" + INSERT INTO musicbrainz.script + VALUES (:id, :iso_code, :iso_number, :name, :frequency) + """) + values = [{ + "id": value[0], + "iso_code": value[1], + "iso_number": value[2], + "name": value[3], + "frequency": value[4]} for value in MB_script_data + ] + connection.execute(script_query, values) + transaction.commit() + print("INSERTED script data\n") + + +def write_gender(transaction, connection): + gender_query = text(""" + INSERT INTO musicbrainz.gender + VALUES (:id, :name, :parent, :child_order, :description, :gid) + """) + values = [{ + "id": value[0], + "name": value[1], + "parent": value[2], + "child_order": value[3], + "description": value[4], + "gid": value[5]} for value in MB_gender_data + ] + connection.execute(gender_query, values) + transaction.commit() + print("INSERTED gender data\n") + + +def write_area(transaction, connection): + area_query = text(""" + INSERT INTO musicbrainz.area + VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, + :begin_date_month, :begin_date_day, 
:end_date_year, :end_date_month, :end_date_day, + :ended, :comment) + """) + values = [{ + "id": value[0], + "gid": value[1], + "name": value[2], + "type": value[3], + "edits_pending": value[4], + "last_updated": value[5], + "begin_date_year": value[6], + "begin_date_month": value[7], + "begin_date_day": value[8], + "end_date_year": value[9], + "end_date_month": value[10], + "end_date_day": value[11], + "ended": value[12], + "comment": value[13]} for value in MB_area_data + ] + connection.execute(area_query, values) + transaction.commit() + print("INSERTED area data\n") + + +def write_begin_area(transaction, connection): + begin_area_query = text(""" + INSERT INTO musicbrainz.area + VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, + :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, + :ended, :comment) + """) + values = [{ + "id": value[0], + "gid": value[1], + "name": value[2], + "type": value[3], + "edits_pending": value[4], + "last_updated": value[5], + "begin_date_year": value[6], + "begin_date_month": value[7], + "begin_date_day": value[8], + "end_date_year": value[9], + "end_date_month": value[10], + "end_date_day": value[11], + "ended": value[12], + "comment": value[13]} for value in MB_begin_area_data + ] + connection.execute(begin_area_query, values) + transaction.commit() + print("INSERTED begin_area data\n") + + +def write_end_area(transaction, connection): + end_area_query = text(""" + INSERT INTO musicbrainz.area + VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, + :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, + :ended, :comment) + """) + values = [{ + "id": value[0], + "gid": value[1], + "name": value[2], + "type": value[3], + "edits_pending": value[4], + "last_updated": value[5], + "begin_date_year": value[6], + "begin_date_month": value[7], + "begin_date_day": value[8], + "end_date_year": value[9], + 
"end_date_month": value[10], + "end_date_day": value[11], + "ended": value[12], + "comment": value[13]} for value in MB_end_area_data + ] + connection.execute(end_area_query, values) + transaction.commit() + print("INSERTED end_area data\n") + + +def write_artist(transaction, connection): + artist_query = text(""" + INSERT INTO musicbrainz.artist + VALUES (:id, :gid, :name, :sort_name, :begin_date_year, :begin_date_month, :begin_date_day, + :end_date_year, :end_date_month, :end_date_day, :type, :area, :gender, :comment, :edits_pending, + :last_updated, :ended, :begin_area, :end_area) + ON conflict do nothing + """) + values = [{ + "id": value[0], + "gid": value[1], + "name": value[2], + "sort_name": value[3], + "begin_date_year": value[4], + "begin_date_month": value[5], + "begin_date_day": value[6], + "end_date_year": value[7], + "end_date_month": value[8], + "end_date_day": value[9], + "type": value[10], + "area": value[11], + "gender": value[12], + "comment": value[13], + "edits_pending": value[14], + "last_updated": value[15], + "ended": value[16], + "begin_area": value[17], + "end_area": value[18]} for value in MB_artist_data + ] + connection.execute(artist_query, values) + transaction.commit() + print("INSERTED artist data\n") + + +def write_artist_credit_name(transaction, connection): + artist_credit_name_query = text(""" + INSERT INTO musicbrainz.artist_credit_name + VALUES (:artist_credit, :position, :artist, :name, :join_phrase) + """) + values = [{ + "artist_credit": value[0], + "position": value[1], + "artist": value[2], + "name": value[3], + "join_phrase": value[4]} for value in MB_artist_credit_name_data + ] + connection.execute(artist_credit_name_query, values) + transaction.commit() + print("INSERTED artist_credit_name data\n") + + +def write_artist_gid_redirect(transaction, connection): + artist_gid_redirect_query = text(""" + INSERT INTO musicbrainz.artist_gid_redirect + VALUES (:gid, :new_id, :created) + """) + values = [{ + "gid": value[0], + 
"new_id": value[1], + "created": value[2]} for value in MB_artist_gid_redirect_data + ] + connection.execute(artist_gid_redirect_query, values) + transaction.commit() + print("INSERTED artist_gid_redirect data\n") + + +def write_recording(transaction, connection): + recording_query = text(""" + INSERT INTO musicbrainz.recording + VALUES (:id, :gid, :name, :artist_credit, :length, :comment, :edits_pending, :last_updated, :video) + """) + values = [{ + "id": value[0], + "gid": value[1], + "name": value[2], + "artist_credit": value[3], + "length": value[4], + "comment": value[5], + "edits_pending": value[6], + "last_updated": value[7], + "video": value[8]} for value in MB_recording_data + ] + connection.execute(recording_query, values) + transaction.commit() + print("INSERTED recording data\n") + + +def write_recording_gid_redirect(transaction, connection): + recording_gid_redirect_query = text(""" + INSERT INTO musicbrainz.recording_gid_redirect + VALUES (:gid, :new_id, :created) + """) + values = [{"gid": value[0], + "new_id": value[1], + "created": value[2]} for value in MB_recording_gid_redirect_data + ] + connection.execute(recording_gid_redirect_query, values) + transaction.commit() + print("INSERTED recording_gid_redirect data\n") + + +def write_release_group(transaction, connection): + release_group_query = text(""" + INSERT INTO musicbrainz.release_group + VALUES (:id, :gid, :name, :artist_credit, :type, :comment, :edits_pending, :last_updated) + """) + values = [{ + "id": value[0], + "gid": value[1], + "name": value[2], + "artist_credit": value[3], + "type": value[4], + "comment": value[5], + "edits_pending": value[6], + "last_updated": value[7]} for value in MB_release_group_data + ] + connection.execute(release_group_query, values) + transaction.commit() + print("INSERTED release_group data\n") + + +def write_release_group_gid_redirect(transaction, connection): + release_group_gid_redirect_query = text(""" + INSERT INTO 
musicbrainz.release_group_gid_redirect + VALUES (:gid, :new_id, :created) + """) + values = [{ + "gid": value[0], + "new_id": value[1], + "created": value[2]} for value in MB_release_gid_redirect_data + ] + connection.execute(release_group_gid_redirect_query, values) + transaction.commit() + print("INSERTED release_gid_redirect data\n") + + +def write_release(transaction, connection): + release_query = text(""" + INSERT INTO musicbrainz.release + VALUES (:id, :gid, :name, :artist_credit, :release_group, :status, :packaging, :language, + :script, :barcode, :comment, :edits_pending, :quality, :last_updated) + """) + values = [{ + "id": value[0], + "gid": value[1], + "name": value[2], + "artist_credit": value[3], + "release_group": value[4], + "status": value[5], + "packaging": value[6], + "language": value[7], + "script": value[8], + "barcode": value[9], + "comment": value[10], + "edits_pending": value[11], + "quality": value[12], + "last_updated": value[13]} for value in MB_release_data + ] + connection.execute(release_query, values) + transaction.commit() + print("INSERTED release data\n") + + +def write_release_gid_redirect(transaction, connection): + release_gid_redirect_query = text(""" + INSERT INTO musicbrainz.release_gid_redirect + VALUES (:gid, :new_id, :created) + """) + values = [{ + "gid": value[0], + "new_id": value[1], + "created": value[2]} for value in MB_release_gid_redirect_data + ] + connection.execute(release_gid_redirect_query, values) + transaction.commit() + print("INSERTED release_gid_redirect data\n") + + +def write_medium(transaction, connection): + medium_query = text(""" + INSERT INTO musicbrainz.medium + VALUES (:id, :release, :position, :format, :name, :edits_pending, :last_updated, :track_count) + """) + values = [{ + "id": value[0], + "release": value[1], + "position": value[2], + "format": value[3], + "name": value[4], + "edits_pending": value[5], + "last_updated": value[6], + "track_count": value[7]} for value in MB_medium_data + ] + 
connection.execute(medium_query, values) + transaction.commit() + print("INSERTED medium data\n") + + +def write_track(transaction, connection): + track_query = text(""" + INSERT INTO musicbrainz.track + VALUES (:id, :gid, :recording, :medium, :position, :number, :name, :artist_credit, :length, + :edits_pending, :last_updated, :is_data_track) + """) + values = [{ + "id": value[0], + "gid": value[1], + "recording": value[2], + "medium": value[3], + "position": value[4], + "number": value[5], + "name": value[6], + "artist_credit": value[7], + "length": value[8], + "edits_pending": value[9], + "last_updated": value[10], + "is_data_track": value[11]} for value in MB_track_data + ] + connection.execute(track_query, values) + transaction.commit() + print("INSERTED track data\n") + + + + +# FUNCTION TO CALL ALL INSERTS + +def insert_MB_data_AB(): + with db.engine.connect() as connection: + if MB_artist_credit_data: + transaction = connection.begin() + try: + write_artist_credit(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_artist_type_data: + transaction = connection.begin() + try: + write_artist_type(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_area_type_data: + transaction = connection.begin() + try: + write_area_type(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_begin_area_type_data: + transaction = connection.begin() + try: + write_begin_area_type(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_end_area_type_data: + transaction = connection.begin() + try: + write_end_area_type(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_status_data: + transaction = connection.begin() + try: + write_release_status(transaction, connection) + except IntegrityError as e: 
+ print(e.message) + transaction.rollback() + + if MB_release_group_primary_type_data: + transaction = connection.begin() + try: + write_release_group_primary_type(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_medium_format_data: + transaction = connection.begin() + try: + write_medium_format(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_packaging_data: + transaction = connection.begin() + try: + write_release_packaging(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_language_data: + transaction = connection.begin() + try: + write_language(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_script_data: + transaction = connection.begin() + try: + write_script(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_gender_data: + transaction = connection.begin() + try: + write_gender(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_area_data: + transaction = connection.begin() + try: + write_area(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_begin_area_data: + transaction = connection.begin() + try: + write_begin_area(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_end_area_data: + transaction = connection.begin() + try: + write_end_area(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_artist_data: + transaction = connection.begin() + try: + write_artist(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_artist_credit_name_data: + transaction = connection.begin() + try: 
+ write_artist_credit_name(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_artist_gid_redirect_data: + transaction = connection.begin() + try: + write_artist_gid_redirect(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + + if MB_recording_data: + transaction = connection.begin() + try: + write_recording(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_recording_gid_redirect_data: + transaction = connection.begin() + try: + write_recording_gid_redirect(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_group_data: + transaction = connection.begin() + try: + write_release_group(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_group_gid_redirect_data: + transaction = connection.begin() + try: + write_release_group_gid_redirect(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_data: + transaction = connection.begin() + try: + write_release(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_release_gid_redirect_data: + transaction = connection.begin() + try: + write_release_gid_redirect(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_medium_data: + transaction = connection.begin() + try: + write_medium(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + if MB_track_data: + transaction = connection.begin() + try: + write_track(transaction, connection) + except IntegrityError as e: + print(e.message) + transaction.rollback() + + +def fetch_musicbrainz_data(gids_in_AB): + with musicbrainz_db.engine.begin() as connection: + # track + try: 
+ load_track(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # MEDIUM + try: + load_medium(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # release_gid_redirect + try: + load_release_gid_redirect(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # RELEASE + try: + load_release(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # ARTIST CREDIT + try: + load_artist_credit(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # ARTIST CREDIT NAME + try: + load_artist_credit_name(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # ARTIST + try: + load_artist(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # ARTIST TYPE + try: + load_artist_type(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # RECORDING + try: + load_recording(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # AREA + try: + load_area(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # BEGIN AREA + try: + load_begin_area(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # END AREA + try: + load_end_area(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # AREA TYPE + try: + load_area_type(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # BEGIN AREA TYPE + try: + load_begin_area_type(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # END AREA TYPE + try: + load_end_area_type(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # ARTIST GID REDIRECT + try: + 
load_artist_gid_redirect(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # GENDER + try: + load_gender(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # LANGUAGE + try: + load_language(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # MEDIUM FORMAT + try: + load_medium_format(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # RECORDING GID REDIRECT + try: + load_recording_gid_redirect(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # release_group gid redirect + try: + load_release_group_gid_redirect(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # release_group + try: + load_release_group(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # release_group_primary_type + try: + load_release_group_primary_type(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # release_packaging + try: + load_release_packaging(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # release_status + try: + load_release_status(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + # script + try: + load_script(connection, gids_in_AB) + except ValueError: + print("No Data found for the recordings") + + insert_MB_data_AB() + print("--------------------------------DONE-----------------------------------") + + def start_import(): - with db.engine.begin() as conn: + with db.engine.begin() as connection: lowlevel_query = text("""SELECT gid from lowlevel""") - gids = conn.execute(lowlevel_query) - gids_in_AB = gids.fetchall() - for recording_gid in gids_in_AB: - MB_artist_credit_data, MB_recording_data, MB_artist_data, MB_artist_type_data, MB_area_data, \ - 
MB_script_data, MB_release_data, MB_release_group_primary_type_data, MB_medium_data, \ - MB_track_data, MB_gender_data, MB_language_data, MB_medium_format_data, MB_release_group_data, \ - MB_release_status_data, MB_artist_gid_redirect_data, MB_recording_gid_redirect_data, \ - MB_release_group_gid_redirect_data, MB_release_gid_redirect_data, MB_artist_credit_name_data, \ - MB_area_type_data, MB_release_packaging_data = (0,)*22 - - # FROM MUSICBRAINZ - with musicbrainz_db.engine.begin() as connection: - # ARTIST CREDIT - try: - artist_credit_query = text("""SELECT artist_credit.id, artist_credit.name, artist_credit.artist_count, - artist_credit.ref_count, artist_credit.created - FROM artist_credit - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid= :recording_gid - """) - result = connection.execute(artist_credit_query, {"recording_gid" : recording_gid[0]}) - MB_artist_credit_data = result.fetchall() - except ValueError: - pass - - try: - artist_query = text(""" - SELECT artist.id, artist.gid, artist.name, artist.sort_name, artist.begin_date_year, - artist.begin_date_month, artist.begin_date_day, artist.end_date_year, artist.end_date_month, - artist.end_date_day, artist.type, artist.area, artist.gender, artist.comment, artist.edits_pending, - artist.last_updated, artist.ended, artist.begin_area, artist.end_area - FROM artist - INNER JOIN artist_credit - ON artist_credit.id = artist.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(artist_query, {"recording_gid": recording_gid[0]}) - MB_artist_data = result.fetchall() - except ValueError: - pass - - # ARTIST TYPE - try: - artist_type_query = text("""SELECT artist_type.id, - artist_type.name, - artist_type.parent, - artist_type.child_order, - artist_type.description, - artist_type.gid - FROM artist_type - INNER JOIN artist - ON artist.type = artist_type.id - INNER JOIN 
artist_credit - ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(artist_type_query, {"recording_gid": recording_gid[0]}) - MB_artist_type_data = result.fetchall() - except ValueError: - pass - - # RECORDING - try: - recording_query = text("""SELECT recording.id, recording.gid, recording.name, recording.artist_credit, - recording.length, recording.comment, recording.edits_pending, recording.last_updated, - recording.video - FROM recording - WHERE recording.gid = :recording_gid - """) - result = connection.execute(recording_query, {"recording_gid": recording_gid[0]}) - MB_recording_data = result.fetchall() - except ValueError: - pass - - # AREA - try: - area_query = text(""" - SELECT area.id, - area.gid, - area.name, - area.type, - area.edits_pending, - area.last_updated, - area.begin_date_year, - area.begin_date_month, - area.begin_date_day, - area.end_date_year, - area.end_date_month, - area.end_date_day, - area.ended, - area.comment - FROM area - INNER JOIN artist - ON area.id = artist.area - INNER JOIN artist_credit - ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(area_query, {"recording_gid": recording_gid[0]}) - MB_area_data = result.fetchall() - except ValueError: - pass - - # BEGIN AREA - try: - begin_area_query = text(""" - SELECT area.id, - area.gid, - area.name, - area.type, - area.edits_pending, - area.last_updated, - area.begin_date_year, - area.begin_date_month, - area.begin_date_day, - area.end_date_year, - area.end_date_month, - area.end_date_day, - area.ended, - area.comment - FROM area - INNER JOIN artist - ON area.id = artist.begin_area - INNER JOIN artist_credit - ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = 
:recording_gid - """) - result = connection.execute(begin_area_query, {"recording_gid": recording_gid[0]}) - MB_begin_area_data = result.fetchall() - except ValueError: - pass - - # END AREA - try: - end_area_query = text(""" - SELECT area.id, - area.gid, - area.name, - area.type, - area.edits_pending, - area.last_updated, - area.begin_date_year, - area.begin_date_month, - area.begin_date_day, - area.end_date_year, - area.end_date_month, - area.end_date_day, - area.ended, - area.comment - FROM area - INNER JOIN artist - ON area.id = artist.end_area - INNER JOIN artist_credit - ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(end_area_query, {"recording_gid": recording_gid[0]}) - MB_end_area_data = result.fetchall() - except ValueError: - pass - - # AREA TYPE - try: - area_type_query = text("""SELECT area_type.id, - area_type.name, - area_type.parent, - area_type.child_order, - area_type.description, - area_type.gid - FROM area_type - INNER JOIN area - ON area.type = area_type.id - INNER JOIN artist - ON area.id = artist.area - INNER JOIN artist_credit - ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(area_type_query, {"recording_gid": recording_gid[0]}) - MB_area_type_data = result.fetchall() - except ValueError: - pass - - # BEGIN AREA TYPE - try: - begin_area_type_query = text("""SELECT area_type.id, - area_type.name, - area_type.parent, - area_type.child_order, - area_type.description, - area_type.gid - FROM area_type - INNER JOIN area - ON area.type = area_type.id - INNER JOIN artist - ON area.id = artist.begin_area - INNER JOIN artist_credit - ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = 
connection.execute(begin_area_type_query, {"recording_gid": recording_gid[0]}) - MB_begin_area_type_data = result.fetchall() - except ValueError: - pass - - # END AREA TYPE - try: - end_area_type_query = text("""SELECT area_type.id, - area_type.name, - area_type.parent, - area_type.child_order, - area_type.description, - area_type.gid - FROM area_type - INNER JOIN area - ON area.type = area_type.id - INNER JOIN artist - ON area.id = artist.end_area - INNER JOIN artist_credit - ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(end_area_type_query, {"recording_gid": recording_gid[0]}) - MB_end_area_type_data = result.fetchall() - except ValueError: - pass - - # ARTIST CREDIT NAME - try: - artist_credit_name_query = text("""SELECT artist_credit_name.artist_credit, - artist_credit_name.position, - artist_credit_name.artist, - artist_credit_name.name, - artist_credit_name.join_phrase - FROM artist_credit_name - INNER JOIN artist_credit - ON artist_credit_name.artist_credit = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(artist_credit_name_query, {"recording_gid": recording_gid[0]}) - MB_artist_credit_name_data = result.fetchall() - except ValueError: - pass - - # ARTIST GID REDIRECT - try: - artist_gid_redirect_query = text("""SELECT artist_gid_redirect.gid, - artist_gid_redirect.new_id, - artist_gid_redirect.created - FROM artist_gid_redirect - INNER JOIN artist - ON artist.id = artist_gid_redirect.new_id - INNER JOIN artist_credit - ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(artist_gid_redirect_query, {"recording_gid": recording_gid[0]}) - MB_artist_gid_redirect_data = result.fetchall() - 
except ValueError: - pass - - - # GENDER - try: - gender_query = text("""SELECT gender.id, - gender.name, - gender.parent, - gender.child_order, - gender.description, - gender.gid - FROM gender - INNER JOIN artist - ON artist.gender = gender.id - INNER JOIN artist_credit - ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(gender_query, {"recording_gid": recording_gid[0]}) - MB_gender_data = result.fetchall() - except ValueError: - pass - - # RELEASE - try: - release_query = text("""SELECT release.id, - release.gid, - release.name, - release.artist_credit, - release.release_group, - release.status, - release.packaging, - release.language, - release.script, - release.barcode, - release.comment, - release.edits_pending, - release.quality, - release.last_updated - FROM release - INNER JOIN recording - ON recording.artist_credit = release.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(release_query, {"recording_gid": recording_gid[0]}) - MB_release_data = result.fetchall() - except ValueError: - pass - - # LANGUAGE - try: - language_query = text("""SELECT language.id, - language.iso_code_2t, - language.iso_code_2b, - language.iso_code_1, - language.name, - language.frequency, - language.iso_code_3 - FROM language - INNER JOIN release - ON release.language = language.id - INNER JOIN recording - ON recording.artist_credit=release.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(language_query, {"recording_gid": recording_gid[0]}) - MB_language_data = result.fetchall() - except ValueError: - pass - - # MEDIUM - try: - medium_query = text("""SELECT medium.id, - medium.release, - medium.position, - medium.format, - medium.name, - medium.edits_pending, - medium.last_updated, - medium.track_count - FROM medium - INNER JOIN release - ON release.id = medium.release - 
INNER JOIN recording - ON recording.artist_credit=release.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(medium_query, {"recording_gid": recording_gid[0]}) - MB_medium_data = result.fetchall() - except ValueError: - pass - - # MEDIUM FORMAT - try: - medium_format_query = text("""SELECT medium_format.id, - medium_format.name, - medium_format.parent, - medium_format.child_order, - medium_format.year, - medium_format.has_discids, - medium_format.description, - medium_format.gid - FROM medium_format - INNER JOIN medium - ON medium_format.id = medium.format - INNER JOIN release - ON release.id = medium.release - INNER JOIN recording - ON recording.artist_credit = release.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(medium_format_query, {"recording_gid": recording_gid[0]}) - MB_medium_format_data = result.fetchall() - except ValueError: - pass - - # RECORDING GID REDIRECT - try: - recording_gid_redirect_query = text("""SELECT recording_gid_redirect.gid, - recording_gid_redirect.new_id, - recording_gid_redirect.created - FROM recording_gid_redirect - INNER JOIN recording - ON recording.id = recording_gid_redirect.new_id - WHERE recording.gid = :recording_gid - """) - result = connection.execute(recording_gid_redirect_query, {"recording_gid": recording_gid[0]}) - MB_recording_gid_redirect_data = result.fetchall() - except ValueError: - pass - - # release_gid_redirect - try: - release_gid_redirect_query = text("""SELECT release_gid_redirect.gid, - release_gid_redirect.new_id, - release_gid_redirect.created - FROM release_gid_redirect - INNER JOIN release - ON release.id = release_gid_redirect.new_id - INNER JOIN recording - ON recording.artist_credit = release.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(release_gid_redirect_query, {"recording_gid": recording_gid[0]}) - MB_release_gid_redirect_data = result.fetchall() - except ValueError: - 
pass - - # release_group - try: - release_group_query = text("""SELECT release_group.id, - release_group.gid, - release_group.name, - release_group.artist_credit, - release_group.type, - release_group.comment, - release_group.edits_pending, - release_group.last_updated - FROM release_group - INNER JOIN recording - ON recording.artist_credit = release_group.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(release_group_query, {"recording_gid": recording_gid[0]}) - MB_release_group_data = result.fetchall() - except ValueError: - pass - - # release_group gid redirect - try: - release_group_gid_redirect_query = text("""SELECT release_group_gid_redirect.gid, - release_group_gid_redirect.new_id, - release_group_gid_redirect.created - FROM release_group_gid_redirect - INNER JOIN release_group - ON release_group.id = release_group_gid_redirect.new_id - INNER JOIN recording - ON recording.artist_credit = release_group.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(release_group_gid_redirect_query, {"recording_gid": recording_gid[0]}) - MB_release_group_gid_redirect_data = result.fetchall() - except ValueError: - pass - - # release_group_primary_type - try: - release_group_primary_type_query = text("""SELECT release_group_primary_type.id, release_group_primary_type.name, - release_group_primary_type.parent, release_group_primary_type.child_order, - release_group_primary_type.description, release_group_primary_type.gid - FROM release_group_primary_type INNER JOIN release_group - ON release_group_primary_type.id = release_group.type - INNER JOIN recording - ON recording.artist_credit = release_group.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(release_group_primary_type_query, {"recording_gid": recording_gid[0]}) - MB_release_group_primary_type_data = result.fetchall() - except ValueError: - pass - - # release_packaging - try: - 
release_packaging_query = text("""SELECT release_packaging.id, - release_packaging.name, - release_packaging.parent, - release_packaging.child_order, - release_packaging.description, - release_packaging.gid - FROM release_packaging - INNER JOIN release - ON release.packaging = release_packaging.id - INNER JOIN recording - ON recording.artist_credit = release.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(release_packaging_query, {"recording_gid": recording_gid[0]}) - MB_release_packaging_data = result.fetchall() - except ValueError: - pass - - # release_status - try: - release_status_query = text("""SELECT release_status.id, - release_status.name, - release_status.parent, - release_status.child_order, - release_status.description, - release_status.gid - FROM release_status - INNER JOIN release - ON release.status = release_status.id - INNER JOIN recording - ON recording.artist_credit = release.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(release_status_query, {"recording_gid": recording_gid[0]}) - MB_release_status_data = result.fetchall() - except ValueError: - pass - - # script - try: - script_query = text("""SELECT script.id, - script.iso_code, - script.iso_number, - script.name, - script.frequency - FROM script - INNER JOIN release - ON release.script = script.id - INNER JOIN recording - ON recording.artist_credit = release.artist_credit - WHERE recording.gid = :recording_gid - """) - result = connection.execute(script_query, {"recording_gid": recording_gid[0]}) - MB_script_data = result.fetchall() - except ValueError: - pass - - # track - try: - track_query = text("""SELECT track.id, - track.gid, - track.recording, - track.medium, - track.position, - track.number, - track.name, - track.artist_credit, - track.length, - track.edits_pending, - track.last_updated, - track.is_data_track - FROM track - INNER JOIN recording - ON track.recording = recording.id - WHERE recording.gid 
= :recording_gid - """) - result = connection.execute(track_query, {"recording_gid": recording_gid[0]}) - MB_track_data = result.fetchall() - except ValueError: - pass - - - # TO ACOUSTICBRAINZ - with db.engine.connect() as connection: - if MB_artist_credit_data: - for value in MB_artist_credit_data: - transaction = connection.begin() - try: - artist_credit_query = text(""" - INSERT INTO musicbrainz.artist_credit - VALUES (:id, :name, :artist_count, :ref_count, :created)""") - connection.execute(artist_credit_query, {"id" : value[0], - "name" : value[1], - "artist_count" : value[2], - "ref_count" : value[3], - "created" : value[4] - }) - transaction.commit() - print("INSERTED artist_credit data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_artist_type_data: - for value in MB_artist_type_data: - transaction = connection.begin() - try: - artist_type_query = text(""" - INSERT INTO musicbrainz.artist_type - VALUES (:id, :name, :parent, :child_order, :description, :gid)""") - connection.execute(artist_type_query, {"id":value[0], - "name":value[1], - "parent":value[2], - "child_order":value[3], - "description":value[4], - "gid":value[5] - }) - transaction.commit() - print("INSERTED artist_type data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_area_type_data: - for value in MB_area_type_data: - transaction = connection.begin() - try: - area_type_query = text(""" - INSERT INTO musicbrainz.area_type - VALUES (:id, :name, :parent, :child_order, :description, :gid)""") - connection.execute(area_type_query, {"id": value[0], - "name": value[1], - "parent": value[2], - "child_order": value[3], - "description": value[4], - "gid": value[5]}) - transaction.commit() - print("INSERTED area_type data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_begin_area_type_data: - for value in MB_begin_area_type_data: - transaction = connection.begin() - try: - 
begin_area_type_query = text(""" - INSERT INTO musicbrainz.area_type - VALUES (:id, :name, :parent, :child_order, :description, :gid)""") - connection.execute(begin_area_type_query, {"id": value[0], - "name": value[1], - "parent": value[2], - "child_order": value[3], - "description": value[4], - "gid": value[5]}) - transaction.commit() - print("INSERTED begin_area_type data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_end_area_type_data: - for value in MB_end_area_type_data: - transaction = connection.begin() - try: - end_area_type_query = text(""" - INSERT INTO musicbrainz.area_type - VALUES (:id, :name, :parent, :child_order, :description, :gid)""") - connection.execute(end_area_type_query, {"id": value[0], - "name": value[1], - "parent": value[2], - "child_order": value[3], - "description": value[4], - "gid": value[5]}) - transaction.commit() - print("INSERTED end_area_type data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_status_data: - for value in MB_release_status_data: - transaction = connection.begin() - try: - release_status_query = text(""" - INSERT INTO musicbrainz.release_status - VALUES (:id, :name, :parent, :child_order, :description, :gid)""") - result = connection.execute(release_status_query, {"id": value[0], - "name": value[1], - "parent": value[2], - "child_order": value[3], - "description": value[4], - "gid": value[5]}) - transaction.commit() - print("INSERTED release_status data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_group_primary_type_data: - for value in MB_release_group_primary_type_data: - transaction = connection.begin() - try: - release_group_primary_type_query = text(""" - INSERT INTO musicbrainz.release_group_primary_type - VALUES (:id, :name, :parent, :child_order, :description, :gid)""") - connection.execute(release_group_primary_type_query, {"id": value[0], - "name": value[1], - 
"parent": value[2], - "child_order": value[3], - "description": value[4], - "gid": value[5]}) - transaction.commit() - print("INSERTED release_group_primary_type data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_medium_format_data: - for value in MB_medium_format_data: - transaction = connection.begin() - try: - medium_format_query = text(""" - INSERT INTO musicbrainz.medium_format - VALUES (:id, :name, :parent, :child_order, :year, :has_discids, :description, :gid)""") - connection.execute(medium_format_query, {"id": value[0], - "name": value[1], - "parent": value[2], - "child_order": value[3], - "year": value[4], - "has_discids": value[5], - "description": value[6], - "gid": value[7]}) - transaction.commit() - print("INSERTED medium_format data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_packaging_data: - for value in MB_release_packaging_data: - transaction = connection.begin() - try: - release_packaging_query = text(""" - INSERT INTO musicbrainz.release_packaging - VALUES (:id, :name, :parent, :child_order, :description, :gid)""") - connection.execute(release_packaging_query, {"id": value[0], - "name": value[1], - "parent": value[2], - "child_order": value[3], - "description": value[4], - "gid": value[5]}) - transaction.commit() - print("INSERTED release_packaging data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_language_data: - for value in MB_language_data: - transaction = connection.begin() - try: - language_query = text(""" - INSERT INTO musicbrainz.language - VALUES (:iso_code_2t, :iso_code_2b, :iso_code_1, :name, :frequency, :iso_code_3)""") - connection.execute(language_query, {"iso_code_2t": value[0], - "iso_code_2b": value[1], - "iso_code_1": value[2], - "name": value[3], - "frequency": value[4], - "iso_code_3": value[5]}) - transaction.commit() - print("INSERTED language data\n") - except IntegrityError as e: - 
print(e.message) - transaction.rollback() - - if MB_script_data: - for value in MB_script_data: - transaction = connection.begin() - try: - script_query = text(""" - INSERT INTO musicbrainz.script - VALUES (:id, :iso_code, :iso_number, :name, :frequency)""") - connection.execute(script_query, {"id": value[0], - "iso_code": value[1], - "iso_number": value[2], - "name": value[3], - "frequency": value[4]}) - transaction.commit() - print("INSERTED script data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_gender_data: - for value in MB_gender_data: - transaction = connection.begin() - try: - gender_query = text(""" - INSERT INTO musicbrainz.gender - VALUES (:id, :name, :parent, :child_order, :description, :gid)""") - connection.execute(gender_query, {"id": value[0], - "name": value[1], - "parent": value[2], - "child_order": value[3], - "description": value[4], - "gid": value[5]}) - transaction.commit() - print("INSERTED gender data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_area_data: - for value in MB_area_data: - transaction = connection.begin() - try: - area_query = text(""" - INSERT INTO musicbrainz.area - VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, - :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, - :ended, :comment)""") - connection.execute(area_query, {"id": value[0], - "gid": value[1], - "name": value[2], - "type": value[3], - "edits_pending": value[4], - "last_updated": value[5], - "begin_date_year": value[6], - "begin_date_month": value[7], - "begin_date_day": value[8], - "end_date_year": value[9], - "end_date_month": value[10], - "end_date_day": value[11], - "ended": value[12], - "comment": value[13]}) - transaction.commit() - print("INSERTED area data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_begin_area_data: - for value in MB_begin_area_data: - 
transaction = connection.begin() - try: - begin_area_query = text(""" - INSERT INTO musicbrainz.area - VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, - :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, - :ended, :comment)""") - connection.execute(begin_area_query, {"id": value[0], - "gid": value[1], - "name": value[2], - "type": value[3], - "edits_pending": value[4], - "last_updated": value[5], - "begin_date_year": value[6], - "begin_date_month": value[7], - "begin_date_day": value[8], - "end_date_year": value[9], - "end_date_month": value[10], - "end_date_day": value[11], - "ended": value[12], - "comment": value[13]}) - transaction.commit() - print("INSERTED begin_area data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_end_area_data: - for value in MB_end_area_data: - transaction = connection.begin() - try: - end_area_query = text(""" - INSERT INTO musicbrainz.area - VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, - :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, - :ended, :comment)""") - connection.execute(end_area_query, {"id": value[0], - "gid": value[1], - "name": value[2], - "type": value[3], - "edits_pending": value[4], - "last_updated": value[5], - "begin_date_year": value[6], - "begin_date_month": value[7], - "begin_date_day": value[8], - "end_date_year": value[9], - "end_date_month": value[10], - "end_date_day": value[11], - "ended": value[12], - "comment": value[13]}) - transaction.commit() - print("INSERTED end_area data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_artist_data: - for value in MB_artist_data: - transaction = connection.begin() - try: - artist_query = text(""" - INSERT INTO musicbrainz.artist - VALUES (:id, :gid, :name, :sort_name, :begin_date_year, :begin_date_month, :begin_date_day, - :end_date_year, :end_date_month, 
:end_date_day, :type, :area, :gender, :comment, :edits_pending, - :last_updated, :ended, :begin_area, :end_area)""") - connection.execute(artist_query, {"id": value[0], - "gid": value[1], - "name": value[2], - "sort_name": value[3], - "begin_date_year": value[4], - "begin_date_month": value[5], - "begin_date_day": value[6], - "end_date_year": value[7], - "end_date_month": value[8], - "end_date_day": value[9], - "type": value[10], - "area": value[11], - "gender": value[12], - "comment": value[13], - "edits_pending": value[14], - "last_updated": value[15], - "ended": value[16], - "begin_area": value[17], - "end_area": value[18]}) - transaction.commit() - print("INSERTED artist data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_artist_credit_name_data: - for value in MB_artist_credit_name_data: - transaction = connection.begin() - try: - artist_credit_name_query = text(""" - INSERT INTO musicbrainz.artist_credit_name - VALUES (:artist_credit, :position, :artist, :name, :join_phrase)""") - connection.execute(artist_credit_name_query, {"artist_credit": value[0], - "position": value[1], - "artist": value[2], - "name": value[3], - "join_phrase": value[4]}) - transaction.commit() - print("INSERTED artist_credit_name data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_artist_gid_redirect_data: - for value in MB_artist_gid_redirect_data: - transaction = connection.begin() - try: - artist_gid_redirect_query = text(""" - INSERT INTO musicbrainz.artist_gid_redirect - VALUES (:gid, :new_id, :created)""") - connection.execute(artist_gid_redirect_query, {"gid": value[0], - "new_id": value[1], - "created": value[2]}) - transaction.commit() - print("INSERTED artist_gid_redirect data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - - if MB_recording_data: - for value in MB_recording_data: - transaction = connection.begin() - try: - recording_query = text(""" - INSERT 
INTO musicbrainz.recording - VALUES (:id, :gid, :name, :artist_credit, :length, :comment, :edits_pending, :last_updated, :video)""") - connection.execute(recording_query, {"id": value[0], - "gid": value[1], - "name": value[2], - "artist_credit": value[3], - "length": value[4], - "comment": value[5], - "edits_pending": value[6], - "last_updated": value[7], - "video": value[8]}) - transaction.commit() - print("INSERTED recording data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_recording_gid_redirect_data: - for value in MB_recording_gid_redirect_data: - transaction = connection.begin() - try: - recording_gid_redirect_query = text(""" - INSERT INTO musicbrainz.recording_gid_redirect - VALUES (:gid, :new_id, :created)""") - connection.execute(recording_gid_redirect_query, {"gid": value[0], - "new_id": value[1], - "created": value[2]}) - transaction.commit() - print("INSERTED recording_gid_redirect data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_group_data: - for value in MB_release_group_data: - transaction = connection.begin() - try: - release_group_query = text(""" - INSERT INTO musicbrainz.release_group - VALUES (:id, :gid, :name, :artist_credit, :type, :comment, :edits_pending, :last_updated)""") - connection.execute(release_group_query, {"id": value[0], - "gid": value[1], - "name": value[2], - "artist_credit": value[3], - "type": value[4], - "comment": value[5], - "edits_pending": value[6], - "last_updated": value[7]}) - transaction.commit() - print("INSERTED release_group data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_group_gid_redirect_data: - for value in MB_release_group_gid_redirect_data: - transaction = connection.begin() - try: - release_group_gid_redirect_query = text(""" - INSERT INTO musicbrainz.release_group_gid_redirect - VALUES (:gid, :new_id, :created)""") - 
connection.execute(release_group_gid_redirect_query, {"gid": value[0], - "new_id": value[1], - "created": value[2]}) - transaction.commit() - print("INSERTED release_gid_redirect data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_data: - for value in MB_release_data: - transaction = connection.begin() - try: - release_query = text(""" - INSERT INTO musicbrainz.release - VALUES (:id, :gid, :name, :artist_credit, :release_group, :status, :packaging, :language, - :script, :barcode, :comment, :edits_pending, :quality, :last_updated)""") - connection.execute(release_query, {"id": value[0], - "gid": value[1], - "name": value[2], - "artist_credit": value[3], - "release_group": value[4], - "status": value[5], - "packaging": value[6], - "language": value[7], - "script": value[8], - "barcode": value[9], - "comment": value[10], - "edits_pending": value[11], - "quality": value[12], - "last_updated": value[13]}) - transaction.commit() - print("INSERTED release data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_gid_redirect_data: - for value in MB_release_gid_redirect_data: - transaction = connection.begin() - try: - release_gid_redirect_query = text(""" - INSERT INTO musicbrainz.release_gid_redirect - VALUES (:gid, :new_id, :created)""") - connection.execute(release_gid_redirect_query, {"gid": value[0], - "new_id": value[1], - "created": value[2]}) - transaction.commit() - print("INSERTED release_gid_redirect data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_medium_data: - for value in MB_medium_data: - transaction = connection.begin() - try: - medium_query = text(""" - INSERT INTO musicbrainz.medium - VALUES (:id, :release, :position, :format, :name, :edits_pending, :last_updated, :track_count)""") - connection.execute(medium_query, {"id": value[0], - "release": value[1], - "position": value[2], - "format": value[3], - "name": value[4], 
- "edits_pending": value[5], - "last_updated": value[6], - "track_count": value[7]}) - transaction.commit() - print("INSERTED medium data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_track_data: - for value in MB_track_data: - transaction = connection.begin() - try: - track_query = text(""" - INSERT INTO musicbrainz.track - VALUES (:id, :gid, :recording, :medium, :position, :number, :name, :artist_credit, :length, - :edits_pending, :last_updated, :is_data_track)""") - connection.execute(track_query, {"id": value[0], - "gid": value[1], - "recording": value[2], - "medium": value[3], - "position": value[4], - "number": value[5], - "name": value[6], - "artist_credit": value[7], - "length": value[8], - "edits_pending": value[9], - "last_updated": value[10], - "is_data_track": value[11]}) - transaction.commit() - print("INSERTED track data\n") - except IntegrityError as e: - print(e.message) - transaction.rollback() - - print("--------------------------------DONE-----------------------------------") + gids = connection.execute(lowlevel_query) + gids = gids.fetchall() + gids_in_AB = [value[0] for value in gids] + no_of_rows = len(gids_in_AB) + start = 0 + rows_to_fetch = 10000 + for value in range(0, (no_of_rows/rows_to_fetch) + 1): + fetch_musicbrainz_data(gids_in_AB[start : start + rows_to_fetch]) + start = start + rows_to_fetch From c0931ca0e9387f02988afa269d71ee37c80b4820 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Thu, 14 Jun 2018 02:44:10 +0530 Subject: [PATCH 019/125] Get data for FK referenced tables, write docstrings & comments for every function --- db/import_mb_data.py | 1177 +++++++++++++++++++++++++----------------- manage.py | 2 +- 2 files changed, 698 insertions(+), 481 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index feedc9fc0..065da61e4 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1,36 +1,61 @@ import db from brainzutils import musicbrainz_db from sqlalchemy import 
text -from sqlalchemy.exc import IntegrityError +def load_artist_credit(connection, gids_in_AB, MB_release_data): + """Fetches artist_credit table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. -def load_artist_credit(connection, gids_in_AB): - artist_credit_query = text(""" - SELECT DISTINCT artist_credit.id, artist_credit.name, artist_credit.artist_count, - artist_credit.ref_count, artist_credit.created - FROM artist_credit - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids OR artist_credit.id in :data - """) + Also fetches data corresponding to release table. + """ + filters = [] + filter_data = {} + + # Get data corresponding to artist_credit column in release table MB_release_fk_artist_credit = [] for value in MB_release_data: MB_release_fk_artist_credit.append(value[3]) MB_release_fk_artist_credit = list(set(MB_release_fk_artist_credit)) - result = connection.execute(artist_credit_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_release_fk_artist_credit)}) - global MB_artist_credit_data + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_release_data: + filters.append("artist_credit.id in :data") + filter_data["data"] = tuple(MB_release_fk_artist_credit) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + + artist_credit_query = text(""" + SELECT DISTINCT artist_credit.id, artist_credit.name, artist_credit.artist_count, + artist_credit.ref_count, artist_credit.created + FROM artist_credit + INNER JOIN recording + ON artist_credit.id = recording.artist_credit + {filterstr} + """.format(filterstr=filterstr) + ) + + result = connection.execute(artist_credit_query, filter_data) MB_artist_credit_data = result.fetchall() + return MB_artist_credit_data + def load_artist_type(connection, gids_in_AB): + """Fetches artist_type table data from MusicBrainz database for the + 
recording MBIDs in AcousticBrainz database. + """ artist_type_query = text(""" SELECT DISTINCT artist_type.id, - artist_type.name, - artist_type.parent, - artist_type.child_order, - artist_type.description, - artist_type.gid + artist_type.name, + artist_type.parent, + artist_type.child_order, + artist_type.description, + artist_type.gid FROM artist_type INNER JOIN artist ON artist.type = artist_type.id @@ -41,11 +66,15 @@ def load_artist_type(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(artist_type_query, {'gids': tuple(gids_in_AB)}) - global MB_artist_type_data MB_artist_type_data = result.fetchall() + return MB_artist_type_data + def load_area_type(connection, gids_in_AB): + """Fetches area_type table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + """ area_type_query = text(""" SELECT DISTINCT area_type.id, area_type.name, @@ -65,11 +94,16 @@ def load_area_type(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(area_type_query, {'gids': tuple(gids_in_AB)}) - global MB_area_type_data MB_area_type_data = result.fetchall() + return MB_area_type_data + def load_begin_area_type(connection, gids_in_AB): + """Fetches area_type table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database for the begin area column + in artist table. + """ begin_area_type_query = text(""" SELECT DISTINCT area_type.id, area_type.name, @@ -89,11 +123,16 @@ def load_begin_area_type(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(begin_area_type_query, {'gids': tuple(gids_in_AB)}) - global MB_begin_area_type_data MB_begin_area_type_data = result.fetchall() + return MB_begin_area_type_data + def load_end_area_type(connection, gids_in_AB): + """Fetches area_type table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database for the end area column in + artist table. 
+ """ end_area_type_query = text(""" SELECT DISTINCT area_type.id, area_type.name, @@ -113,11 +152,15 @@ def load_end_area_type(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(end_area_type_query, {'gids': tuple(gids_in_AB)}) - global MB_end_area_type_data MB_end_area_type_data = result.fetchall() + return MB_end_area_type_data + def load_release_status(connection, gids_in_AB): + """Fetches release_status table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + """ release_status_query = text(""" SELECT DISTINCT release_status.id, release_status.name, @@ -133,42 +176,97 @@ def load_release_status(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(release_status_query, {'gids': tuple(gids_in_AB)}) - global MB_release_status_data MB_release_status_data = result.fetchall() + return MB_release_status_data -def load_release_group_primary_type(connection, gids_in_AB): - release_group_primary_type_query = text(""" - SELECT DISTINCT release_group_primary_type.id, release_group_primary_type.name, - release_group_primary_type.parent, release_group_primary_type.child_order, - release_group_primary_type.description, release_group_primary_type.gid - FROM release_group_primary_type INNER JOIN release_group - ON release_group_primary_type.id = release_group.type - INNER JOIN recording - ON recording.artist_credit = release_group.artist_credit - WHERE recording.gid in :gids OR release_group_primary_type.id in :data - """) + +def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data): + """Fetches release_group_primary_type table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + + Also fetches data corresponding to release_group table. 
+ """ + filters = [] + filter_data = {} + + # Get data corresponding to release_group_primary_type column in release_group table MB_release_group_fk_type = [] for value in MB_release_group_data: MB_release_group_fk_type.append(value[4]) MB_release_group_fk_type = list(set(MB_release_group_fk_type)) - result = connection.execute(release_group_primary_type_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_release_group_fk_type)}) - global MB_release_group_primary_type_data + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_release_group_data: + filters.append("release_group_primary_type.id in :data") + filter_data["data"] = tuple(MB_release_group_fk_type) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + + release_group_primary_type_query = text(""" + SELECT DISTINCT release_group_primary_type.id, release_group_primary_type.name, + release_group_primary_type.parent, release_group_primary_type.child_order, + release_group_primary_type.description, release_group_primary_type.gid + FROM release_group_primary_type INNER JOIN release_group + ON release_group_primary_type.id = release_group.type + INNER JOIN recording + ON recording.artist_credit = release_group.artist_credit + {filterstr} + """.format(filterstr=filterstr) + ) + + result = connection.execute(release_group_primary_type_query, filter_data) MB_release_group_primary_type_data = result.fetchall() + return MB_release_group_primary_type_data + def load_medium_format(connection, gids_in_AB): + """Fetches medium_format table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. 
+ """ medium_format_query = text(""" SELECT * FROM medium_format ORDER BY id """) result = connection.execute(medium_format_query) - global MB_medium_format_data MB_medium_format_data = result.fetchall() + return MB_medium_format_data + + +def load_release_packaging(connection, gids_in_AB, MB_release_data): + """Fetches release_packaging table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + + Also fetches data corresponding to release table. + """ + filters = [] + filter_data = {} + + # Get data corresponding to release_packaging column in release table + MB_release_fk_packaging = [] + for value in MB_release_data: + MB_release_fk_packaging.append(value[6]) + MB_release_fk_packaging = list(set(MB_release_fk_packaging)) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_release_data: + filters.append("release_packaging.id in :data") + filter_data["data"] = tuple(MB_release_fk_packaging) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr -def load_release_packaging(connection, gids_in_AB): release_packaging_query = text(""" SELECT DISTINCT release_packaging.id, release_packaging.name, @@ -181,19 +279,20 @@ def load_release_packaging(connection, gids_in_AB): ON release.packaging = release_packaging.id INNER JOIN recording ON recording.artist_credit = release.artist_credit - WHERE recording.gid in :gids OR release_packaging.id in :data - """) - MB_release_fk_packaging = [] - for value in MB_release_data: - MB_release_fk_packaging.append(value[6]) - MB_release_fk_packaging = list(set(MB_release_fk_packaging)) + {filterstr} + """.format(filterstr=filterstr) + ) - result = connection.execute(release_packaging_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_release_fk_packaging)}) - global MB_release_packaging_data + result = connection.execute(release_packaging_query, filter_data) MB_release_packaging_data = result.fetchall() + 
return MB_release_packaging_data + def load_language(connection, gids_in_AB): + """Fetches language table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + """ language_query = text(""" SELECT DISTINCT language.id, language.iso_code_2t, @@ -210,11 +309,15 @@ def load_language(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(language_query, {'gids': tuple(gids_in_AB)}) - global MB_language_data MB_language_data = result.fetchall() + return MB_language_data + def load_script(connection, gids_in_AB): + """Fetches script table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + """ script_query = text(""" SELECT DISTINCT script.id, script.iso_code, @@ -229,11 +332,15 @@ def load_script(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(script_query, {'gids': tuple(gids_in_AB)}) - global MB_script_data MB_script_data = result.fetchall() + return MB_script_data + def load_gender(connection, gids_in_AB): + """ Fetches gender table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + """ gender_query = text(""" SELECT DISTINCT gender.id, gender.name, @@ -251,11 +358,15 @@ def load_gender(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(gender_query, {'gids': tuple(gids_in_AB)}) - global MB_gender_data MB_gender_data = result.fetchall() + return MB_gender_data + def load_area(connection, gids_in_AB): + """ Fetches area table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. 
+ """ area_query = text(""" SELECT DISTINCT area.id, area.gid, @@ -281,11 +392,37 @@ def load_area(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(area_query, {'gids': tuple(gids_in_AB)}) - global MB_area_data MB_area_data = result.fetchall() + return MB_area_data + + +def load_begin_area(connection, gids_in_AB, MB_artist_data): + """Fetches area table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database for begin area column. + + Also fetches data corresponding to artist table. + """ + filters = [] + filter_data = {} + + # Get data corresponding to begin_area column in artist table + MB_artist_fk_begin_area = [] + for value in MB_artist_data: + MB_artist_fk_begin_area.append(value[17]) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_artist_data: + filters.append("area.id in :data") + filter_data["data"] = tuple(MB_artist_fk_begin_area) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr -def load_begin_area(connection, gids_in_AB): begin_area_query = text(""" SELECT DISTINCT area.id, area.gid, @@ -308,18 +445,20 @@ def load_begin_area(connection, gids_in_AB): ON artist.id = artist_credit.id INNER JOIN recording ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids OR area.id in :data - """) - MB_artist_fk_begin_area = [] - for value in MB_artist_data: - MB_artist_fk_begin_area.append(value[17]) + {filterstr} + """.format(filterstr=filterstr) + ) - result = connection.execute(begin_area_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_artist_fk_begin_area)}) - global MB_begin_area_data + result = connection.execute(begin_area_query, filter_data) MB_begin_area_data = result.fetchall() + return MB_begin_area_data + def load_end_area(connection, gids_in_AB): + """Fetches area table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database for end 
area column. + """ end_area_query = text(""" SELECT DISTINCT area.id, area.gid, @@ -345,11 +484,15 @@ def load_end_area(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(end_area_query, {'gids': tuple(gids_in_AB)}) - global MB_end_area_data MB_end_area_data = result.fetchall() + return MB_end_area_data + def load_artist_credit_name(connection, gids_in_AB): + """Fetches artist_credit_name table data from MusicBrainz database + for the recording MBIDs in AcousticBrainz database. + """ artist_credit_name_query = text(""" SELECT DISTINCT artist_credit_name.artist_credit, artist_credit_name.position, @@ -364,11 +507,37 @@ def load_artist_credit_name(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(artist_credit_name_query, {'gids': tuple(gids_in_AB)}) - global MB_artist_credit_name_data MB_artist_credit_name_data = result.fetchall() + return MB_artist_credit_name_data + + +def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): + """Fetches artist table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + + Also fetch data corresponding to artist_credit_name table. + """ + filters = [] + filter_data = {} + + # Get data corresponding to artist column in artist_credit_name table. 
+ MB_artist_credit_name_fk_artist = [] + for value in MB_artist_credit_name_data: + MB_artist_credit_name_fk_artist.append(value[2]) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_artist_credit_name_data: + filters.append("artist.id in :data") + filter_data["data"] = tuple(MB_artist_credit_name_fk_artist) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr -def load_artist(connection, gids_in_AB): artist_query = text(""" SELECT DISTINCT artist.id, artist.gid, artist.name, artist.sort_name, artist.begin_date_year, artist.begin_date_month, artist.begin_date_day, artist.end_date_year, artist.end_date_month, @@ -379,18 +548,20 @@ def load_artist(connection, gids_in_AB): ON artist_credit.id = artist.id INNER JOIN recording ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids OR artist.id in :data - """) - MB_artist_credit_name_fk_artist = [] - for value in MB_artist_credit_name_data: - MB_artist_credit_name_fk_artist.append(value[2]) + {filterstr} + """.format(filterstr=filterstr) + ) - result = connection.execute(artist_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_artist_credit_name_fk_artist)}) - global MB_artist_data + result = connection.execute(artist_query, filter_data) MB_artist_data = result.fetchall() + return MB_artist_data + def load_artist_gid_redirect(connection, gids_in_AB): + """Fetches artist_gid_redirect table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. 
+ """ artist_gid_redirect_query = text(""" SELECT DISTINCT artist_gid_redirect.gid, artist_gid_redirect.new_id, @@ -405,11 +576,15 @@ def load_artist_gid_redirect(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(artist_gid_redirect_query, {'gids': tuple(gids_in_AB)}) - global MB_artist_gid_redirect_data MB_artist_gid_redirect_data = result.fetchall() + return MB_artist_gid_redirect_data + def load_recording(connection, gids_in_AB): + """Fetches recording table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + """ recording_query = text(""" SELECT DISTINCT recording.id, recording.gid, recording.name, recording.artist_credit, recording.length, recording.comment, recording.edits_pending, recording.last_updated, @@ -418,11 +593,15 @@ def load_recording(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(recording_query, {'gids': tuple(gids_in_AB)}) - global MB_recording_data MB_recording_data = result.fetchall() + return MB_recording_data + def load_recording_gid_redirect(connection, gids_in_AB): + """Fetches recording_gid_redirect table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + """ recording_gid_redirect_query = text(""" SELECT DISTINCT recording_gid_redirect.gid, recording_gid_redirect.new_id, @@ -433,11 +612,49 @@ def load_recording_gid_redirect(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(recording_gid_redirect_query, {'gids': tuple(gids_in_AB)}) - global MB_recording_gid_redirect_data MB_recording_gid_redirect_data = result.fetchall() + return MB_recording_gid_redirect_data + + +def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data): + """Fetches release_group table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. 
+ + Also fetch data corresponding to release_group_gid_redirect and + release table. + """ + filters = [] + filter_data = {} + + # Get data corresponding to release_group column in release_group_gid_redirect table. + MB_release_group_gid_redirect_fk_release_group = [] + for value in MB_release_group_gid_redirect_data: + MB_release_group_gid_redirect_fk_release_group.append(value[1]) + MB_release_group_gid_redirect_fk_release_group = list(set(MB_release_group_gid_redirect_fk_release_group)) + + # Get data corresponding to release_group column in release table. + MB_release_fk_release_group = [] + for value in MB_release_data: + MB_release_fk_release_group.append(value[4]) + MB_release_fk_release_group = list(set(MB_release_fk_release_group)) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_release_group_gid_redirect_data: + filters.append("release_group.id in :redirect_data") + filter_data["redirect_data"] = tuple(MB_release_group_gid_redirect_fk_release_group) + + if MB_release_data: + filters.append("release_group.id in :release_data") + filter_data["release_data"] = tuple(MB_release_fk_release_group) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr -def load_release_group(connection, gids_in_AB): release_group_query = text(""" SELECT DISTINCT release_group.id, release_group.gid, @@ -450,26 +667,20 @@ def load_release_group(connection, gids_in_AB): FROM release_group INNER JOIN recording ON recording.artist_credit = release_group.artist_credit - WHERE recording.gid in :gids OR release_group.id in :redirect_data OR release_group.id in :release_data - """) - MB_release_group_gid_redirect_fk_release_group = [] - for value in MB_release_group_gid_redirect_data: - MB_release_group_gid_redirect_fk_release_group.append(value[1]) - MB_release_group_gid_redirect_fk_release_group = list(set(MB_release_group_gid_redirect_fk_release_group)) - - MB_release_fk_release_group 
= [] - for value in MB_release_data: - MB_release_fk_release_group.append(value[4]) - MB_release_fk_release_group = list(set(MB_release_fk_release_group)) + {filterstr} + """.format(filterstr=filterstr) + ) - result = connection.execute(release_group_query, {'gids': tuple(gids_in_AB), - 'redirect_data': tuple(MB_release_group_gid_redirect_fk_release_group), - 'release_data': tuple(MB_release_fk_release_group)}) - global MB_release_group_data + result = connection.execute(release_group_query, filter_data) MB_release_group_data = result.fetchall() + return MB_release_group_data + def load_release_group_gid_redirect(connection, gids_in_AB): + """Fetches release_group_gid_redirect table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + """ release_group_gid_redirect_query = text(""" SELECT DISTINCT release_group_gid_redirect.gid, release_group_gid_redirect.new_id, @@ -482,11 +693,48 @@ def load_release_group_gid_redirect(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(release_group_gid_redirect_query, {'gids': tuple(gids_in_AB)}) - global MB_release_group_gid_redirect_data MB_release_group_gid_redirect_data = result.fetchall() + return MB_release_group_gid_redirect_data + + +def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data): + """Fetches release table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + + Also fetch data corresponding to medium and release_gid_redirect table. + """ + filters = [] + filter_data = {} + + # Get data corresponding to release column in medium table. + MB_medium_fk_release = [] + for value in MB_medium_data: + MB_medium_fk_release.append(value[1]) + MB_medium_fk_release = list(set(MB_medium_fk_release)) + + # Get data corresponding to release column in release_gid_redirect table. 
+ MB_release_gid_redirect_fk_release = [] + for value in MB_release_gid_redirect_data: + MB_release_gid_redirect_fk_release.append(value[1]) + MB_release_gid_redirect_fk_release = list(set(MB_release_gid_redirect_fk_release)) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_medium_data: + filters.append("release.id in :medium_data") + filter_data["medium_data"] = tuple(MB_medium_fk_release) + + if MB_release_gid_redirect_data: + filters.append("release.id in :redirect_data") + filter_data["redirect_data"] = tuple(MB_release_gid_redirect_fk_release) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr -def load_release(connection, gids_in_AB): release_query = text(""" SELECT DISTINCT release.id, release.gid, @@ -505,27 +753,20 @@ def load_release(connection, gids_in_AB): FROM release INNER JOIN recording ON recording.artist_credit = release.artist_credit - WHERE recording.gid in :gids OR release.id in :medium_data OR release.id in :redirect_data - """) - MB_medium_fk_release = [] - for value in MB_medium_data: - MB_medium_fk_release.append(value[1]) - MB_medium_fk_release = list(set(MB_medium_fk_release)) - - MB_release_gid_redirect_fk_release = [] - for value in MB_release_gid_redirect_data: - MB_release_gid_redirect_fk_release.append(value[1]) - MB_release_gid_redirect_fk_release = list(set(MB_release_gid_redirect_fk_release)) + {filterstr} + """.format(filterstr=filterstr) + ) - result = connection.execute(release_query, {'gids': tuple(gids_in_AB), - 'medium_data': tuple(MB_medium_fk_release), - 'redirect_data': tuple(MB_release_gid_redirect_fk_release) - }) - global MB_release_data + result = connection.execute(release_query, filter_data) MB_release_data = result.fetchall() + return MB_release_data + def load_release_gid_redirect(connection, gids_in_AB): + """Fetches release_gid_redirect table data from MusicBrainz database for the + recording MBIDs in 
AcousticBrainz database. + """ release_gid_redirect_query = text(""" SELECT DISTINCT release_gid_redirect.gid, release_gid_redirect.new_id, @@ -538,11 +779,37 @@ def load_release_gid_redirect(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(release_gid_redirect_query, {'gids': tuple(gids_in_AB)}) - global MB_release_gid_redirect_data MB_release_gid_redirect_data = result.fetchall() + return MB_release_gid_redirect_data + + +def load_medium(connection, gids_in_AB, MB_track_data): + """Fetches medium table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + + Also fetch data corresponding to track table. + """ + filters = [] + filter_data = {} + + # Get data corresponding to medium column in track table. + MB_track_fk_medium = [] + for value in MB_track_data: + MB_track_fk_medium.append(value[3]) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_track_data: + filters.append("medium.id in :data") + filter_data["data"] = tuple(MB_track_fk_medium) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr -def load_medium(connection, gids_in_AB): medium_query = text(""" SELECT DISTINCT medium.id, medium.release, @@ -557,18 +824,20 @@ def load_medium(connection, gids_in_AB): ON release.id = medium.release INNER JOIN recording ON recording.artist_credit=release.artist_credit - WHERE recording.gid in :gids OR medium.id in :data - """) - MB_track_fk_medium = [] - for value in MB_track_data: - MB_track_fk_medium.append(value[3]) + {filterstr} + """.format(filterstr=filterstr) + ) - result = connection.execute(medium_query, {'gids': tuple(gids_in_AB), 'data': tuple(MB_track_fk_medium)}) - global MB_medium_data + result = connection.execute(medium_query, filter_data) MB_medium_data = result.fetchall() + return MB_medium_data + def load_track(connection, gids_in_AB): + """Fetches track table data from MusicBrainz 
database for the + recording MBIDs in AcousticBrainz database. + """ track_query = text(""" SELECT DISTINCT track.id, track.gid, @@ -588,18 +857,19 @@ def load_track(connection, gids_in_AB): WHERE recording.gid in :gids """) result = connection.execute(track_query, {'gids': tuple(gids_in_AB)}) - global MB_track_data MB_track_data = result.fetchall() + return MB_track_data -print("--------------------------------------------------------------------------------------------------") - -# TO ACOUSTICBRAINZ -def write_artist_credit(transaction, connection): +def write_artist_credit(connection, MB_artist_credit_data): + """Insert data into artist_credit table in musicbrainz schema in + AcousticBrainz database. + """ artist_credit_query = text(""" INSERT INTO musicbrainz.artist_credit - VALUES (:id, :name, :artist_count, :ref_count, :created) + VALUES (:id, :name, :artist_count, :ref_count, :created) + ON CONFLICT (id) DO NOTHING """) values = [{ "id" : value[0], @@ -609,14 +879,17 @@ def write_artist_credit(transaction, connection): "created" : value[4]} for value in MB_artist_credit_data ] connection.execute(artist_credit_query, values) - transaction.commit() print("INSERTED artist_credit data\n") -def write_artist_type(transaction, connection): +def write_artist_type(connection, MB_artist_type_data): + """Insert data in artist_type table in musicbrainz schema in + AcousticBrainz database. 
+ """ artist_type_query = text(""" - INSERT INTO musicbrainz.artist_type - VALUES (:id, :name, :parent, :child_order, :description, :gid) + INSERT INTO musicbrainz.artist_type(id, name, parent, child_order, description, gid) + VALUES (:id, :name, :parent, :child_order, :description, :gid) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -627,14 +900,17 @@ def write_artist_type(transaction, connection): "gid" : value[5]} for value in MB_artist_type_data ] connection.execute(artist_type_query, values) - transaction.commit() print("INSERTED artist_type data\n") -def write_area_type(transaction, connection): +def write_area_type(connection, MB_area_type_data): + """Insert data in area_type table in musicbrainz schema in + AcousticBrainz database. + """ area_type_query = text(""" INSERT INTO musicbrainz.area_type - VALUES (:id, :name, :parent, :child_order, :description, :gid) + VALUES (:id, :name, :parent, :child_order, :description, :gid) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -645,14 +921,17 @@ def write_area_type(transaction, connection): "gid": value[5]} for value in MB_area_type_data ] connection.execute(area_type_query, values) - transaction.commit() print("INSERTED area_type data\n") -def write_begin_area_type(transaction, connection): +def write_begin_area_type(connection, MB_begin_area_type_data): + """Insert data in area_type table in musicbrainz schema in + AcousticBrainz database for begin_area column in artist table. 
+ """ begin_area_type_query = text(""" INSERT INTO musicbrainz.area_type - VALUES (:id, :name, :parent, :child_order, :description, :gid) + VALUES (:id, :name, :parent, :child_order, :description, :gid) + ON CONFLICT (id) DO NOTHING """) values = [{ "id": value[0], @@ -663,14 +942,17 @@ def write_begin_area_type(transaction, connection): "gid": value[5]} for value in MB_begin_area_type_data ] connection.execute(begin_area_type_query, values) - transaction.commit() print("INSERTED begin_area_type data\n") -def write_end_area_type(transaction, connection): +def write_end_area_type(connection, MB_end_area_type_data): + """Insert data in area_type table in musicbrainz schema in + AcousticBrainz database for end area column in artist table. + """ end_area_type_query = text(""" - INSERT INTO musicbrainz.area_type - VALUES (:id, :name, :parent, :child_order, :description, :gid) + INSERT INTO musicbrainz.area_type(id, name, parent, child_order, description, gid) + VALUES (:id, :name, :parent, :child_order, :description, :gid) + ON CONFLICT (id) DO NOTHING """) values = [{ "id": value[0], @@ -681,14 +963,17 @@ def write_end_area_type(transaction, connection): "gid": value[5]} for value in MB_end_area_type_data ] connection.execute(end_area_type_query, values) - transaction.commit() print("INSERTED end_area_type data\n") -def write_release_status(transaction, connection): +def write_release_status(connection, MB_release_status_data): + """Insert data in release_status table in musicbrainz schema in + AcousticBrainz database. 
+ """ release_status_query = text(""" INSERT INTO musicbrainz.release_status - VALUES (:id, :name, :parent, :child_order, :description, :gid) + VALUES (:id, :name, :parent, :child_order, :description, :gid) + ON CONFLICT (gid) DO NOTHING """) values= [{ "id": value[0], @@ -699,14 +984,17 @@ def write_release_status(transaction, connection): "gid": value[5]} for value in MB_release_status_data ] result = connection.execute(release_status_query, values) - transaction.commit() print("INSERTED release_status data\n") -def write_release_group_primary_type(transaction, connection): +def write_release_group_primary_type(connection, MB_release_group_primary_type_data): + """Insert data in release_group_primary_type table in musicbrainz schema in + AcousticBrainz database. + """ release_group_primary_type_query = text(""" INSERT INTO musicbrainz.release_group_primary_type - VALUES (:id, :name, :parent, :child_order, :description, :gid) + VALUES (:id, :name, :parent, :child_order, :description, :gid) + ON CONFLICT (id) DO NOTHING """) values = [{ "id": value[0], @@ -717,14 +1005,17 @@ def write_release_group_primary_type(transaction, connection): "gid": value[5]} for value in MB_release_group_primary_type_data ] connection.execute(release_group_primary_type_query, values) - transaction.commit() print("INSERTED release_group_primary_type data\n") -def write_medium_format(transaction, connection): +def write_medium_format(connection, MB_medium_format_data): + """Insert data in medium_format table in musicbrainz schema in + AcousticBrainz database. 
+ """ medium_format_query = text(""" INSERT INTO musicbrainz.medium_format - VALUES (:id, :name, :parent, :child_order, :year, :has_discids, :description, :gid) + VALUES (:id, :name, :parent, :child_order, :year, :has_discids, :description, :gid) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -738,14 +1029,17 @@ def write_medium_format(transaction, connection): ] connection.execute(text("""ALTER TABLE musicbrainz.medium_format DROP CONSTRAINT IF EXISTS medium_format_fk_parent""")) connection.execute(medium_format_query, values) - transaction.commit() print("INSERTED medium_format data\n") -def write_release_packaging(transaction, connection): +def write_release_packaging(connection, MB_release_packaging_data): + """Insert data in release_packaging table in musicbrainz schema in + AcousticBrainz database. + """ release_packaging_query = text(""" INSERT INTO musicbrainz.release_packaging - VALUES (:id, :name, :parent, :child_order, :description, :gid) + VALUES (:id, :name, :parent, :child_order, :description, :gid) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -756,14 +1050,17 @@ def write_release_packaging(transaction, connection): "gid": value[5]} for value in MB_release_packaging_data ] connection.execute(release_packaging_query, values) - transaction.commit() print("INSERTED release_packaging data\n") -def write_language(transaction, connection): +def write_language(connection, MB_language_data): + """Insert data in language table in musicbrainz schema in + AcousticBrainz database. 
+ """ language_query = text(""" INSERT INTO musicbrainz.language - VALUES (:iso_code_2t, :iso_code_2b, :iso_code_1, :name, :frequency, :iso_code_3) + VALUES (:iso_code_2t, :iso_code_2b, :iso_code_1, :name, :frequency, :iso_code_3) + ON CONFLICT (iso_code_2b) DO NOTHING """) values = [{ "iso_code_2t": value[0], @@ -774,14 +1071,17 @@ def write_language(transaction, connection): "iso_code_3": value[5]} for value in MB_language_data ] connection.execute(language_query, values) - transaction.commit() print("INSERTED language data\n") -def write_script(transaction, connection): +def write_script(connection, MB_script_data): + """Insert data in script table in musicbrainz schema in + AcousticBrainz database. + """ script_query = text(""" INSERT INTO musicbrainz.script - VALUES (:id, :iso_code, :iso_number, :name, :frequency) + VALUES (:id, :iso_code, :iso_number, :name, :frequency) + ON CONFLICT (iso_code) DO NOTHING """) values = [{ "id": value[0], @@ -791,14 +1091,17 @@ def write_script(transaction, connection): "frequency": value[4]} for value in MB_script_data ] connection.execute(script_query, values) - transaction.commit() print("INSERTED script data\n") -def write_gender(transaction, connection): +def write_gender(connection, MB_gender_data): + """Insert data in gender table in musicbrainz schema in + AcousticBrainz database. + """ gender_query = text(""" INSERT INTO musicbrainz.gender - VALUES (:id, :name, :parent, :child_order, :description, :gid) + VALUES (:id, :name, :parent, :child_order, :description, :gid) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -809,16 +1112,19 @@ def write_gender(transaction, connection): "gid": value[5]} for value in MB_gender_data ] connection.execute(gender_query, values) - transaction.commit() print("INSERTED gender data\n") -def write_area(transaction, connection): +def write_area(connection, MB_area_data): + """Insert data in area table in musicbrainz schema in + AcousticBrainz database. 
+ """ area_query = text(""" INSERT INTO musicbrainz.area - VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, - :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, - :ended, :comment) + VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, + :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, + :ended, :comment) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -837,16 +1143,20 @@ def write_area(transaction, connection): "comment": value[13]} for value in MB_area_data ] connection.execute(area_query, values) - transaction.commit() print("INSERTED area data\n") -def write_begin_area(transaction, connection): +def write_begin_area(connection, MB_begin_area_data): + """Insert data in area table in musicbrainz schema in + AcousticBrainz database for begin_area column in artist + table. + """ begin_area_query = text(""" INSERT INTO musicbrainz.area - VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, - :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, - :ended, :comment) + VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, + :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, + :ended, :comment) + ON CONFLICT (id) DO NOTHING """) values = [{ "id": value[0], @@ -865,16 +1175,20 @@ def write_begin_area(transaction, connection): "comment": value[13]} for value in MB_begin_area_data ] connection.execute(begin_area_query, values) - transaction.commit() print("INSERTED begin_area data\n") -def write_end_area(transaction, connection): +def write_end_area(connection, MB_end_area_data): + """Insert data in area table in musicbrainz schema in + AcousticBrainz database for end_area column in artist + table. 
+ """ end_area_query = text(""" INSERT INTO musicbrainz.area - VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, - :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, - :ended, :comment) + VALUES (:id, :gid, :name, :type, :edits_pending, :last_updated, :begin_date_year, + :begin_date_month, :begin_date_day, :end_date_year, :end_date_month, :end_date_day, + :ended, :comment) + ON CONFLICT (id) DO NOTHING """) values = [{ "id": value[0], @@ -893,17 +1207,19 @@ def write_end_area(transaction, connection): "comment": value[13]} for value in MB_end_area_data ] connection.execute(end_area_query, values) - transaction.commit() print("INSERTED end_area data\n") -def write_artist(transaction, connection): +def write_artist(connection, MB_artist_data): + """Insert data in artist table in musicbrainz schema in + AcousticBrainz database. + """ artist_query = text(""" INSERT INTO musicbrainz.artist - VALUES (:id, :gid, :name, :sort_name, :begin_date_year, :begin_date_month, :begin_date_day, - :end_date_year, :end_date_month, :end_date_day, :type, :area, :gender, :comment, :edits_pending, - :last_updated, :ended, :begin_area, :end_area) - ON conflict do nothing + VALUES (:id, :gid, :name, :sort_name, :begin_date_year, :begin_date_month, :begin_date_day, + :end_date_year, :end_date_month, :end_date_day, :type, :area, :gender, :comment, :edits_pending, + :last_updated, :ended, :begin_area, :end_area) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -927,14 +1243,17 @@ def write_artist(transaction, connection): "end_area": value[18]} for value in MB_artist_data ] connection.execute(artist_query, values) - transaction.commit() print("INSERTED artist data\n") -def write_artist_credit_name(transaction, connection): +def write_artist_credit_name(connection, MB_artist_credit_name_data): + """Insert data in artist_credit_name table in musicbrainz schema in + AcousticBrainz database. 
+ """ artist_credit_name_query = text(""" INSERT INTO musicbrainz.artist_credit_name - VALUES (:artist_credit, :position, :artist, :name, :join_phrase) + VALUES (:artist_credit, :position, :artist, :name, :join_phrase) + ON CONFLICT (artist_credit, position) DO NOTHING """) values = [{ "artist_credit": value[0], @@ -944,14 +1263,17 @@ def write_artist_credit_name(transaction, connection): "join_phrase": value[4]} for value in MB_artist_credit_name_data ] connection.execute(artist_credit_name_query, values) - transaction.commit() print("INSERTED artist_credit_name data\n") -def write_artist_gid_redirect(transaction, connection): +def write_artist_gid_redirect(connection, MB_artist_gid_redirect_data): + """Insert data in artist_gid_redirect table in musicbrainz schema in + AcousticBrainz database. + """ artist_gid_redirect_query = text(""" INSERT INTO musicbrainz.artist_gid_redirect - VALUES (:gid, :new_id, :created) + VALUES (:gid, :new_id, :created) + ON CONFLICT (gid) DO NOTHING """) values = [{ "gid": value[0], @@ -959,14 +1281,17 @@ def write_artist_gid_redirect(transaction, connection): "created": value[2]} for value in MB_artist_gid_redirect_data ] connection.execute(artist_gid_redirect_query, values) - transaction.commit() print("INSERTED artist_gid_redirect data\n") -def write_recording(transaction, connection): +def write_recording(connection, MB_recording_data): + """Insert data in recording table in musicbrainz schema in + AcousticBrainz database. 
+ """ recording_query = text(""" INSERT INTO musicbrainz.recording - VALUES (:id, :gid, :name, :artist_credit, :length, :comment, :edits_pending, :last_updated, :video) + VALUES (:id, :gid, :name, :artist_credit, :length, :comment, :edits_pending, :last_updated, :video) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -980,28 +1305,34 @@ def write_recording(transaction, connection): "video": value[8]} for value in MB_recording_data ] connection.execute(recording_query, values) - transaction.commit() print("INSERTED recording data\n") -def write_recording_gid_redirect(transaction, connection): +def write_recording_gid_redirect(connection, MB_recording_gid_redirect_data): + """Insert data in recording_gid_redirect table in musicbrainz schema in + AcousticBrainz database. + """ recording_gid_redirect_query = text(""" INSERT INTO musicbrainz.recording_gid_redirect - VALUES (:gid, :new_id, :created) + VALUES (:gid, :new_id, :created) + ON CONFLICT (gid) DO NOTHING """) values = [{"gid": value[0], "new_id": value[1], "created": value[2]} for value in MB_recording_gid_redirect_data ] connection.execute(recording_gid_redirect_query, values) - transaction.commit() print("INSERTED recording_gid_redirect data\n") -def write_release_group(transaction, connection): +def write_release_group(connection, MB_release_group_data): + """Insert data in release_group table in musicbrainz schema in + AcousticBrainz database. 
+ """ release_group_query = text(""" INSERT INTO musicbrainz.release_group - VALUES (:id, :gid, :name, :artist_credit, :type, :comment, :edits_pending, :last_updated) + VALUES (:id, :gid, :name, :artist_credit, :type, :comment, :edits_pending, :last_updated) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -1014,14 +1345,17 @@ def write_release_group(transaction, connection): "last_updated": value[7]} for value in MB_release_group_data ] connection.execute(release_group_query, values) - transaction.commit() print("INSERTED release_group data\n") -def write_release_group_gid_redirect(transaction, connection): +def write_release_group_gid_redirect(connection, MB_release_gid_redirect_data): + """Insert data in release_group_gid_redirect table in musicbrainz schema in + AcousticBrainz database. + """ release_group_gid_redirect_query = text(""" INSERT INTO musicbrainz.release_group_gid_redirect - VALUES (:gid, :new_id, :created) + VALUES (:gid, :new_id, :created) + ON CONFLICT (gid) DO NOTHING """) values = [{ "gid": value[0], @@ -1029,15 +1363,18 @@ def write_release_group_gid_redirect(transaction, connection): "created": value[2]} for value in MB_release_gid_redirect_data ] connection.execute(release_group_gid_redirect_query, values) - transaction.commit() print("INSERTED release_gid_redirect data\n") -def write_release(transaction, connection): +def write_release(connection, MB_release_data): + """Insert data in release table in musicbrainz schema in + AcousticBrainz database. 
+ """ release_query = text(""" INSERT INTO musicbrainz.release - VALUES (:id, :gid, :name, :artist_credit, :release_group, :status, :packaging, :language, - :script, :barcode, :comment, :edits_pending, :quality, :last_updated) + VALUES (:id, :gid, :name, :artist_credit, :release_group, :status, :packaging, :language, + :script, :barcode, :comment, :edits_pending, :quality, :last_updated) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -1056,14 +1393,17 @@ def write_release(transaction, connection): "last_updated": value[13]} for value in MB_release_data ] connection.execute(release_query, values) - transaction.commit() print("INSERTED release data\n") -def write_release_gid_redirect(transaction, connection): +def write_release_gid_redirect(connection, MB_release_gid_redirect_data): + """Insert data in release_gid_redirect table in musicbrainz schema in + AcousticBrainz database. + """ release_gid_redirect_query = text(""" INSERT INTO musicbrainz.release_gid_redirect - VALUES (:gid, :new_id, :created) + VALUES (:gid, :new_id, :created) + ON CONFLICT (gid) DO NOTHING """) values = [{ "gid": value[0], @@ -1071,14 +1411,17 @@ def write_release_gid_redirect(transaction, connection): "created": value[2]} for value in MB_release_gid_redirect_data ] connection.execute(release_gid_redirect_query, values) - transaction.commit() print("INSERTED release_gid_redirect data\n") -def write_medium(transaction, connection): +def write_medium(connection, MB_medium_data): + """Insert data in medium table in musicbrainz schema in + AcousticBrainz database. 
+ """ medium_query = text(""" INSERT INTO musicbrainz.medium - VALUES (:id, :release, :position, :format, :name, :edits_pending, :last_updated, :track_count) + VALUES (:id, :release, :position, :format, :name, :edits_pending, :last_updated, :track_count) + ON CONFLICT (id) DO NOTHING """) values = [{ "id": value[0], @@ -1091,15 +1434,18 @@ def write_medium(transaction, connection): "track_count": value[7]} for value in MB_medium_data ] connection.execute(medium_query, values) - transaction.commit() print("INSERTED medium data\n") -def write_track(transaction, connection): +def write_track(connection, MB_track_data): + """Insert data in track table in musicbrainz schema in + AcousticBrainz database. + """ track_query = text(""" INSERT INTO musicbrainz.track - VALUES (:id, :gid, :recording, :medium, :position, :number, :name, :artist_credit, :length, - :edits_pending, :last_updated, :is_data_track) + VALUES (:id, :gid, :recording, :medium, :position, :number, :name, :artist_credit, :length, + :edits_pending, :last_updated, :is_data_track) + ON CONFLICT (gid) DO NOTHING """) values = [{ "id": value[0], @@ -1116,397 +1462,268 @@ def write_track(transaction, connection): "is_data_track": value[11]} for value in MB_track_data ] connection.execute(track_query, values) - transaction.commit() print("INSERTED track data\n") - - -# FUNCTION TO CALL ALL INSERTS - -def insert_MB_data_AB(): - with db.engine.connect() as connection: - if MB_artist_credit_data: - transaction = connection.begin() - try: - write_artist_credit(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_artist_type_data: - transaction = connection.begin() - try: - write_artist_type(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_area_type_data: - transaction = connection.begin() - try: - write_area_type(transaction, connection) - except IntegrityError as e: - print(e.message) - 
transaction.rollback() - - if MB_begin_area_type_data: - transaction = connection.begin() - try: - write_begin_area_type(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_end_area_type_data: - transaction = connection.begin() - try: - write_end_area_type(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_status_data: - transaction = connection.begin() - try: - write_release_status(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_group_primary_type_data: - transaction = connection.begin() - try: - write_release_group_primary_type(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_medium_format_data: - transaction = connection.begin() - try: - write_medium_format(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_packaging_data: - transaction = connection.begin() - try: - write_release_packaging(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_language_data: - transaction = connection.begin() - try: - write_language(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_script_data: - transaction = connection.begin() - try: - write_script(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_gender_data: - transaction = connection.begin() - try: - write_gender(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_area_data: - transaction = connection.begin() - try: - write_area(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_begin_area_data: - transaction = connection.begin() 
- try: - write_begin_area(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_end_area_data: - transaction = connection.begin() - try: - write_end_area(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_artist_data: - transaction = connection.begin() - try: - write_artist(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_artist_credit_name_data: - transaction = connection.begin() - try: - write_artist_credit_name(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_artist_gid_redirect_data: - transaction = connection.begin() - try: - write_artist_gid_redirect(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - - if MB_recording_data: - transaction = connection.begin() - try: - write_recording(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_recording_gid_redirect_data: - transaction = connection.begin() - try: - write_recording_gid_redirect(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_group_data: - transaction = connection.begin() - try: - write_release_group(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_group_gid_redirect_data: - transaction = connection.begin() - try: - write_release_group_gid_redirect(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_data: - transaction = connection.begin() - try: - write_release(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_release_gid_redirect_data: - transaction = connection.begin() - try: - 
write_release_gid_redirect(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_medium_data: - transaction = connection.begin() - try: - write_medium(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - if MB_track_data: - transaction = connection.begin() - try: - write_track(transaction, connection) - except IntegrityError as e: - print(e.message) - transaction.rollback() - - -def fetch_musicbrainz_data(gids_in_AB): +def fetch_and_insert_musicbrainz_data(gids_in_AB): + # Get MusicBrainz data with musicbrainz_db.engine.begin() as connection: # track try: - load_track(connection, gids_in_AB) + MB_track_data = load_track(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # MEDIUM + # medium try: - load_medium(connection, gids_in_AB) + MB_medium_data = load_medium(connection, gids_in_AB, MB_track_data) except ValueError: print("No Data found for the recordings") # release_gid_redirect try: - load_release_gid_redirect(connection, gids_in_AB) + MB_release_gid_redirect_data = load_release_gid_redirect(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # RELEASE + # release try: - load_release(connection, gids_in_AB) + MB_release_data = load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data) except ValueError: print("No Data found for the recordings") - # ARTIST CREDIT + # artist_credit try: - load_artist_credit(connection, gids_in_AB) + MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data) except ValueError: print("No Data found for the recordings") - # ARTIST CREDIT NAME + # artist_credit_name try: - load_artist_credit_name(connection, gids_in_AB) + MB_artist_credit_name_data = load_artist_credit_name(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # ARTIST + # artist try: - load_artist(connection, 
gids_in_AB) + MB_artist_data = load_artist(connection, gids_in_AB, MB_artist_credit_name_data) except ValueError: print("No Data found for the recordings") - # ARTIST TYPE + # artist_type try: - load_artist_type(connection, gids_in_AB) + MB_artist_type_data = load_artist_type(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # RECORDING + # recording try: - load_recording(connection, gids_in_AB) + MB_recording_data = load_recording(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # AREA + # area try: - load_area(connection, gids_in_AB) + MB_area_data = load_area(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # BEGIN AREA + # begin_area try: - load_begin_area(connection, gids_in_AB) + MB_begin_area_data = load_begin_area(connection, gids_in_AB, MB_artist_data) except ValueError: print("No Data found for the recordings") - # END AREA + # end_area try: - load_end_area(connection, gids_in_AB) + MB_end_area_data = load_end_area(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # AREA TYPE + # area_type try: - load_area_type(connection, gids_in_AB) + MB_area_type_data = load_area_type(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # BEGIN AREA TYPE + # begin_area_type try: - load_begin_area_type(connection, gids_in_AB) + MB_begin_area_type_data = load_begin_area_type(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # END AREA TYPE + # end_area_type try: - load_end_area_type(connection, gids_in_AB) + MB_end_area_type_data = load_end_area_type(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # ARTIST GID REDIRECT + # artist_gid_redirect try: - load_artist_gid_redirect(connection, gids_in_AB) + MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, gids_in_AB) except ValueError: print("No Data found 
for the recordings") - # GENDER + # gender try: - load_gender(connection, gids_in_AB) + MB_gender_data = load_gender(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # LANGUAGE + # language try: - load_language(connection, gids_in_AB) + MB_language_data = load_language(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # MEDIUM FORMAT + # medium_format try: - load_medium_format(connection, gids_in_AB) + MB_medium_format_data = load_medium_format(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - # RECORDING GID REDIRECT + # recording_gid_redirect try: - load_recording_gid_redirect(connection, gids_in_AB) + MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # release_group gid redirect try: - load_release_group_gid_redirect(connection, gids_in_AB) + MB_release_group_gid_redirect_data = load_release_group_gid_redirect(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # release_group try: - load_release_group(connection, gids_in_AB) + MB_release_group_data = load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data) except ValueError: print("No Data found for the recordings") # release_group_primary_type try: - load_release_group_primary_type(connection, gids_in_AB) + MB_release_group_primary_type_data = load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data) except ValueError: print("No Data found for the recordings") # release_packaging try: - load_release_packaging(connection, gids_in_AB) + MB_release_packaging_data = load_release_packaging(connection, gids_in_AB, MB_release_data) except ValueError: print("No Data found for the recordings") # release_status try: - load_release_status(connection, gids_in_AB) + MB_release_status_data = load_release_status(connection, 
gids_in_AB) except ValueError: print("No Data found for the recordings") # script try: - load_script(connection, gids_in_AB) + MB_script_data = load_script(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") - insert_MB_data_AB() - print("--------------------------------DONE-----------------------------------") + # Write MusicBrainz data into AcousticBrainz database + with db.engine.begin() as connection: + if MB_artist_credit_data: + write_artist_credit(connection, MB_artist_credit_data) + + if MB_artist_type_data: + write_artist_type(connection, MB_artist_type_data) + + if MB_area_type_data: + write_area_type(connection, MB_area_type_data) + + if MB_begin_area_type_data: + write_begin_area_type(connection, MB_begin_area_type_data) + + if MB_end_area_type_data: + write_end_area_type(connection, MB_end_area_type_data) + + if MB_release_status_data: + write_release_status(connection, MB_release_status_data) + + if MB_release_group_primary_type_data: + write_release_group_primary_type(connection, MB_release_group_primary_type_data) + + if MB_medium_format_data: + write_medium_format(connection, MB_medium_format_data) + + if MB_release_packaging_data: + write_release_packaging(connection, MB_release_packaging_data) + + if MB_language_data: + write_language(connection, MB_language_data) + + if MB_script_data: + write_script(connection, MB_script_data) + + if MB_gender_data: + write_gender(connection, MB_gender_data) + + if MB_area_data: + write_area(connection, MB_area_data) + + if MB_begin_area_data: + write_begin_area(connection, MB_begin_area_data) + + if MB_end_area_data: + write_end_area(connection, MB_end_area_data) + + if MB_artist_data: + write_artist(connection, MB_artist_data) + + if MB_artist_credit_name_data: + write_artist_credit_name(connection, MB_artist_credit_name_data) + + if MB_artist_gid_redirect_data: + write_artist_gid_redirect(connection, MB_artist_gid_redirect_data) + + if MB_recording_data: + 
write_recording(connection, MB_recording_data) + + if MB_recording_gid_redirect_data: + write_recording_gid_redirect(connection, MB_recording_gid_redirect_data) + + if MB_release_group_data: + write_release_group(connection, MB_release_group_data) + + if MB_release_group_gid_redirect_data: + write_release_group_gid_redirect(connection, MB_release_group_gid_redirect_data) + + if MB_release_data: + write_release(connection, MB_release_data) + + if MB_release_gid_redirect_data: + write_release_gid_redirect(connection, MB_release_gid_redirect_data) + + if MB_medium_data: + write_medium(connection, MB_medium_data) + + if MB_track_data: + write_track(connection, MB_track_data) def start_import(): with db.engine.begin() as connection: - lowlevel_query = text("""SELECT gid from lowlevel""") - gids = connection.execute(lowlevel_query) - gids = gids.fetchall() - gids_in_AB = [value[0] for value in gids] - no_of_rows = len(gids_in_AB) - start = 0 + offset = 0 rows_to_fetch = 10000 - for value in range(0, (no_of_rows/rows_to_fetch) + 1): - fetch_musicbrainz_data(gids_in_AB[start : start + rows_to_fetch]) - start = start + rows_to_fetch + while True: + lowlevel_query = text("""SELECT gid + FROM lowlevel + ORDER BY id + OFFSET :offset + LIMIT :rows_to_fetch + """) + gids = connection.execute(lowlevel_query, {"offset": offset, "rows_to_fetch": rows_to_fetch}) + gids = gids.fetchall() + gids_in_AB = [value[0] for value in gids] + offset = offset + rows_to_fetch + + if gids_in_AB: + print('\nInserting %d recordings at a time...\n' % (rows_to_fetch)) + fetch_and_insert_musicbrainz_data(gids_in_AB) + else: + break + print("--------------------------------DONE!-----------------------------------") diff --git a/manage.py b/manage.py index e582ce5ab..71a5389de 100644 --- a/manage.py +++ b/manage.py @@ -187,7 +187,7 @@ def init_mb_db(): @cli.command() def import_musicbrainz_db(): - print("Importing MusicBrainz data...") + print("\nImporting MusicBrainz data...") 
db.import_mb_data.start_import() # Please keep additional sets of commands down there From 5b38820849a00c34e04fa6f1d2e15c0ca77f2b25 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Thu, 14 Jun 2018 02:49:32 +0530 Subject: [PATCH 020/125] Remove repeated MB db initialization command --- manage.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/manage.py b/manage.py index 71a5389de..451110393 100644 --- a/manage.py +++ b/manage.py @@ -181,10 +181,6 @@ def remove_admin(username): sys.exit(1) -@cli.command() -def init_mb_db(): - musicbrainz_db.init_db_engine(current_app.config['MB_DATABASE_URI']) - @cli.command() def import_musicbrainz_db(): print("\nImporting MusicBrainz data...") From 5f4b1b360b68efd404ceecf9dd9576e953aa054d Mon Sep 17 00:00:00 2001 From: RashiSah Date: Thu, 14 Jun 2018 15:31:28 +0530 Subject: [PATCH 021/125] Write docstrings in imperative mood --- db/import_mb_data.py | 60 ++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 065da61e4..0f0720ad6 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -3,10 +3,10 @@ from sqlalchemy import text def load_artist_credit(connection, gids_in_AB, MB_release_data): - """Fetches artist_credit table data from MusicBrainz database for the + """Fetch artist_credit table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. - Also fetches data corresponding to release table. + Also fetch data corresponding to release table. """ filters = [] filter_data = {} @@ -46,7 +46,7 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data): def load_artist_type(connection, gids_in_AB): - """Fetches artist_type table data from MusicBrainz database for the + """Fetch artist_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
""" artist_type_query = text(""" @@ -72,7 +72,7 @@ def load_artist_type(connection, gids_in_AB): def load_area_type(connection, gids_in_AB): - """Fetches area_type table data from MusicBrainz database for the + """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ area_type_query = text(""" @@ -100,7 +100,7 @@ def load_area_type(connection, gids_in_AB): def load_begin_area_type(connection, gids_in_AB): - """Fetches area_type table data from MusicBrainz database for the + """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for the begin area column in artist table. """ @@ -129,7 +129,7 @@ def load_begin_area_type(connection, gids_in_AB): def load_end_area_type(connection, gids_in_AB): - """Fetches area_type table data from MusicBrainz database for the + """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for the end area column in artist table. """ @@ -158,7 +158,7 @@ def load_end_area_type(connection, gids_in_AB): def load_release_status(connection, gids_in_AB): - """Fetches release_status table data from MusicBrainz database for the + """Fetch release_status table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ release_status_query = text(""" @@ -182,10 +182,10 @@ def load_release_status(connection, gids_in_AB): def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data): - """Fetches release_group_primary_type table data from MusicBrainz database for the + """Fetch release_group_primary_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. - Also fetches data corresponding to release_group table. + Also fetch data corresponding to release_group table. 
""" filters = [] filter_data = {} @@ -227,7 +227,7 @@ def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_dat def load_medium_format(connection, gids_in_AB): - """Fetches medium_format table data from MusicBrainz database for the + """Fetch medium_format table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ medium_format_query = text(""" @@ -241,10 +241,10 @@ def load_medium_format(connection, gids_in_AB): def load_release_packaging(connection, gids_in_AB, MB_release_data): - """Fetches release_packaging table data from MusicBrainz database for the + """Fetch release_packaging table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. - Also fetches data corresponding to release table. + Also fetch data corresponding to release table. """ filters = [] filter_data = {} @@ -290,7 +290,7 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): def load_language(connection, gids_in_AB): - """Fetches language table data from MusicBrainz database for the + """Fetch language table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ language_query = text(""" @@ -315,7 +315,7 @@ def load_language(connection, gids_in_AB): def load_script(connection, gids_in_AB): - """Fetches script table data from MusicBrainz database for the + """Fetch script table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ script_query = text(""" @@ -338,7 +338,7 @@ def load_script(connection, gids_in_AB): def load_gender(connection, gids_in_AB): - """ Fetches gender table data from MusicBrainz database for the + """ Fetch gender table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
""" gender_query = text(""" @@ -364,7 +364,7 @@ def load_gender(connection, gids_in_AB): def load_area(connection, gids_in_AB): - """ Fetches area table data from MusicBrainz database for the + """ Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ area_query = text(""" @@ -398,10 +398,10 @@ def load_area(connection, gids_in_AB): def load_begin_area(connection, gids_in_AB, MB_artist_data): - """Fetches area table data from MusicBrainz database for the + """Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for begin area column. - Also fetches data corresponding to artist table. + Also fetch data corresponding to artist table. """ filters = [] filter_data = {} @@ -456,7 +456,7 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): def load_end_area(connection, gids_in_AB): - """Fetches area table data from MusicBrainz database for the + """Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for end area column. """ end_area_query = text(""" @@ -490,7 +490,7 @@ def load_end_area(connection, gids_in_AB): def load_artist_credit_name(connection, gids_in_AB): - """Fetches artist_credit_name table data from MusicBrainz database + """Fetch artist_credit_name table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ artist_credit_name_query = text(""" @@ -513,7 +513,7 @@ def load_artist_credit_name(connection, gids_in_AB): def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): - """Fetches artist table data from MusicBrainz database for the + """Fetch artist table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. Also fetch data corresponding to artist_credit_name table. 
@@ -559,7 +559,7 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): def load_artist_gid_redirect(connection, gids_in_AB): - """Fetches artist_gid_redirect table data from MusicBrainz database for the + """Fetch artist_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ artist_gid_redirect_query = text(""" @@ -582,7 +582,7 @@ def load_artist_gid_redirect(connection, gids_in_AB): def load_recording(connection, gids_in_AB): - """Fetches recording table data from MusicBrainz database for the + """Fetch recording table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ recording_query = text(""" @@ -599,7 +599,7 @@ def load_recording(connection, gids_in_AB): def load_recording_gid_redirect(connection, gids_in_AB): - """Fetches recording_gid_redirect table data from MusicBrainz database for the + """Fetch recording_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ recording_gid_redirect_query = text(""" @@ -618,7 +618,7 @@ def load_recording_gid_redirect(connection, gids_in_AB): def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data): - """Fetches release_group table data from MusicBrainz database for the + """Fetch release_group table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. Also fetch data corresponding to release_group_gid_redirect and @@ -678,7 +678,7 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat def load_release_group_gid_redirect(connection, gids_in_AB): - """Fetches release_group_gid_redirect table data from MusicBrainz database for the + """Fetch release_group_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
""" release_group_gid_redirect_query = text(""" @@ -699,7 +699,7 @@ def load_release_group_gid_redirect(connection, gids_in_AB): def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data): - """Fetches release table data from MusicBrainz database for the + """Fetch release table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. Also fetch data corresponding to medium and release_gid_redirect table. @@ -764,7 +764,7 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect def load_release_gid_redirect(connection, gids_in_AB): - """Fetches release_gid_redirect table data from MusicBrainz database for the + """Fetch release_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. """ release_gid_redirect_query = text(""" @@ -785,7 +785,7 @@ def load_release_gid_redirect(connection, gids_in_AB): def load_medium(connection, gids_in_AB, MB_track_data): - """Fetches medium table data from MusicBrainz database for the + """Fetch medium table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. Also fetch data corresponding to track table. @@ -835,7 +835,7 @@ def load_medium(connection, gids_in_AB, MB_track_data): def load_track(connection, gids_in_AB): - """Fetches track table data from MusicBrainz database for the + """Fetch track table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
""" track_query = text(""" From 4853ebcceac091599b1f0a78c3a8b3e3230358e9 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sat, 16 Jun 2018 22:49:51 +0530 Subject: [PATCH 022/125] AB-344: Add a force recreate musicbrainz schema option --- admin/sql/drop_musicbrainz_schema.sql | 1 + manage.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 admin/sql/drop_musicbrainz_schema.sql diff --git a/admin/sql/drop_musicbrainz_schema.sql b/admin/sql/drop_musicbrainz_schema.sql new file mode 100644 index 000000000..f12ed0757 --- /dev/null +++ b/admin/sql/drop_musicbrainz_schema.sql @@ -0,0 +1 @@ +DROP SCHEMA IF EXISTS musicbrainz CASCADE; diff --git a/manage.py b/manage.py index 451110393..cc679a41a 100644 --- a/manage.py +++ b/manage.py @@ -99,7 +99,8 @@ def init_db(archive, force, skip_create_db=False): @cli.command() -def init_mb_db(): +@click.option("--force", "-f", is_flag=True, help="Drop existing musicbrainz schema and tables.") +def init_mb_db(force): """Initialize the MusicBrainz database. This process involves several steps: @@ -111,6 +112,11 @@ def init_mb_db(): musicbrainz_db.init_db_engine(current_app.config['MB_DATABASE_URI']) + if force: + res = db.run_sql_script_without_transaction(os.path.join(ADMIN_SQL_DIR, 'drop_musicbrainz_schema.sql')) + if not res: + raise Exception('Failed to drop existing musicbrainz schema and tables! 
Exit code: %i' % res) + print('Creating MusicBrainz schema...') db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_schema.sql')) From 038f20403c77a812820aa07339eb69846027aa94 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sat, 16 Jun 2018 23:43:30 +0530 Subject: [PATCH 023/125] Add a line to print a message of dropping the schema and capitalize first letters of musicbrainz --- manage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/manage.py b/manage.py index cc679a41a..2f26655eb 100644 --- a/manage.py +++ b/manage.py @@ -99,7 +99,7 @@ def init_db(archive, force, skip_create_db=False): @cli.command() -@click.option("--force", "-f", is_flag=True, help="Drop existing musicbrainz schema and tables.") +@click.option("--force", "-f", is_flag=True, help="Drop existing MusicBrainz schema and tables.") def init_mb_db(force): """Initialize the MusicBrainz database. @@ -113,6 +113,7 @@ def init_mb_db(force): musicbrainz_db.init_db_engine(current_app.config['MB_DATABASE_URI']) if force: + print('Dropping MusicBrainz schema...') res = db.run_sql_script_without_transaction(os.path.join(ADMIN_SQL_DIR, 'drop_musicbrainz_schema.sql')) if not res: raise Exception('Failed to drop existing musicbrainz schema and tables! Exit code: %i' % res) From ddaa587a63f304d4b9a3712668d3cfe243dea598 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sun, 17 Jun 2018 00:28:17 +0530 Subject: [PATCH 024/125] AB-343: Add number of recording per batch in config.py --- config.py.example | 3 +++ db/import_mb_data.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/config.py.example b/config.py.example index 407c584c1..b8c4f7d69 100644 --- a/config.py.example +++ b/config.py.example @@ -63,3 +63,6 @@ FILE_STORAGE_DIR = "/data/files" #Feature Flags FEATURE_EVAL_LOCATION = False + +# Maximum number of recordings to fetch at a time for importing MusicBrainz metadata. 
+RECORDINGS_FETCHED_PER_BATCH = 10000 diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 0f0720ad6..6f3a51986 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1,6 +1,7 @@ import db from brainzutils import musicbrainz_db from sqlalchemy import text +from flask import current_app def load_artist_credit(connection, gids_in_AB, MB_release_data): """Fetch artist_credit table data from MusicBrainz database for the @@ -1708,7 +1709,7 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): def start_import(): with db.engine.begin() as connection: offset = 0 - rows_to_fetch = 10000 + rows_to_fetch = current_app.config['RECORDINGS_FETCHED_PER_BATCH'] while True: lowlevel_query = text("""SELECT gid FROM lowlevel From 155200605171d87085845e801a55a24ccb18d35f Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sun, 17 Jun 2018 17:07:17 +0530 Subject: [PATCH 025/125] AB-342: Improve logging during MusicBrainz data import --- db/import_mb_data.py | 84 +++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 6f3a51986..ddafcdb94 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -880,7 +880,7 @@ def write_artist_credit(connection, MB_artist_credit_data): "created" : value[4]} for value in MB_artist_credit_data ] connection.execute(artist_credit_query, values) - print("INSERTED artist_credit data\n") + print('Inserted %d rows in artist credit table!' % len(MB_artist_credit_data)) def write_artist_type(connection, MB_artist_type_data): @@ -901,7 +901,7 @@ def write_artist_type(connection, MB_artist_type_data): "gid" : value[5]} for value in MB_artist_type_data ] connection.execute(artist_type_query, values) - print("INSERTED artist_type data\n") + print('Inserted %d rows in artist type table!' 
% len(MB_artist_type_data)) def write_area_type(connection, MB_area_type_data): @@ -922,7 +922,7 @@ def write_area_type(connection, MB_area_type_data): "gid": value[5]} for value in MB_area_type_data ] connection.execute(area_type_query, values) - print("INSERTED area_type data\n") + print('Inserted %d rows in area type table!' % len(MB_area_type_data)) def write_begin_area_type(connection, MB_begin_area_type_data): @@ -943,7 +943,7 @@ def write_begin_area_type(connection, MB_begin_area_type_data): "gid": value[5]} for value in MB_begin_area_type_data ] connection.execute(begin_area_type_query, values) - print("INSERTED begin_area_type data\n") + print('Inserted %d rows in area type table for begin area data!' % len(MB_begin_area_type_data)) def write_end_area_type(connection, MB_end_area_type_data): @@ -964,7 +964,7 @@ def write_end_area_type(connection, MB_end_area_type_data): "gid": value[5]} for value in MB_end_area_type_data ] connection.execute(end_area_type_query, values) - print("INSERTED end_area_type data\n") + print('Inserted %d rows in area type table for end area data!' % len(MB_end_area_type_data)) def write_release_status(connection, MB_release_status_data): @@ -985,7 +985,7 @@ def write_release_status(connection, MB_release_status_data): "gid": value[5]} for value in MB_release_status_data ] result = connection.execute(release_status_query, values) - print("INSERTED release_status data\n") + print('Inserted %d rows in release status table!' % len(MB_release_status_data)) def write_release_group_primary_type(connection, MB_release_group_primary_type_data): @@ -1006,7 +1006,7 @@ def write_release_group_primary_type(connection, MB_release_group_primary_type_d "gid": value[5]} for value in MB_release_group_primary_type_data ] connection.execute(release_group_primary_type_query, values) - print("INSERTED release_group_primary_type data\n") + print('Inserted %d rows in release group primary type table!' 
% len(MB_release_group_primary_type_data)) def write_medium_format(connection, MB_medium_format_data): @@ -1030,7 +1030,7 @@ def write_medium_format(connection, MB_medium_format_data): ] connection.execute(text("""ALTER TABLE musicbrainz.medium_format DROP CONSTRAINT IF EXISTS medium_format_fk_parent""")) connection.execute(medium_format_query, values) - print("INSERTED medium_format data\n") + print('Inserted %d rows in medium format table!' % len(MB_medium_format_data)) def write_release_packaging(connection, MB_release_packaging_data): @@ -1051,7 +1051,7 @@ def write_release_packaging(connection, MB_release_packaging_data): "gid": value[5]} for value in MB_release_packaging_data ] connection.execute(release_packaging_query, values) - print("INSERTED release_packaging data\n") + print('Inserted %d rows in release packaging table!' % len(MB_release_packaging_data)) def write_language(connection, MB_language_data): @@ -1072,7 +1072,7 @@ def write_language(connection, MB_language_data): "iso_code_3": value[5]} for value in MB_language_data ] connection.execute(language_query, values) - print("INSERTED language data\n") + print('Inserted %d rows in language table!' % len(MB_language_data)) def write_script(connection, MB_script_data): @@ -1092,7 +1092,7 @@ def write_script(connection, MB_script_data): "frequency": value[4]} for value in MB_script_data ] connection.execute(script_query, values) - print("INSERTED script data\n") + print('Inserted %d rows in script table!' % len(MB_script_data)) def write_gender(connection, MB_gender_data): @@ -1113,7 +1113,7 @@ def write_gender(connection, MB_gender_data): "gid": value[5]} for value in MB_gender_data ] connection.execute(gender_query, values) - print("INSERTED gender data\n") + print('Inserted %d rows in gender table!' 
% len(MB_gender_data)) def write_area(connection, MB_area_data): @@ -1144,7 +1144,7 @@ def write_area(connection, MB_area_data): "comment": value[13]} for value in MB_area_data ] connection.execute(area_query, values) - print("INSERTED area data\n") + print('Inserted %d rows in area table!' % len(MB_area_data)) def write_begin_area(connection, MB_begin_area_data): @@ -1176,8 +1176,7 @@ def write_begin_area(connection, MB_begin_area_data): "comment": value[13]} for value in MB_begin_area_data ] connection.execute(begin_area_query, values) - print("INSERTED begin_area data\n") - + print('Inserted %d rows in area table for begin area data!' % len(MB_begin_area_data)) def write_end_area(connection, MB_end_area_data): """Insert data in area table in musicbrainz schema in @@ -1208,7 +1207,7 @@ def write_end_area(connection, MB_end_area_data): "comment": value[13]} for value in MB_end_area_data ] connection.execute(end_area_query, values) - print("INSERTED end_area data\n") + print('Inserted %d rows in area table for end area data!' % len(MB_end_area_data)) def write_artist(connection, MB_artist_data): @@ -1244,7 +1243,7 @@ def write_artist(connection, MB_artist_data): "end_area": value[18]} for value in MB_artist_data ] connection.execute(artist_query, values) - print("INSERTED artist data\n") + print('Inserted %d rows in artist table!' % len(MB_artist_data)) def write_artist_credit_name(connection, MB_artist_credit_name_data): @@ -1264,7 +1263,7 @@ def write_artist_credit_name(connection, MB_artist_credit_name_data): "join_phrase": value[4]} for value in MB_artist_credit_name_data ] connection.execute(artist_credit_name_query, values) - print("INSERTED artist_credit_name data\n") + print('Inserted %d rows in artist credit name table!' 
% len(MB_artist_credit_name_data)) def write_artist_gid_redirect(connection, MB_artist_gid_redirect_data): @@ -1282,7 +1281,7 @@ def write_artist_gid_redirect(connection, MB_artist_gid_redirect_data): "created": value[2]} for value in MB_artist_gid_redirect_data ] connection.execute(artist_gid_redirect_query, values) - print("INSERTED artist_gid_redirect data\n") + print('Inserted %d rows in artist gid redirect table!' % len(MB_artist_gid_redirect_data)) def write_recording(connection, MB_recording_data): @@ -1306,7 +1305,7 @@ def write_recording(connection, MB_recording_data): "video": value[8]} for value in MB_recording_data ] connection.execute(recording_query, values) - print("INSERTED recording data\n") + print('Inserted %d rows in recording table!' % len(MB_recording_data)) def write_recording_gid_redirect(connection, MB_recording_gid_redirect_data): @@ -1323,7 +1322,7 @@ def write_recording_gid_redirect(connection, MB_recording_gid_redirect_data): "created": value[2]} for value in MB_recording_gid_redirect_data ] connection.execute(recording_gid_redirect_query, values) - print("INSERTED recording_gid_redirect data\n") + print('Inserted %d rows in recording gid redirect table!' % len(MB_recording_gid_redirect_data)) def write_release_group(connection, MB_release_group_data): @@ -1346,7 +1345,7 @@ def write_release_group(connection, MB_release_group_data): "last_updated": value[7]} for value in MB_release_group_data ] connection.execute(release_group_query, values) - print("INSERTED release_group data\n") + print('Inserted %d rows in release group table!' 
% len(MB_release_group_data)) def write_release_group_gid_redirect(connection, MB_release_gid_redirect_data): @@ -1364,7 +1363,7 @@ def write_release_group_gid_redirect(connection, MB_release_gid_redirect_data): "created": value[2]} for value in MB_release_gid_redirect_data ] connection.execute(release_group_gid_redirect_query, values) - print("INSERTED release_gid_redirect data\n") + print('Inserted %d rows in release gid redirect table!' % len(MB_release_gid_redirect_data)) def write_release(connection, MB_release_data): @@ -1394,7 +1393,7 @@ def write_release(connection, MB_release_data): "last_updated": value[13]} for value in MB_release_data ] connection.execute(release_query, values) - print("INSERTED release data\n") + print('Inserted %d rows in release table!' % len(MB_release_data)) def write_release_gid_redirect(connection, MB_release_gid_redirect_data): @@ -1412,7 +1411,7 @@ def write_release_gid_redirect(connection, MB_release_gid_redirect_data): "created": value[2]} for value in MB_release_gid_redirect_data ] connection.execute(release_gid_redirect_query, values) - print("INSERTED release_gid_redirect data\n") + print('Inserted %d rows in release gid redirect table!' % len(MB_release_gid_redirect_data)) def write_medium(connection, MB_medium_data): @@ -1435,7 +1434,7 @@ def write_medium(connection, MB_medium_data): "track_count": value[7]} for value in MB_medium_data ] connection.execute(medium_query, values) - print("INSERTED medium data\n") + print('Inserted %d rows in medium table!' % len(MB_medium_data)) def write_track(connection, MB_track_data): @@ -1463,169 +1462,197 @@ def write_track(connection, MB_track_data): "is_data_track": value[11]} for value in MB_track_data ] connection.execute(track_query, values) - print("INSERTED track data\n") + print('Inserted %d rows in track table!' 
% len(MB_track_data)) def fetch_and_insert_musicbrainz_data(gids_in_AB): # Get MusicBrainz data + print('\nGetting %d recordings data at a time...\n' % (len(gids_in_AB))) with musicbrainz_db.engine.begin() as connection: # track try: + print('Getting track data...') MB_track_data = load_track(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # medium try: + print('Getting medium data...') MB_medium_data = load_medium(connection, gids_in_AB, MB_track_data) except ValueError: print("No Data found for the recordings") # release_gid_redirect try: + print('Getting release gid redirect data...') MB_release_gid_redirect_data = load_release_gid_redirect(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # release try: + print('Getting release data...') MB_release_data = load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data) except ValueError: print("No Data found for the recordings") # artist_credit try: + print('Getting artist credit data...') MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data) except ValueError: print("No Data found for the recordings") # artist_credit_name try: + print('Getting artist credit name data...') MB_artist_credit_name_data = load_artist_credit_name(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # artist try: + print('Getting artist data...') MB_artist_data = load_artist(connection, gids_in_AB, MB_artist_credit_name_data) except ValueError: print("No Data found for the recordings") # artist_type try: + print('Getting artist type data...') MB_artist_type_data = load_artist_type(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # recording try: + print('Getting recording data...') MB_recording_data = load_recording(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # area try: + print('Getting area data...') 
MB_area_data = load_area(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # begin_area try: + print('Getting begin area data...') MB_begin_area_data = load_begin_area(connection, gids_in_AB, MB_artist_data) except ValueError: print("No Data found for the recordings") # end_area try: + print('Getting end area data...') MB_end_area_data = load_end_area(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # area_type try: + print('Getting area type data...') MB_area_type_data = load_area_type(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # begin_area_type try: + print('Getting begin area type data...') MB_begin_area_type_data = load_begin_area_type(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # end_area_type try: + print('Getting end area data...') MB_end_area_type_data = load_end_area_type(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # artist_gid_redirect try: + print('Getting artist gid redirect data...') MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # gender try: + print('Getting gender data...') MB_gender_data = load_gender(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # language try: + print('Getting language data...') MB_language_data = load_language(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # medium_format try: + print('Getting medium format data...') MB_medium_format_data = load_medium_format(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # recording_gid_redirect try: + print('Getting recording gid redirect data...') MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # 
release_group gid redirect try: + print('Getting release group gid redirect data...') MB_release_group_gid_redirect_data = load_release_group_gid_redirect(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # release_group try: + print('Getting release group data...') MB_release_group_data = load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data) except ValueError: print("No Data found for the recordings") # release_group_primary_type try: + print('Getting release group primary type data...') MB_release_group_primary_type_data = load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data) except ValueError: print("No Data found for the recordings") # release_packaging try: + print('Getting release packaging data...') MB_release_packaging_data = load_release_packaging(connection, gids_in_AB, MB_release_data) except ValueError: print("No Data found for the recordings") # release_status try: + print('Getting release status data...') MB_release_status_data = load_release_status(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # script try: + print('Getting script data...') MB_script_data = load_script(connection, gids_in_AB) except ValueError: print("No Data found for the recordings") # Write MusicBrainz data into AcousticBrainz database + print('\nInserting %d recordings data at a time...\n' % (len(gids_in_AB))) with db.engine.begin() as connection: if MB_artist_credit_data: write_artist_credit(connection, MB_artist_credit_data) @@ -1723,8 +1750,7 @@ def start_import(): offset = offset + rows_to_fetch if gids_in_AB: - print('\nInserting %d recordings at a time...\n' % (rows_to_fetch)) fetch_and_insert_musicbrainz_data(gids_in_AB) else: break - print("--------------------------------DONE!-----------------------------------") + print('Done!') From ac856e1eb7cf8849ffc8f0c20fdd1c193975c0c0 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Wed, 20 Jun 
2018 21:36:47 +0530 Subject: [PATCH 026/125] AB-346: Import MusicBrainz data for every new recording added to AB db --- db/mb_import.py | 33 +++++++++++++++++++++++++++++++++ docker/docker-compose.dev.yml | 12 ++++++++++++ worker_manage.py | 6 ++++++ 3 files changed, 51 insertions(+) create mode 100644 db/mb_import.py diff --git a/db/mb_import.py b/db/mb_import.py new file mode 100644 index 000000000..5f6eaf296 --- /dev/null +++ b/db/mb_import.py @@ -0,0 +1,33 @@ +import db +import db.import_mb_data +import logging +from sqlalchemy import text +import time + +SLEEP_DURATION = 30 # number of seconds to wait between runs + + +def main(): + logging.info("Checking if any import is required...") + while True: + gids_in_AB = get_new_recordings_from_AB() + if gids_in_AB: + logging.info("Updating AcousticBrainz database...") + db.import_mb_data.fetch_and_insert_musicbrainz_data(gids_in_AB) + else: + logging.info("No new recording found. Sleeping %s seconds." % SLEEP_DURATION) + time.sleep(SLEEP_DURATION) + + +def get_new_recordings_from_AB(): + with db.engine.begin() as connection: + query = text("""SELECT lowlevel.gid + FROM lowlevel + LEFT JOIN musicbrainz.recording + ON lowlevel.gid = musicbrainz.recording.gid + WHERE musicbrainz.recording.gid is NULL + """) + gids = connection.execute(query) + gids = gids.fetchall() + gids_in_AB = [value[0] for value in gids] + return gids_in_AB diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 9a964d593..a2a246d5f 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -57,3 +57,15 @@ services: - ../data/files:/data/files depends_on: - db + + musicbrainz_importer: + build: + context: .. 
+ dockerfile: ./docker/Dockerfile.gaia + command: python2 worker_manage.py musicbrainz_importer + volumes: + - ../:/code + - ../data/app:/data + depends_on: + - db + - musicbrainz_db diff --git a/worker_manage.py b/worker_manage.py index cea76c24d..b35fd8ea7 100644 --- a/worker_manage.py +++ b/worker_manage.py @@ -3,6 +3,7 @@ import click from flask.cli import FlaskGroup +import db.mb_import import dataset_eval.evaluate import hl_extractor.hl_calc import webserver @@ -23,5 +24,10 @@ def command_dataset_evaluator(): dataset_eval.evaluate.main() +@cli.command('musicbrainz_importer') +def command_musicbrainz_importer(): + """Import MusicBrainz metadata""" + db.mb_import.main() + if __name__ == '__main__': cli() From 4154ff12d5e886e4cd4e7f22f1aa10e18297c902 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Fri, 22 Jun 2018 02:10:59 +0530 Subject: [PATCH 027/125] Include table name in - No Data Found - message --- db/import_mb_data.py | 52 ++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index ddafcdb94..1e627f8f4 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1474,182 +1474,182 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): print('Getting track data...') MB_track_data = load_track(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from track table for the recordings") # medium try: print('Getting medium data...') MB_medium_data = load_medium(connection, gids_in_AB, MB_track_data) except ValueError: - print("No Data found for the recordings") + print("No Data found from medium table for the recordings") # release_gid_redirect try: print('Getting release gid redirect data...') MB_release_gid_redirect_data = load_release_gid_redirect(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from release gid redirect table for the 
recordings") # release try: print('Getting release data...') MB_release_data = load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data) except ValueError: - print("No Data found for the recordings") + print("No Data found from release table for the recordings") # artist_credit try: print('Getting artist credit data...') MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data) except ValueError: - print("No Data found for the recordings") + print("No Data found from artist credit table for the recordings") # artist_credit_name try: print('Getting artist credit name data...') MB_artist_credit_name_data = load_artist_credit_name(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from artist credit name table for the recordings") # artist try: print('Getting artist data...') MB_artist_data = load_artist(connection, gids_in_AB, MB_artist_credit_name_data) except ValueError: - print("No Data found for the recordings") + print("No Data found from artist table for the recordings") # artist_type try: print('Getting artist type data...') MB_artist_type_data = load_artist_type(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from artist type table for the recordings") # recording try: print('Getting recording data...') MB_recording_data = load_recording(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from recording table for the recordings") # area try: print('Getting area data...') MB_area_data = load_area(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from area table for the recordings") # begin_area try: print('Getting begin area data...') MB_begin_area_data = load_begin_area(connection, gids_in_AB, MB_artist_data) except ValueError: - print("No Data found for the recordings") + 
print("No Data found from area table for the recordings") # end_area try: print('Getting end area data...') MB_end_area_data = load_end_area(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from area table for the recordings") # area_type try: print('Getting area type data...') MB_area_type_data = load_area_type(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from area type table for the recordings") # begin_area_type try: print('Getting begin area type data...') MB_begin_area_type_data = load_begin_area_type(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from area type table for the recordings") # end_area_type try: print('Getting end area data...') MB_end_area_type_data = load_end_area_type(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from area type table for the recordings") # artist_gid_redirect try: print('Getting artist gid redirect data...') MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from artist gid redirect table for the recordings") # gender try: print('Getting gender data...') MB_gender_data = load_gender(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from gender table for the recordings") # language try: print('Getting language data...') MB_language_data = load_language(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from language table for the recordings") # medium_format try: print('Getting medium format data...') MB_medium_format_data = load_medium_format(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found 
from medium format table for the recordings") # recording_gid_redirect try: print('Getting recording gid redirect data...') MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from recording gid redirect table for the recordings") # release_group gid redirect try: print('Getting release group gid redirect data...') MB_release_group_gid_redirect_data = load_release_group_gid_redirect(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from release group gid redirect table for the recordings") # release_group try: print('Getting release group data...') MB_release_group_data = load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data) except ValueError: - print("No Data found for the recordings") + print("No Data found from release group table for the recordings") # release_group_primary_type try: print('Getting release group primary type data...') MB_release_group_primary_type_data = load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data) except ValueError: - print("No Data found for the recordings") + print("No Data found from release group primary type table for the recordings") # release_packaging try: print('Getting release packaging data...') MB_release_packaging_data = load_release_packaging(connection, gids_in_AB, MB_release_data) except ValueError: - print("No Data found for the recordings") + print("No Data found from release packaging table for the recordings") # release_status try: print('Getting release status data...') MB_release_status_data = load_release_status(connection, gids_in_AB) except ValueError: - print("No Data found for the recordings") + print("No Data found from release status table for the recordings") # script try: print('Getting script data...') MB_script_data = load_script(connection, gids_in_AB) except 
ValueError: - print("No Data found for the recordings") + print("No Data found from script table for the recordings") # Write MusicBrainz data into AcousticBrainz database print('\nInserting %d recordings data at a time...\n' % (len(gids_in_AB))) From 5bba175e9ab7b4bc4ce91e52334d4dc56d1f1b1c Mon Sep 17 00:00:00 2001 From: RashiSah Date: Fri, 22 Jun 2018 02:36:11 +0530 Subject: [PATCH 028/125] Add new musicbrainz_importer module with a musicbrainz_importer script and modify a message --- musicbrainz_importer/__init__.py | 0 .../musicbrainz_importer.py | 2 +- worker_manage.py | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 musicbrainz_importer/__init__.py rename db/mb_import.py => musicbrainz_importer/musicbrainz_importer.py (94%) diff --git a/musicbrainz_importer/__init__.py b/musicbrainz_importer/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/db/mb_import.py b/musicbrainz_importer/musicbrainz_importer.py similarity index 94% rename from db/mb_import.py rename to musicbrainz_importer/musicbrainz_importer.py index 5f6eaf296..cae00cd4a 100644 --- a/db/mb_import.py +++ b/musicbrainz_importer/musicbrainz_importer.py @@ -8,7 +8,7 @@ def main(): - logging.info("Checking if any import is required...") + logging.info("musicbrainz importer started") while True: gids_in_AB = get_new_recordings_from_AB() if gids_in_AB: diff --git a/worker_manage.py b/worker_manage.py index b35fd8ea7..6121650cf 100644 --- a/worker_manage.py +++ b/worker_manage.py @@ -3,7 +3,7 @@ import click from flask.cli import FlaskGroup -import db.mb_import +import musicbrainz_importer.musicbrainz_importer import dataset_eval.evaluate import hl_extractor.hl_calc import webserver @@ -27,7 +27,7 @@ def command_dataset_evaluator(): @cli.command('musicbrainz_importer') def command_musicbrainz_importer(): """Import MusicBrainz metadata""" - db.mb_import.main() + musicbrainz_importer.musicbrainz_importer.main() if __name__ == '__main__': cli() From 
39dd8c90c70198591869ab2bb55651e819a1c032 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Fri, 22 Jun 2018 15:41:27 +0530 Subject: [PATCH 029/125] Import recordings in batches and pick the limit from config file --- musicbrainz_importer/musicbrainz_importer.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/musicbrainz_importer/musicbrainz_importer.py b/musicbrainz_importer/musicbrainz_importer.py index cae00cd4a..28fc00e80 100644 --- a/musicbrainz_importer/musicbrainz_importer.py +++ b/musicbrainz_importer/musicbrainz_importer.py @@ -3,6 +3,7 @@ import logging from sqlalchemy import text import time +from flask import current_app SLEEP_DURATION = 30 # number of seconds to wait between runs @@ -10,9 +11,10 @@ def main(): logging.info("musicbrainz importer started") while True: - gids_in_AB = get_new_recordings_from_AB() + gids_in_AB, rows_to_fetch = get_new_recordings_from_AB() if gids_in_AB: - logging.info("Updating AcousticBrainz database...") + logging.info("Importing MusicBrainz data...") + logging.info('Inserting data for %d recordings...' % (rows_to_fetch)) db.import_mb_data.fetch_and_insert_musicbrainz_data(gids_in_AB) else: logging.info("No new recording found. Sleeping %s seconds." 
% SLEEP_DURATION) @@ -21,13 +23,21 @@ def main(): def get_new_recordings_from_AB(): with db.engine.begin() as connection: + offset = 0 + rows_to_fetch = current_app.config['RECORDINGS_FETCHED_PER_BATCH'] + query = text("""SELECT lowlevel.gid FROM lowlevel LEFT JOIN musicbrainz.recording ON lowlevel.gid = musicbrainz.recording.gid WHERE musicbrainz.recording.gid is NULL - """) - gids = connection.execute(query) + ORDER BY lowlevel.id + OFFSET :offset + LIMIT :rows_to_fetch + """) + gids = connection.execute(query, {"offset": offset, "rows_to_fetch": rows_to_fetch}) gids = gids.fetchall() gids_in_AB = [value[0] for value in gids] - return gids_in_AB + offset = offset + rows_to_fetch + + return gids_in_AB, rows_to_fetch From e0c644ff6d0ee54690f9a675dd5bf4d1daacb752 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sun, 17 Jun 2018 22:04:00 +0530 Subject: [PATCH 030/125] AB-345: Log the amount of time taken to import MB data in AB --- db/import_mb_data.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 1e627f8f4..ae776f53b 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -2,6 +2,7 @@ from brainzutils import musicbrainz_db from sqlalchemy import text from flask import current_app +import time def load_artist_credit(connection, gids_in_AB, MB_release_data): """Fetch artist_credit table data from MusicBrainz database for the @@ -1737,6 +1738,7 @@ def start_import(): with db.engine.begin() as connection: offset = 0 rows_to_fetch = current_app.config['RECORDINGS_FETCHED_PER_BATCH'] + start_time = time.time() while True: lowlevel_query = text("""SELECT gid FROM lowlevel @@ -1754,3 +1756,5 @@ def start_import(): else: break print('Done!') + total_time_taken = time.time() - start_time + print('Data imported and inserted in %s seconds.' 
% "{0:.2f}".format(total_time_taken)) From 62c8f6b4d01ffdca517e4364a45b028fd23b321b Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sat, 23 Jun 2018 02:55:19 +0530 Subject: [PATCH 031/125] Move the sql query function to db module, remove offset and remove one returning parameter --- db/get_lowlevel_recordings.py | 21 ++++++++++++ ..._mb_data.py => import_musicbrainz_data.py} | 0 musicbrainz_importer/musicbrainz_importer.py | 34 +++---------------- 3 files changed, 26 insertions(+), 29 deletions(-) create mode 100644 db/get_lowlevel_recordings.py rename db/{import_mb_data.py => import_musicbrainz_data.py} (100%) diff --git a/db/get_lowlevel_recordings.py b/db/get_lowlevel_recordings.py new file mode 100644 index 000000000..c95d81e38 --- /dev/null +++ b/db/get_lowlevel_recordings.py @@ -0,0 +1,21 @@ +import db +from flask import current_app +from sqlalchemy import text + +def get_new_recordings_from_lowlevel(): + with db.engine.begin() as connection: + rows_to_fetch = current_app.config['RECORDINGS_FETCHED_PER_BATCH'] + + query = text("""SELECT lowlevel.gid + FROM lowlevel + LEFT JOIN musicbrainz.recording + ON lowlevel.gid = musicbrainz.recording.gid + WHERE musicbrainz.recording.gid is NULL + ORDER BY lowlevel.id + LIMIT :rows_to_fetch + """) + gids = connection.execute(query, {"rows_to_fetch": rows_to_fetch}) + gids = gids.fetchall() + gids_in_AB = [value[0] for value in gids] + + return gids_in_AB diff --git a/db/import_mb_data.py b/db/import_musicbrainz_data.py similarity index 100% rename from db/import_mb_data.py rename to db/import_musicbrainz_data.py diff --git a/musicbrainz_importer/musicbrainz_importer.py b/musicbrainz_importer/musicbrainz_importer.py index 28fc00e80..0c185f161 100644 --- a/musicbrainz_importer/musicbrainz_importer.py +++ b/musicbrainz_importer/musicbrainz_importer.py @@ -1,9 +1,7 @@ -import db -import db.import_mb_data import logging -from sqlalchemy import text import time -from flask import current_app +import db.get_lowlevel_recordings 
+import db.import_musicbrainz_data SLEEP_DURATION = 30 # number of seconds to wait between runs @@ -11,33 +9,11 @@ def main(): logging.info("musicbrainz importer started") while True: - gids_in_AB, rows_to_fetch = get_new_recordings_from_AB() + gids_in_AB = db.get_lowlevel_recordings.get_new_recordings_from_lowlevel() if gids_in_AB: logging.info("Importing MusicBrainz data...") - logging.info('Inserting data for %d recordings...' % (rows_to_fetch)) - db.import_mb_data.fetch_and_insert_musicbrainz_data(gids_in_AB) + logging.info('Inserting data for %d recordings...' % (len(gids_in_AB))) + db.import_musicbrainz_data.fetch_and_insert_musicbrainz_data(gids_in_AB) else: logging.info("No new recording found. Sleeping %s seconds." % SLEEP_DURATION) time.sleep(SLEEP_DURATION) - - -def get_new_recordings_from_AB(): - with db.engine.begin() as connection: - offset = 0 - rows_to_fetch = current_app.config['RECORDINGS_FETCHED_PER_BATCH'] - - query = text("""SELECT lowlevel.gid - FROM lowlevel - LEFT JOIN musicbrainz.recording - ON lowlevel.gid = musicbrainz.recording.gid - WHERE musicbrainz.recording.gid is NULL - ORDER BY lowlevel.id - OFFSET :offset - LIMIT :rows_to_fetch - """) - gids = connection.execute(query, {"offset": offset, "rows_to_fetch": rows_to_fetch}) - gids = gids.fetchall() - gids_in_AB = [value[0] for value in gids] - offset = offset + rows_to_fetch - - return gids_in_AB, rows_to_fetch From 6b9446d69279bc99a8b3edc9c74f337d80ebd600 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sat, 23 Jun 2018 03:04:12 +0530 Subject: [PATCH 032/125] Change name of the mb importer script --- db/{import_musicbrainz_data.py => import_mb_data.py} | 0 musicbrainz_importer/musicbrainz_importer.py | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename db/{import_musicbrainz_data.py => import_mb_data.py} (100%) diff --git a/db/import_musicbrainz_data.py b/db/import_mb_data.py similarity index 100% rename from db/import_musicbrainz_data.py rename to db/import_mb_data.py diff 
--git a/musicbrainz_importer/musicbrainz_importer.py b/musicbrainz_importer/musicbrainz_importer.py index 0c185f161..db8eca3f8 100644 --- a/musicbrainz_importer/musicbrainz_importer.py +++ b/musicbrainz_importer/musicbrainz_importer.py @@ -1,7 +1,7 @@ import logging import time import db.get_lowlevel_recordings -import db.import_musicbrainz_data +import db.import_mb_data SLEEP_DURATION = 30 # number of seconds to wait between runs @@ -13,7 +13,7 @@ def main(): if gids_in_AB: logging.info("Importing MusicBrainz data...") logging.info('Inserting data for %d recordings...' % (len(gids_in_AB))) - db.import_musicbrainz_data.fetch_and_insert_musicbrainz_data(gids_in_AB) + db.import_mb_data.fetch_and_insert_musicbrainz_data(gids_in_AB) else: logging.info("No new recording found. Sleeping %s seconds." % SLEEP_DURATION) time.sleep(SLEEP_DURATION) From 5cdc48238546711a73482b58e2708fd00cb3301e Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sat, 23 Jun 2018 03:12:14 +0530 Subject: [PATCH 033/125] Use % style format in the print message & remove .format --- db/import_mb_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index ae776f53b..7fa4ec0fb 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1757,4 +1757,4 @@ def start_import(): break print('Done!') total_time_taken = time.time() - start_time - print('Data imported and inserted in %s seconds.' % "{0:.2f}".format(total_time_taken)) + print('Data imported and inserted in %.2f seconds.' 
% total_time_taken) From 1e89e8641a09952f9cfea7160a4fb459dafd3d89 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sat, 23 Jun 2018 03:22:12 +0530 Subject: [PATCH 034/125] AB-347: Use logging in place of print in db/import_mb_data script --- db/import_mb_data.py | 163 ++++++++++++++++++++++--------------------- 1 file changed, 82 insertions(+), 81 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 7fa4ec0fb..0fcfe7935 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -3,6 +3,7 @@ from sqlalchemy import text from flask import current_app import time +import logging def load_artist_credit(connection, gids_in_AB, MB_release_data): """Fetch artist_credit table data from MusicBrainz database for the @@ -881,7 +882,7 @@ def write_artist_credit(connection, MB_artist_credit_data): "created" : value[4]} for value in MB_artist_credit_data ] connection.execute(artist_credit_query, values) - print('Inserted %d rows in artist credit table!' % len(MB_artist_credit_data)) + logging.info('Inserted %d rows in artist credit table!' % len(MB_artist_credit_data)) def write_artist_type(connection, MB_artist_type_data): @@ -902,7 +903,7 @@ def write_artist_type(connection, MB_artist_type_data): "gid" : value[5]} for value in MB_artist_type_data ] connection.execute(artist_type_query, values) - print('Inserted %d rows in artist type table!' % len(MB_artist_type_data)) + logging.info('Inserted %d rows in artist type table!' % len(MB_artist_type_data)) def write_area_type(connection, MB_area_type_data): @@ -923,7 +924,7 @@ def write_area_type(connection, MB_area_type_data): "gid": value[5]} for value in MB_area_type_data ] connection.execute(area_type_query, values) - print('Inserted %d rows in area type table!' % len(MB_area_type_data)) + logging.info('Inserted %d rows in area type table!' 
% len(MB_area_type_data)) def write_begin_area_type(connection, MB_begin_area_type_data): @@ -944,7 +945,7 @@ def write_begin_area_type(connection, MB_begin_area_type_data): "gid": value[5]} for value in MB_begin_area_type_data ] connection.execute(begin_area_type_query, values) - print('Inserted %d rows in area type table for begin area data!' % len(MB_begin_area_type_data)) + logging.info('Inserted %d rows in area type table for begin area data!' % len(MB_begin_area_type_data)) def write_end_area_type(connection, MB_end_area_type_data): @@ -965,7 +966,7 @@ def write_end_area_type(connection, MB_end_area_type_data): "gid": value[5]} for value in MB_end_area_type_data ] connection.execute(end_area_type_query, values) - print('Inserted %d rows in area type table for end area data!' % len(MB_end_area_type_data)) + logging.info('Inserted %d rows in area type table for end area data!' % len(MB_end_area_type_data)) def write_release_status(connection, MB_release_status_data): @@ -986,7 +987,7 @@ def write_release_status(connection, MB_release_status_data): "gid": value[5]} for value in MB_release_status_data ] result = connection.execute(release_status_query, values) - print('Inserted %d rows in release status table!' % len(MB_release_status_data)) + logging.info('Inserted %d rows in release status table!' % len(MB_release_status_data)) def write_release_group_primary_type(connection, MB_release_group_primary_type_data): @@ -1007,7 +1008,7 @@ def write_release_group_primary_type(connection, MB_release_group_primary_type_d "gid": value[5]} for value in MB_release_group_primary_type_data ] connection.execute(release_group_primary_type_query, values) - print('Inserted %d rows in release group primary type table!' % len(MB_release_group_primary_type_data)) + logging.info('Inserted %d rows in release group primary type table!' 
% len(MB_release_group_primary_type_data)) def write_medium_format(connection, MB_medium_format_data): @@ -1031,7 +1032,7 @@ def write_medium_format(connection, MB_medium_format_data): ] connection.execute(text("""ALTER TABLE musicbrainz.medium_format DROP CONSTRAINT IF EXISTS medium_format_fk_parent""")) connection.execute(medium_format_query, values) - print('Inserted %d rows in medium format table!' % len(MB_medium_format_data)) + logging.info('Inserted %d rows in medium format table!' % len(MB_medium_format_data)) def write_release_packaging(connection, MB_release_packaging_data): @@ -1052,7 +1053,7 @@ def write_release_packaging(connection, MB_release_packaging_data): "gid": value[5]} for value in MB_release_packaging_data ] connection.execute(release_packaging_query, values) - print('Inserted %d rows in release packaging table!' % len(MB_release_packaging_data)) + logging.info('Inserted %d rows in release packaging table!' % len(MB_release_packaging_data)) def write_language(connection, MB_language_data): @@ -1073,7 +1074,7 @@ def write_language(connection, MB_language_data): "iso_code_3": value[5]} for value in MB_language_data ] connection.execute(language_query, values) - print('Inserted %d rows in language table!' % len(MB_language_data)) + logging.info('Inserted %d rows in language table!' % len(MB_language_data)) def write_script(connection, MB_script_data): @@ -1093,7 +1094,7 @@ def write_script(connection, MB_script_data): "frequency": value[4]} for value in MB_script_data ] connection.execute(script_query, values) - print('Inserted %d rows in script table!' % len(MB_script_data)) + logging.info('Inserted %d rows in script table!' % len(MB_script_data)) def write_gender(connection, MB_gender_data): @@ -1114,7 +1115,7 @@ def write_gender(connection, MB_gender_data): "gid": value[5]} for value in MB_gender_data ] connection.execute(gender_query, values) - print('Inserted %d rows in gender table!' 
% len(MB_gender_data)) + logging.info('Inserted %d rows in gender table!' % len(MB_gender_data)) def write_area(connection, MB_area_data): @@ -1145,7 +1146,7 @@ def write_area(connection, MB_area_data): "comment": value[13]} for value in MB_area_data ] connection.execute(area_query, values) - print('Inserted %d rows in area table!' % len(MB_area_data)) + logging.info('Inserted %d rows in area table!' % len(MB_area_data)) def write_begin_area(connection, MB_begin_area_data): @@ -1177,7 +1178,7 @@ def write_begin_area(connection, MB_begin_area_data): "comment": value[13]} for value in MB_begin_area_data ] connection.execute(begin_area_query, values) - print('Inserted %d rows in area table for begin area data!' % len(MB_begin_area_data)) + logging.info('Inserted %d rows in area table for begin area data!' % len(MB_begin_area_data)) def write_end_area(connection, MB_end_area_data): """Insert data in area table in musicbrainz schema in @@ -1208,7 +1209,7 @@ def write_end_area(connection, MB_end_area_data): "comment": value[13]} for value in MB_end_area_data ] connection.execute(end_area_query, values) - print('Inserted %d rows in area table for end area data!' % len(MB_end_area_data)) + logging.info('Inserted %d rows in area table for end area data!' % len(MB_end_area_data)) def write_artist(connection, MB_artist_data): @@ -1244,7 +1245,7 @@ def write_artist(connection, MB_artist_data): "end_area": value[18]} for value in MB_artist_data ] connection.execute(artist_query, values) - print('Inserted %d rows in artist table!' % len(MB_artist_data)) + logging.info('Inserted %d rows in artist table!' % len(MB_artist_data)) def write_artist_credit_name(connection, MB_artist_credit_name_data): @@ -1264,7 +1265,7 @@ def write_artist_credit_name(connection, MB_artist_credit_name_data): "join_phrase": value[4]} for value in MB_artist_credit_name_data ] connection.execute(artist_credit_name_query, values) - print('Inserted %d rows in artist credit name table!' 
% len(MB_artist_credit_name_data)) + logging.info('Inserted %d rows in artist credit name table!' % len(MB_artist_credit_name_data)) def write_artist_gid_redirect(connection, MB_artist_gid_redirect_data): @@ -1282,7 +1283,7 @@ def write_artist_gid_redirect(connection, MB_artist_gid_redirect_data): "created": value[2]} for value in MB_artist_gid_redirect_data ] connection.execute(artist_gid_redirect_query, values) - print('Inserted %d rows in artist gid redirect table!' % len(MB_artist_gid_redirect_data)) + logging.info('Inserted %d rows in artist gid redirect table!' % len(MB_artist_gid_redirect_data)) def write_recording(connection, MB_recording_data): @@ -1306,7 +1307,7 @@ def write_recording(connection, MB_recording_data): "video": value[8]} for value in MB_recording_data ] connection.execute(recording_query, values) - print('Inserted %d rows in recording table!' % len(MB_recording_data)) + logging.info('Inserted %d rows in recording table!' % len(MB_recording_data)) def write_recording_gid_redirect(connection, MB_recording_gid_redirect_data): @@ -1323,7 +1324,7 @@ def write_recording_gid_redirect(connection, MB_recording_gid_redirect_data): "created": value[2]} for value in MB_recording_gid_redirect_data ] connection.execute(recording_gid_redirect_query, values) - print('Inserted %d rows in recording gid redirect table!' % len(MB_recording_gid_redirect_data)) + logging.info('Inserted %d rows in recording gid redirect table!' % len(MB_recording_gid_redirect_data)) def write_release_group(connection, MB_release_group_data): @@ -1346,7 +1347,7 @@ def write_release_group(connection, MB_release_group_data): "last_updated": value[7]} for value in MB_release_group_data ] connection.execute(release_group_query, values) - print('Inserted %d rows in release group table!' % len(MB_release_group_data)) + logging.info('Inserted %d rows in release group table!' 
% len(MB_release_group_data)) def write_release_group_gid_redirect(connection, MB_release_gid_redirect_data): @@ -1364,7 +1365,7 @@ def write_release_group_gid_redirect(connection, MB_release_gid_redirect_data): "created": value[2]} for value in MB_release_gid_redirect_data ] connection.execute(release_group_gid_redirect_query, values) - print('Inserted %d rows in release gid redirect table!' % len(MB_release_gid_redirect_data)) + logging.info('Inserted %d rows in release gid redirect table!' % len(MB_release_gid_redirect_data)) def write_release(connection, MB_release_data): @@ -1394,7 +1395,7 @@ def write_release(connection, MB_release_data): "last_updated": value[13]} for value in MB_release_data ] connection.execute(release_query, values) - print('Inserted %d rows in release table!' % len(MB_release_data)) + logging.info('Inserted %d rows in release table!' % len(MB_release_data)) def write_release_gid_redirect(connection, MB_release_gid_redirect_data): @@ -1412,7 +1413,7 @@ def write_release_gid_redirect(connection, MB_release_gid_redirect_data): "created": value[2]} for value in MB_release_gid_redirect_data ] connection.execute(release_gid_redirect_query, values) - print('Inserted %d rows in release gid redirect table!' % len(MB_release_gid_redirect_data)) + logging.info('Inserted %d rows in release gid redirect table!' % len(MB_release_gid_redirect_data)) def write_medium(connection, MB_medium_data): @@ -1435,7 +1436,7 @@ def write_medium(connection, MB_medium_data): "track_count": value[7]} for value in MB_medium_data ] connection.execute(medium_query, values) - print('Inserted %d rows in medium table!' % len(MB_medium_data)) + logging.info('Inserted %d rows in medium table!' % len(MB_medium_data)) def write_track(connection, MB_track_data): @@ -1463,197 +1464,197 @@ def write_track(connection, MB_track_data): "is_data_track": value[11]} for value in MB_track_data ] connection.execute(track_query, values) - print('Inserted %d rows in track table!' 
% len(MB_track_data)) + logging.info('Inserted %d rows in track table!' % len(MB_track_data)) def fetch_and_insert_musicbrainz_data(gids_in_AB): # Get MusicBrainz data - print('\nGetting %d recordings data at a time...\n' % (len(gids_in_AB))) + logging.info('\nGetting %d recordings data at a time...\n' % (len(gids_in_AB))) with musicbrainz_db.engine.begin() as connection: # track try: - print('Getting track data...') + logging.info('Getting track data...') MB_track_data = load_track(connection, gids_in_AB) except ValueError: - print("No Data found from track table for the recordings") + logging.info("No Data found from track table for the recordings") # medium try: - print('Getting medium data...') + logging.info('Getting medium data...') MB_medium_data = load_medium(connection, gids_in_AB, MB_track_data) except ValueError: - print("No Data found from medium table for the recordings") + logging.info("No Data found from medium table for the recordings") # release_gid_redirect try: - print('Getting release gid redirect data...') + logging.info('Getting release gid redirect data...') MB_release_gid_redirect_data = load_release_gid_redirect(connection, gids_in_AB) except ValueError: - print("No Data found from release gid redirect table for the recordings") + logging.info("No Data found from release gid redirect table for the recordings") # release try: - print('Getting release data...') + logging.info('Getting release data...') MB_release_data = load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data) except ValueError: - print("No Data found from release table for the recordings") + logging.info("No Data found from release table for the recordings") # artist_credit try: - print('Getting artist credit data...') + logging.info('Getting artist credit data...') MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data) except ValueError: - print("No Data found from artist credit table for the recordings") + 
logging.info("No Data found from artist credit table for the recordings") # artist_credit_name try: - print('Getting artist credit name data...') + logging.info('Getting artist credit name data...') MB_artist_credit_name_data = load_artist_credit_name(connection, gids_in_AB) except ValueError: - print("No Data found from artist credit name table for the recordings") + logging.info("No Data found from artist credit name table for the recordings") # artist try: - print('Getting artist data...') + logging.info('Getting artist data...') MB_artist_data = load_artist(connection, gids_in_AB, MB_artist_credit_name_data) except ValueError: - print("No Data found from artist table for the recordings") + logging.info("No Data found from artist table for the recordings") # artist_type try: - print('Getting artist type data...') + logging.info('Getting artist type data...') MB_artist_type_data = load_artist_type(connection, gids_in_AB) except ValueError: - print("No Data found from artist type table for the recordings") + logging.info("No Data found from artist type table for the recordings") # recording try: - print('Getting recording data...') + logging.info('Getting recording data...') MB_recording_data = load_recording(connection, gids_in_AB) except ValueError: - print("No Data found from recording table for the recordings") + logging.info("No Data found from recording table for the recordings") # area try: - print('Getting area data...') + logging.info('Getting area data...') MB_area_data = load_area(connection, gids_in_AB) except ValueError: - print("No Data found from area table for the recordings") + logging.info("No Data found from area table for the recordings") # begin_area try: - print('Getting begin area data...') + logging.info('Getting begin area data...') MB_begin_area_data = load_begin_area(connection, gids_in_AB, MB_artist_data) except ValueError: - print("No Data found from area table for the recordings") + logging.info("No Data found from area table for the 
recordings") # end_area try: - print('Getting end area data...') + logging.info('Getting end area data...') MB_end_area_data = load_end_area(connection, gids_in_AB) except ValueError: - print("No Data found from area table for the recordings") + logging.info("No Data found from area table for the recordings") # area_type try: - print('Getting area type data...') + logging.info('Getting area type data...') MB_area_type_data = load_area_type(connection, gids_in_AB) except ValueError: - print("No Data found from area type table for the recordings") + logging.info("No Data found from area type table for the recordings") # begin_area_type try: - print('Getting begin area type data...') + logging.info('Getting begin area type data...') MB_begin_area_type_data = load_begin_area_type(connection, gids_in_AB) except ValueError: - print("No Data found from area type table for the recordings") + logging.info("No Data found from area type table for the recordings") # end_area_type try: - print('Getting end area data...') + logging.info('Getting end area data...') MB_end_area_type_data = load_end_area_type(connection, gids_in_AB) except ValueError: - print("No Data found from area type table for the recordings") + logging.info("No Data found from area type table for the recordings") # artist_gid_redirect try: - print('Getting artist gid redirect data...') + logging.info('Getting artist gid redirect data...') MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, gids_in_AB) except ValueError: - print("No Data found from artist gid redirect table for the recordings") + logging.info("No Data found from artist gid redirect table for the recordings") # gender try: - print('Getting gender data...') + logging.info('Getting gender data...') MB_gender_data = load_gender(connection, gids_in_AB) except ValueError: - print("No Data found from gender table for the recordings") + logging.info("No Data found from gender table for the recordings") # language try: - print('Getting 
language data...') + logging.info('Getting language data...') MB_language_data = load_language(connection, gids_in_AB) except ValueError: - print("No Data found from language table for the recordings") + logging.info("No Data found from language table for the recordings") # medium_format try: - print('Getting medium format data...') + logging.info('Getting medium format data...') MB_medium_format_data = load_medium_format(connection, gids_in_AB) except ValueError: - print("No Data found from medium format table for the recordings") + logging.info("No Data found from medium format table for the recordings") # recording_gid_redirect try: - print('Getting recording gid redirect data...') + logging.info('Getting recording gid redirect data...') MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB) except ValueError: - print("No Data found from recording gid redirect table for the recordings") + logging.info("No Data found from recording gid redirect table for the recordings") # release_group gid redirect try: - print('Getting release group gid redirect data...') + logging.info('Getting release group gid redirect data...') MB_release_group_gid_redirect_data = load_release_group_gid_redirect(connection, gids_in_AB) except ValueError: - print("No Data found from release group gid redirect table for the recordings") + logging.info("No Data found from release group gid redirect table for the recordings") # release_group try: - print('Getting release group data...') + logging.info('Getting release group data...') MB_release_group_data = load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data) except ValueError: - print("No Data found from release group table for the recordings") + logging.info("No Data found from release group table for the recordings") # release_group_primary_type try: - print('Getting release group primary type data...') + logging.info('Getting release group primary type data...') 
MB_release_group_primary_type_data = load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data) except ValueError: - print("No Data found from release group primary type table for the recordings") + logging.info("No Data found from release group primary type table for the recordings") # release_packaging try: - print('Getting release packaging data...') + logging.info('Getting release packaging data...') MB_release_packaging_data = load_release_packaging(connection, gids_in_AB, MB_release_data) except ValueError: - print("No Data found from release packaging table for the recordings") + logging.info("No Data found from release packaging table for the recordings") # release_status try: - print('Getting release status data...') + logging.info('Getting release status data...') MB_release_status_data = load_release_status(connection, gids_in_AB) except ValueError: - print("No Data found from release status table for the recordings") + logging.info("No Data found from release status table for the recordings") # script try: - print('Getting script data...') + logging.info('Getting script data...') MB_script_data = load_script(connection, gids_in_AB) except ValueError: - print("No Data found from script table for the recordings") + logging.info("No Data found from script table for the recordings") # Write MusicBrainz data into AcousticBrainz database - print('\nInserting %d recordings data at a time...\n' % (len(gids_in_AB))) + logging.info('\nInserting %d recordings data at a time...\n' % (len(gids_in_AB))) with db.engine.begin() as connection: if MB_artist_credit_data: write_artist_credit(connection, MB_artist_credit_data) @@ -1755,6 +1756,6 @@ def start_import(): fetch_and_insert_musicbrainz_data(gids_in_AB) else: break - print('Done!') total_time_taken = time.time() - start_time print('Data imported and inserted in %.2f seconds.' 
% total_time_taken) + logging.info('Done!') From 8b636b962481e93d3dd0a9eba43259505dda32cf Mon Sep 17 00:00:00 2001 From: RashiSah Date: Sat, 23 Jun 2018 13:29:09 +0530 Subject: [PATCH 035/125] Replace print with logging in time status message --- db/import_mb_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 0fcfe7935..722d7c8e4 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1756,6 +1756,6 @@ def start_import(): fetch_and_insert_musicbrainz_data(gids_in_AB) else: break - total_time_taken = time.time() - start_time - print('Data imported and inserted in %.2f seconds.' % total_time_taken) logging.info('Done!') + total_time_taken = time.time() - start_time + logging.info('Data imported and inserted in %.2f seconds.' % total_time_taken) From 2945a7d4b0799de63734983f6bddd2a28b491cd5 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Wed, 27 Jun 2018 14:37:31 +0530 Subject: [PATCH 036/125] Move a function to db/data.py file --- db/data.py | 20 +++++++++++++++++++ db/get_lowlevel_recordings.py | 21 -------------------- musicbrainz_importer/musicbrainz_importer.py | 4 ++-- 3 files changed, 22 insertions(+), 23 deletions(-) delete mode 100644 db/get_lowlevel_recordings.py diff --git a/db/data.py b/db/data.py index 606b632bf..b2a80e28c 100644 --- a/db/data.py +++ b/db/data.py @@ -6,6 +6,7 @@ import os import db import db.exceptions +from flask import current_app from sqlalchemy import text import sqlalchemy.exc @@ -524,3 +525,22 @@ def get_summary_data(mbid, offset=0): pass return summary + + +def get_new_recordings_from_lowlevel(): + with db.engine.begin() as connection: + rows_to_fetch = current_app.config['RECORDINGS_FETCHED_PER_BATCH'] + + query = text("""SELECT lowlevel.gid + FROM lowlevel + LEFT JOIN musicbrainz.recording + ON lowlevel.gid = musicbrainz.recording.gid + WHERE musicbrainz.recording.gid is NULL + ORDER BY lowlevel.id + LIMIT :rows_to_fetch + """) + gids = 
connection.execute(query, {"rows_to_fetch": rows_to_fetch}) + gids = gids.fetchall() + gids_in_AB = [value[0] for value in gids] + + return gids_in_AB diff --git a/db/get_lowlevel_recordings.py b/db/get_lowlevel_recordings.py deleted file mode 100644 index c95d81e38..000000000 --- a/db/get_lowlevel_recordings.py +++ /dev/null @@ -1,21 +0,0 @@ -import db -from flask import current_app -from sqlalchemy import text - -def get_new_recordings_from_lowlevel(): - with db.engine.begin() as connection: - rows_to_fetch = current_app.config['RECORDINGS_FETCHED_PER_BATCH'] - - query = text("""SELECT lowlevel.gid - FROM lowlevel - LEFT JOIN musicbrainz.recording - ON lowlevel.gid = musicbrainz.recording.gid - WHERE musicbrainz.recording.gid is NULL - ORDER BY lowlevel.id - LIMIT :rows_to_fetch - """) - gids = connection.execute(query, {"rows_to_fetch": rows_to_fetch}) - gids = gids.fetchall() - gids_in_AB = [value[0] for value in gids] - - return gids_in_AB diff --git a/musicbrainz_importer/musicbrainz_importer.py b/musicbrainz_importer/musicbrainz_importer.py index db8eca3f8..f0be184e8 100644 --- a/musicbrainz_importer/musicbrainz_importer.py +++ b/musicbrainz_importer/musicbrainz_importer.py @@ -1,6 +1,6 @@ import logging import time -import db.get_lowlevel_recordings +import db.data import db.import_mb_data SLEEP_DURATION = 30 # number of seconds to wait between runs @@ -9,7 +9,7 @@ def main(): logging.info("musicbrainz importer started") while True: - gids_in_AB = db.get_lowlevel_recordings.get_new_recordings_from_lowlevel() + gids_in_AB = db.data.get_new_recordings_from_lowlevel() if gids_in_AB: logging.info("Importing MusicBrainz data...") logging.info('Inserting data for %d recordings...' 
% (len(gids_in_AB))) From 5987d443c038389d8faf3ca622336c4e8f3000fd Mon Sep 17 00:00:00 2001 From: RashiSah Date: Wed, 27 Jun 2018 22:51:50 +0530 Subject: [PATCH 037/125] Add tests for getting new recordings from lowlevel --- db/test/test_data.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/db/test/test_data.py b/db/test/test_data.py index 9ac875651..8b45054d1 100644 --- a/db/test/test_data.py +++ b/db/test/test_data.py @@ -5,6 +5,7 @@ import json import mock import copy +import uuid class DataDBTestCase(DatabaseTestCase): @@ -345,6 +346,19 @@ def test_add_get_model(self): def test_get_summary_data(self): pass + + def test_load_new_recordings_from_lowlevel(self): + """Two mbids are inserted into lowlevel table and then fetch a list of newly added mbids + and then check if both the lists contain similar items""" + recording_mbids = [uuid.UUID('ceec2751-44fe-44ff-b281-de00df9117d8'), uuid.UUID('575519b3-c06b-4157-b172-5d7ca80a8382')] + one = {"data": "one", "metadata": {"audio_properties": {"lossless": True}, "version": {"essentia_build_sha": "x"}}} + two = {"data": "two", "metadata": {"audio_properties": {"lossless": True}, "version": {"essentia_build_sha": "x"}}} + db.data.write_low_level(recording_mbids[0], one, gid_types.GID_TYPE_MBID) + db.data.write_low_level(recording_mbids[1], two, gid_types.GID_TYPE_MBID) + + self.assertEqual(recording_mbids, db.data.get_new_recordings_from_lowlevel()) + + class DataUtilTestCase(DatabaseTestCase): """ Tests for utility methods in db/data. Should be moved out of db at some time. 
""" From f462fef22843695834879adca46e84013845f7b7 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Thu, 28 Jun 2018 21:59:18 +0530 Subject: [PATCH 038/125] Get data for more foreign key dependent tables --- db/import_mb_data.py | 77 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 16 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 722d7c8e4..878de736d 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -5,11 +5,11 @@ import time import logging -def load_artist_credit(connection, gids_in_AB, MB_release_data): +def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data): """Fetch artist_credit table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. - Also fetch data corresponding to release table. + Also fetch data corresponding to release, release_group and track table. """ filters = [] filter_data = {} @@ -20,13 +20,33 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data): MB_release_fk_artist_credit.append(value[3]) MB_release_fk_artist_credit = list(set(MB_release_fk_artist_credit)) + # Get data corresponding to artist_credit column in release_group table + MB_release_group_fk_artist_credit = [] + for value in MB_release_group_data: + MB_release_group_fk_artist_credit.append(value[3]) + MB_release_group_fk_artist_credit = list(set(MB_release_group_fk_artist_credit)) + + # Get data corresponding to artist_credit column in track table + MB_track_fk_artist_credit = [] + for value in MB_track_data: + MB_track_fk_artist_credit.append(value[7]) + MB_track_fk_artist_credit = list(set(MB_track_fk_artist_credit)) + if gids_in_AB: filters.append("recording.gid in :gids") filter_data["gids"] = tuple(gids_in_AB) if MB_release_data: - filters.append("artist_credit.id in :data") - filter_data["data"] = tuple(MB_release_fk_artist_credit) + filters.append("artist_credit.id in :release_data") + filter_data["release_data"] = 
tuple(MB_release_fk_artist_credit) + + if MB_release_group_data: + filters.append("artist_credit.id in :release_group_data") + filter_data["release_group_data"] = tuple(MB_release_group_fk_artist_credit) + + if MB_track_data: + filters.append("artist_credit.id in :track_data") + filter_data["track_data"] = tuple(MB_track_fk_artist_credit) filterstr = " OR ".join(filters) if filterstr: @@ -366,10 +386,33 @@ def load_gender(connection, gids_in_AB): return MB_gender_data -def load_area(connection, gids_in_AB): +def load_area(connection, gids_in_AB, MB_artist_data): """ Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Also fetch data corresponding to artist table. """ + filters = [] + filter_data = {} + + # Get data corresponding to area column in artist table + MB_artist_fk_area = [] + for value in MB_artist_data: + MB_artist_fk_area.append(value[11]) + MB_artist_fk_area = list(set(MB_artist_fk_area)) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_artist_data: + filters.append("area.id in :data") + filter_data["data"] = tuple(MB_artist_fk_area) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + area_query = text(""" SELECT DISTINCT area.id, area.gid, @@ -392,9 +435,11 @@ def load_area(connection, gids_in_AB): ON artist.id = artist_credit.id INNER JOIN recording ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids - """) - result = connection.execute(area_query, {'gids': tuple(gids_in_AB)}) + {filterstr} + """.format(filterstr=filterstr) + ) + + result = connection.execute(area_query, filter_data) MB_area_data = result.fetchall() return MB_area_data @@ -1499,13 +1544,6 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): except ValueError: logging.info("No Data found from release table for the recordings") - # artist_credit - try: - logging.info('Getting artist credit data...') - 
MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data) - except ValueError: - logging.info("No Data found from artist credit table for the recordings") - # artist_credit_name try: logging.info('Getting artist credit name data...') @@ -1537,7 +1575,7 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # area try: logging.info('Getting area data...') - MB_area_data = load_area(connection, gids_in_AB) + MB_area_data = load_area(connection, gids_in_AB, MB_artist_data) except ValueError: logging.info("No Data found from area table for the recordings") @@ -1625,6 +1663,13 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): except ValueError: logging.info("No Data found from release group table for the recordings") + # artist_credit + try: + logging.info('Getting artist credit data...') + MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data) + except ValueError: + logging.info("No Data found from artist credit table for the recordings") + # release_group_primary_type try: logging.info('Getting release group primary type data...') From 46e8ebbc76fe843df445a89ead005160ae50c391 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Wed, 4 Jul 2018 21:18:31 +0530 Subject: [PATCH 039/125] Use key names instead of integer indexes & use set-comprehension for data retrieval --- db/import_mb_data.py | 62 ++++++++++---------------------------------- 1 file changed, 13 insertions(+), 49 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 878de736d..a42bb164a 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -15,22 +15,13 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group filter_data = {} # Get data corresponding to artist_credit column in release table - MB_release_fk_artist_credit = [] - for value in MB_release_data: - MB_release_fk_artist_credit.append(value[3]) - MB_release_fk_artist_credit = 
list(set(MB_release_fk_artist_credit)) + MB_release_fk_artist_credit = list({value['artist_credit'] for value in MB_release_data if value['artist_credit'] == value[3]}) # Get data corresponding to artist_credit column in release_group table - MB_release_group_fk_artist_credit = [] - for value in MB_release_group_data: - MB_release_group_fk_artist_credit.append(value[3]) - MB_release_group_fk_artist_credit = list(set(MB_release_group_fk_artist_credit)) + MB_release_group_fk_artist_credit = list({value['artist_credit'] for value in MB_release_group_data if value['artist_credit'] == value[3]}) # Get data corresponding to artist_credit column in track table - MB_track_fk_artist_credit = [] - for value in MB_track_data: - MB_track_fk_artist_credit.append(value[7]) - MB_track_fk_artist_credit = list(set(MB_track_fk_artist_credit)) + MB_track_fk_artist_credit = list({value['artist_credit'] for value in MB_track_data if value['artist_credit'] == value[7]}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -214,10 +205,7 @@ def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_dat filter_data = {} # Get data corresponding to release_group_primary_type column in release_group table - MB_release_group_fk_type = [] - for value in MB_release_group_data: - MB_release_group_fk_type.append(value[4]) - MB_release_group_fk_type = list(set(MB_release_group_fk_type)) + MB_release_group_fk_type = list({value['type'] for value in MB_release_group_data if value['type'] == value[4]}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -273,10 +261,7 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): filter_data = {} # Get data corresponding to release_packaging column in release table - MB_release_fk_packaging = [] - for value in MB_release_data: - MB_release_fk_packaging.append(value[6]) - MB_release_fk_packaging = list(set(MB_release_fk_packaging)) + MB_release_fk_packaging = list({value['packaging'] for value in 
MB_release_data if value['packaging'] == value[6]}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -396,10 +381,7 @@ def load_area(connection, gids_in_AB, MB_artist_data): filter_data = {} # Get data corresponding to area column in artist table - MB_artist_fk_area = [] - for value in MB_artist_data: - MB_artist_fk_area.append(value[11]) - MB_artist_fk_area = list(set(MB_artist_fk_area)) + MB_artist_fk_area = list({value['area'] for value in MB_artist_data if value['area'] == value[11]}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -455,9 +437,7 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): filter_data = {} # Get data corresponding to begin_area column in artist table - MB_artist_fk_begin_area = [] - for value in MB_artist_data: - MB_artist_fk_begin_area.append(value[17]) + MB_artist_fk_begin_area = list({value['begin_area'] for value in MB_artist_data if value['begin_area'] == value[17]}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -570,9 +550,7 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): filter_data = {} # Get data corresponding to artist column in artist_credit_name table. - MB_artist_credit_name_fk_artist = [] - for value in MB_artist_credit_name_data: - MB_artist_credit_name_fk_artist.append(value[2]) + MB_artist_credit_name_fk_artist = list({value['artist'] for value in MB_artist_credit_name_data if value['artist'] == value[2]}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -676,16 +654,10 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat filter_data = {} # Get data corresponding to release_group column in release_group_gid_redirect table. 
- MB_release_group_gid_redirect_fk_release_group = [] - for value in MB_release_group_gid_redirect_data: - MB_release_group_gid_redirect_fk_release_group.append(value[1]) - MB_release_group_gid_redirect_fk_release_group = list(set(MB_release_group_gid_redirect_fk_release_group)) + MB_release_group_gid_redirect_fk_release_group = list({value['new_id'] for value in MB_release_group_gid_redirect_data if value['new_id'] == value[1]}) # Get data corresponding to release_group column in release table. - MB_release_fk_release_group = [] - for value in MB_release_data: - MB_release_fk_release_group.append(value[4]) - MB_release_fk_release_group = list(set(MB_release_fk_release_group)) + MB_release_fk_release_group = list({value['release_group'] for value in MB_release_data if value['release_group'] == value[4]}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -756,16 +728,10 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect filter_data = {} # Get data corresponding to release column in medium table. - MB_medium_fk_release = [] - for value in MB_medium_data: - MB_medium_fk_release.append(value[1]) - MB_medium_fk_release = list(set(MB_medium_fk_release)) + MB_medium_fk_release = list({value['release'] for value in MB_medium_data if value['release'] == value[1]}) # Get data corresponding to release column in release_gid_redirect table. - MB_release_gid_redirect_fk_release = [] - for value in MB_release_gid_redirect_data: - MB_release_gid_redirect_fk_release.append(value[1]) - MB_release_gid_redirect_fk_release = list(set(MB_release_gid_redirect_fk_release)) + MB_release_gid_redirect_fk_release = list({value['new_id'] for value in MB_release_gid_redirect_data if value['new_id'] == value[1]}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -842,9 +808,7 @@ def load_medium(connection, gids_in_AB, MB_track_data): filter_data = {} # Get data corresponding to medium column in track table. 
- MB_track_fk_medium = [] - for value in MB_track_data: - MB_track_fk_medium.append(value[3]) + MB_track_fk_medium = list({value['medium'] for value in MB_track_data if value['medium'] == value[3]}) if gids_in_AB: filters.append("recording.gid in :gids") From 340d81ba1c7f4a588a3c03b7b22d5104de27883c Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 4 Jul 2018 23:23:24 +0530 Subject: [PATCH 040/125] Add more documentations to specify the definition of function arguments and return values --- db/import_mb_data.py | 247 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index a42bb164a..4120f55cb 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -10,6 +10,14 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group recording MBIDs in AcousticBrainz database. Also fetch data corresponding to release, release_group and track table. + + Args: + connection: database connection to execute the query. + MB_release_data: release data fetched from MusicBrainz database. + MB_release_group_data: release_group data fetched from MusicBrainz database. + MB_track_data: track data fetched from MusicBrainz database. + Returns: + artist_credit data fetched from MusicBrainz database. """ filters = [] filter_data = {} @@ -62,6 +70,11 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group def load_artist_type(connection, gids_in_AB): """Fetch artist_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + artist_type data fetched from MusicBrainz database. 
""" artist_type_query = text(""" SELECT DISTINCT artist_type.id, @@ -88,6 +101,11 @@ def load_artist_type(connection, gids_in_AB): def load_area_type(connection, gids_in_AB): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + area_type data fetched from MusicBrainz database. """ area_type_query = text(""" SELECT DISTINCT area_type.id, @@ -117,6 +135,11 @@ def load_begin_area_type(connection, gids_in_AB): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for the begin area column in artist table. + + Args: + connection: database connection to execute the query. + Returns: + begin_area_type data fetched from MusicBrainz database. """ begin_area_type_query = text(""" SELECT DISTINCT area_type.id, @@ -146,6 +169,11 @@ def load_end_area_type(connection, gids_in_AB): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for the end area column in artist table. + + Args: + connection: database connection to execute the query. + Returns: + end_area_type data fetched from MusicBrainz database. """ end_area_type_query = text(""" SELECT DISTINCT area_type.id, @@ -174,6 +202,11 @@ def load_end_area_type(connection, gids_in_AB): def load_release_status(connection, gids_in_AB): """Fetch release_status table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + release_status data fetched from MusicBrainz database. """ release_status_query = text(""" SELECT DISTINCT release_status.id, @@ -200,6 +233,12 @@ def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_dat recording MBIDs in AcousticBrainz database. Also fetch data corresponding to release_group table. 
+ + Args: + connection: database connection to execute the query. + MB_release_group_data: release group data fetched from MusicBrainz database. + Returns: + release_group_primary_type data fetched from MusicBrainz database. """ filters = [] filter_data = {} @@ -240,6 +279,11 @@ def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_dat def load_medium_format(connection, gids_in_AB): """Fetch medium_format table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + medium_format data fetched from MusicBrainz database. """ medium_format_query = text(""" SELECT * FROM medium_format @@ -256,6 +300,12 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): recording MBIDs in AcousticBrainz database. Also fetch data corresponding to release table. + + Args: + connection: database connection to execute the query. + MB_release_data: release data fetched from MusicBrainz database. + Returns: + release_packaging data fetched from MusicBrainz database. """ filters = [] filter_data = {} @@ -300,6 +350,11 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): def load_language(connection, gids_in_AB): """Fetch language table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + language data fetched from MusicBrainz database. """ language_query = text(""" SELECT DISTINCT language.id, @@ -325,6 +380,11 @@ def load_language(connection, gids_in_AB): def load_script(connection, gids_in_AB): """Fetch script table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + script data fetched from MusicBrainz database. 
""" script_query = text(""" SELECT DISTINCT script.id, @@ -348,6 +408,11 @@ def load_script(connection, gids_in_AB): def load_gender(connection, gids_in_AB): """ Fetch gender table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + gender data fetched from MusicBrainz database. """ gender_query = text(""" SELECT DISTINCT gender.id, @@ -376,6 +441,12 @@ def load_area(connection, gids_in_AB, MB_artist_data): recording MBIDs in AcousticBrainz database. Also fetch data corresponding to artist table. + + Args: + connection: database connection to execute the query. + MB_artist_data: artist data fetched from MusicBrainz database. + Returns: + area data fetched from MusicBrainz database. """ filters = [] filter_data = {} @@ -432,6 +503,12 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): recording MBIDs in AcousticBrainz database for begin area column. Also fetch data corresponding to artist table. + + Args: + connection: database connection to execute the query. + MB_artist_data: artist data fetched from MusicBrainz database. + Returns: + begin_area data fetched from MusicBrainz database. """ filters = [] filter_data = {} @@ -486,6 +563,11 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): def load_end_area(connection, gids_in_AB): """Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for end area column. + + Args: + connection: database connection to execute the query. + Returns: + end_area data fetched from MusicBrainz database. """ end_area_query = text(""" SELECT DISTINCT area.id, @@ -520,6 +602,11 @@ def load_end_area(connection, gids_in_AB): def load_artist_credit_name(connection, gids_in_AB): """Fetch artist_credit_name table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. 
+ Returns: + artist_credit_name data fetched from MusicBrainz database. """ artist_credit_name_query = text(""" SELECT DISTINCT artist_credit_name.artist_credit, @@ -545,6 +632,12 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): recording MBIDs in AcousticBrainz database. Also fetch data corresponding to artist_credit_name table. + + Args: + connection: database connection to execute the query. + MB_artist_credit_name_data: artist credit name data fetched from MusicBrainz database. + Returns: + artist data fetched from MusicBrainz database. """ filters = [] filter_data = {} @@ -587,6 +680,11 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): def load_artist_gid_redirect(connection, gids_in_AB): """Fetch artist_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + artist_gid_redirect data fetched from MusicBrainz database. """ artist_gid_redirect_query = text(""" SELECT DISTINCT artist_gid_redirect.gid, @@ -610,6 +708,11 @@ def load_artist_gid_redirect(connection, gids_in_AB): def load_recording(connection, gids_in_AB): """Fetch recording table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + recording data fetched from MusicBrainz database. """ recording_query = text(""" SELECT DISTINCT recording.id, recording.gid, recording.name, recording.artist_credit, @@ -627,6 +730,11 @@ def load_recording(connection, gids_in_AB): def load_recording_gid_redirect(connection, gids_in_AB): """Fetch recording_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + recording_gid_redirect data fetched from MusicBrainz database. 
""" recording_gid_redirect_query = text(""" SELECT DISTINCT recording_gid_redirect.gid, @@ -649,6 +757,13 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat Also fetch data corresponding to release_group_gid_redirect and release table. + + Args: + connection: database connection to execute the query. + MB_release_group_gid_redirect_data: release group gid redirect data fetched from MusicBrainz database. + MB_release_data: release data fetched from MusicBrainz database. + Returns: + release_group data fetched from MusicBrainz database. """ filters = [] filter_data = {} @@ -700,6 +815,11 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat def load_release_group_gid_redirect(connection, gids_in_AB): """Fetch release_group_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + release_group_gid_redirect data fetched from MusicBrainz database. """ release_group_gid_redirect_query = text(""" SELECT DISTINCT release_group_gid_redirect.gid, @@ -723,6 +843,13 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect recording MBIDs in AcousticBrainz database. Also fetch data corresponding to medium and release_gid_redirect table. + + Args: + connection: database connection to execute the query. + MB_medium_data: medium data fetched from MusicBrainz database. + MB_release_gid_redirect_data: release_gid_redirect data fetched from MusicBrainz database. + Returns: + release data fetched from MusicBrainz database. """ filters = [] filter_data = {} @@ -780,6 +907,11 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect def load_release_gid_redirect(connection, gids_in_AB): """Fetch release_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
+ + Args: + connection: database connection to execute the query. + Returns: + release_gid_redirect data fetched from MusicBrainz database. """ release_gid_redirect_query = text(""" SELECT DISTINCT release_gid_redirect.gid, @@ -803,6 +935,12 @@ def load_medium(connection, gids_in_AB, MB_track_data): recording MBIDs in AcousticBrainz database. Also fetch data corresponding to track table. + + Args: + connection: database connection to execute the query. + MB_track_data: track data fetched from MusicBrainz database. + Returns: + medium data fetched from MusicBrainz database. """ filters = [] filter_data = {} @@ -849,6 +987,11 @@ def load_medium(connection, gids_in_AB, MB_track_data): def load_track(connection, gids_in_AB): """Fetch track table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + Returns: + track data fetched from MusicBrainz database. """ track_query = text(""" SELECT DISTINCT track.id, @@ -877,6 +1020,10 @@ def load_track(connection, gids_in_AB): def write_artist_credit(connection, MB_artist_credit_data): """Insert data into artist_credit table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_artist_credit_data: artist_credit data fetched from MusicBrainz database. """ artist_credit_query = text(""" INSERT INTO musicbrainz.artist_credit @@ -897,6 +1044,10 @@ def write_artist_credit(connection, MB_artist_credit_data): def write_artist_type(connection, MB_artist_type_data): """Insert data in artist_type table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_artist_type_data: artist_type data fetched from MusicBrainz database. 
""" artist_type_query = text(""" INSERT INTO musicbrainz.artist_type(id, name, parent, child_order, description, gid) @@ -918,6 +1069,10 @@ def write_artist_type(connection, MB_artist_type_data): def write_area_type(connection, MB_area_type_data): """Insert data in area_type table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_area_type_data: area_type data fetched from MusicBrainz database. """ area_type_query = text(""" INSERT INTO musicbrainz.area_type @@ -939,6 +1094,10 @@ def write_area_type(connection, MB_area_type_data): def write_begin_area_type(connection, MB_begin_area_type_data): """Insert data in area_type table in musicbrainz schema in AcousticBrainz database for begin_area column in artist table. + + Args: + connection: database connection to execute the query. + MB_begin_area_type_data: begin_area_type data fetched from MusicBrainz database. """ begin_area_type_query = text(""" INSERT INTO musicbrainz.area_type @@ -960,6 +1119,10 @@ def write_begin_area_type(connection, MB_begin_area_type_data): def write_end_area_type(connection, MB_end_area_type_data): """Insert data in area_type table in musicbrainz schema in AcousticBrainz database for end area column in artist table. + + Args: + connection: database connection to execute the query. + MB_end_area_type_data: end_area_type data fetched from MusicBrainz database. """ end_area_type_query = text(""" INSERT INTO musicbrainz.area_type(id, name, parent, child_order, description, gid) @@ -981,6 +1144,10 @@ def write_end_area_type(connection, MB_end_area_type_data): def write_release_status(connection, MB_release_status_data): """Insert data in release_status table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_release_status_data: release_status data fetched from MusicBrainz database. 
""" release_status_query = text(""" INSERT INTO musicbrainz.release_status @@ -1002,6 +1169,10 @@ def write_release_status(connection, MB_release_status_data): def write_release_group_primary_type(connection, MB_release_group_primary_type_data): """Insert data in release_group_primary_type table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_release_group_primary_type_data: release_group_primary_type data fetched from MusicBrainz database. """ release_group_primary_type_query = text(""" INSERT INTO musicbrainz.release_group_primary_type @@ -1023,6 +1194,10 @@ def write_release_group_primary_type(connection, MB_release_group_primary_type_d def write_medium_format(connection, MB_medium_format_data): """Insert data in medium_format table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_medium_format_data: medium_format data fetched from MusicBrainz database. """ medium_format_query = text(""" INSERT INTO musicbrainz.medium_format @@ -1047,6 +1222,10 @@ def write_medium_format(connection, MB_medium_format_data): def write_release_packaging(connection, MB_release_packaging_data): """Insert data in release_packaging table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_release_packaging_data: release_packaging data fetched from MusicBrainz database. """ release_packaging_query = text(""" INSERT INTO musicbrainz.release_packaging @@ -1068,6 +1247,10 @@ def write_release_packaging(connection, MB_release_packaging_data): def write_language(connection, MB_language_data): """Insert data in language table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_language_data: language data fetched from MusicBrainz database. 
""" language_query = text(""" INSERT INTO musicbrainz.language @@ -1089,6 +1272,10 @@ def write_language(connection, MB_language_data): def write_script(connection, MB_script_data): """Insert data in script table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_script_data: script data fetched from MusicBrainz database. """ script_query = text(""" INSERT INTO musicbrainz.script @@ -1109,6 +1296,10 @@ def write_script(connection, MB_script_data): def write_gender(connection, MB_gender_data): """Insert data in gender table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_gender_data: gender data fetched from MusicBrainz database. """ gender_query = text(""" INSERT INTO musicbrainz.gender @@ -1130,6 +1321,10 @@ def write_gender(connection, MB_gender_data): def write_area(connection, MB_area_data): """Insert data in area table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_area_data: area data fetched from MusicBrainz database. """ area_query = text(""" INSERT INTO musicbrainz.area @@ -1162,6 +1357,10 @@ def write_begin_area(connection, MB_begin_area_data): """Insert data in area table in musicbrainz schema in AcousticBrainz database for begin_area column in artist table. + + Args: + connection: database connection to execute the query. + MB_begin_area_data: begin_area data fetched from MusicBrainz database. """ begin_area_query = text(""" INSERT INTO musicbrainz.area @@ -1193,6 +1392,10 @@ def write_end_area(connection, MB_end_area_data): """Insert data in area table in musicbrainz schema in AcousticBrainz database for end_area column in artist table. + + Args: + connection: database connection to execute the query. + MB_end_area_data: end_area data fetched from MusicBrainz database. 
""" end_area_query = text(""" INSERT INTO musicbrainz.area @@ -1224,6 +1427,10 @@ def write_end_area(connection, MB_end_area_data): def write_artist(connection, MB_artist_data): """Insert data in artist table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_artist_data: artist data fetched from MusicBrainz database. """ artist_query = text(""" INSERT INTO musicbrainz.artist @@ -1260,6 +1467,10 @@ def write_artist(connection, MB_artist_data): def write_artist_credit_name(connection, MB_artist_credit_name_data): """Insert data in artist_credit_name table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_artist_credit_name_data: artist_credit_name data fetched from MusicBrainz database. """ artist_credit_name_query = text(""" INSERT INTO musicbrainz.artist_credit_name @@ -1280,6 +1491,10 @@ def write_artist_credit_name(connection, MB_artist_credit_name_data): def write_artist_gid_redirect(connection, MB_artist_gid_redirect_data): """Insert data in artist_gid_redirect table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_artist_gid_redirect_data: artist_gid_redirect data fetched from MusicBrainz database. """ artist_gid_redirect_query = text(""" INSERT INTO musicbrainz.artist_gid_redirect @@ -1298,6 +1513,10 @@ def write_artist_gid_redirect(connection, MB_artist_gid_redirect_data): def write_recording(connection, MB_recording_data): """Insert data in recording table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_recording_data: recording data fetched from MusicBrainz database. 
""" recording_query = text(""" INSERT INTO musicbrainz.recording @@ -1322,6 +1541,10 @@ def write_recording(connection, MB_recording_data): def write_recording_gid_redirect(connection, MB_recording_gid_redirect_data): """Insert data in recording_gid_redirect table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_recording_gid_redirect_data: recording_gid_redirect data fetched from MusicBrainz database. """ recording_gid_redirect_query = text(""" INSERT INTO musicbrainz.recording_gid_redirect @@ -1339,6 +1562,10 @@ def write_recording_gid_redirect(connection, MB_recording_gid_redirect_data): def write_release_group(connection, MB_release_group_data): """Insert data in release_group table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_release_group_data: release_group data fetched from MusicBrainz database. """ release_group_query = text(""" INSERT INTO musicbrainz.release_group @@ -1362,6 +1589,10 @@ def write_release_group(connection, MB_release_group_data): def write_release_group_gid_redirect(connection, MB_release_gid_redirect_data): """Insert data in release_group_gid_redirect table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_release_group_gid_redirect_data: release_group_gid_redirect data fetched from MusicBrainz database. """ release_group_gid_redirect_query = text(""" INSERT INTO musicbrainz.release_group_gid_redirect @@ -1380,6 +1611,10 @@ def write_release_group_gid_redirect(connection, MB_release_gid_redirect_data): def write_release(connection, MB_release_data): """Insert data in release table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_release_data: release data fetched from MusicBrainz database. 
""" release_query = text(""" INSERT INTO musicbrainz.release @@ -1410,6 +1645,10 @@ def write_release(connection, MB_release_data): def write_release_gid_redirect(connection, MB_release_gid_redirect_data): """Insert data in release_gid_redirect table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_release_gid_redirect_data: release_gid_redirect data fetched from MusicBrainz database. """ release_gid_redirect_query = text(""" INSERT INTO musicbrainz.release_gid_redirect @@ -1428,6 +1667,10 @@ def write_release_gid_redirect(connection, MB_release_gid_redirect_data): def write_medium(connection, MB_medium_data): """Insert data in medium table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_medium_data: medium data fetched from MusicBrainz database. """ medium_query = text(""" INSERT INTO musicbrainz.medium @@ -1451,6 +1694,10 @@ def write_medium(connection, MB_medium_data): def write_track(connection, MB_track_data): """Insert data in track table in musicbrainz schema in AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_track_data: track data fetched from MusicBrainz database. 
""" track_query = text(""" INSERT INTO musicbrainz.track From 259175738bd79340f1c837e716cdb3c9c20b67d3 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 5 Jul 2018 02:38:07 +0530 Subject: [PATCH 041/125] Import data by including more foreign key referenced tables --- db/import_mb_data.py | 307 +++++++++++++++++++++++++++++++++---------- 1 file changed, 236 insertions(+), 71 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 4120f55cb..06b1553ee 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -5,7 +5,7 @@ import time import logging -def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data): +def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data): """Fetch artist_credit table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -16,6 +16,7 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group MB_release_data: release data fetched from MusicBrainz database. MB_release_group_data: release_group data fetched from MusicBrainz database. MB_track_data: track data fetched from MusicBrainz database. + MB_artist_credit_name_data: artist_credit_name_data fetched from MusicBrainz database. Returns: artist_credit data fetched from MusicBrainz database. 
""" @@ -23,13 +24,16 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group filter_data = {} # Get data corresponding to artist_credit column in release table - MB_release_fk_artist_credit = list({value['artist_credit'] for value in MB_release_data if value['artist_credit'] == value[3]}) + MB_release_fk_artist_credit = list({value['artist_credit'] for value in MB_release_data}) # Get data corresponding to artist_credit column in release_group table - MB_release_group_fk_artist_credit = list({value['artist_credit'] for value in MB_release_group_data if value['artist_credit'] == value[3]}) + MB_release_group_fk_artist_credit = list({value['artist_credit'] for value in MB_release_group_data}) # Get data corresponding to artist_credit column in track table - MB_track_fk_artist_credit = list({value['artist_credit'] for value in MB_track_data if value['artist_credit'] == value[7]}) + MB_track_fk_artist_credit = list({value['artist_credit'] for value in MB_track_data}) + + # Get data corresponding to artist_credit column in artist_credit_name table + MB_artist_credit_name_fk_artist_credit = list({value['artist_credit'] for value in MB_artist_credit_name_data}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -47,6 +51,10 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group filters.append("artist_credit.id in :track_data") filter_data["track_data"] = tuple(MB_track_fk_artist_credit) + if MB_artist_credit_name_data: + filters.append("artist_credit.id in :artist_credit_name_data") + filter_data["artist_credit_name_data"] = tuple(MB_artist_credit_name_fk_artist_credit) + filterstr = " OR ".join(filters) if filterstr: filterstr = " WHERE " + filterstr @@ -67,15 +75,36 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group return MB_artist_credit_data -def load_artist_type(connection, gids_in_AB): +def load_artist_type(connection, gids_in_AB, MB_artist_data): """Fetch artist_type 
table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + Also fetch data corresponding to artist table. + Args: connection: database connection to execute the query. + MB_artist_data: artist data fetched from MusicBrainz database. Returns: artist_type data fetched from MusicBrainz database. """ + filters = [] + filter_data = {} + + # Get data corresponding to type column in artist table + MB_artist_fk_artist_type = list({value['type'] for value in MB_artist_data}) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_artist_data: + filters.append("artist_type.id in :artist_data") + filter_data["artist_data"] = tuple(MB_artist_fk_artist_type) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + artist_type_query = text(""" SELECT DISTINCT artist_type.id, artist_type.name, @@ -89,24 +118,46 @@ def load_artist_type(connection, gids_in_AB): INNER JOIN artist_credit ON artist.id = artist_credit.id INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids - """) - result = connection.execute(artist_type_query, {'gids': tuple(gids_in_AB)}) + ON artist_credit.id = recording.artist_credit + {filterstr} + """.format(filterstr=filterstr) + ) + result = connection.execute(artist_type_query, filter_data) MB_artist_type_data = result.fetchall() return MB_artist_type_data -def load_area_type(connection, gids_in_AB): +def load_area_type(connection, gids_in_AB, MB_area_data): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + Also fetch data corresponding to area table. + Args: connection: database connection to execute the query. + MB_area_data: area data fetched from MusicBrainz database. Returns: area_type data fetched from MusicBrainz database. 
""" + filters = [] + filter_data = {} + + # Get data corresponding to type column in area table + MB_area_fk_area_type = list({value['type'] for value in MB_area_data}) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_area_data: + filters.append("area_type.id in :area_data") + filter_data["area_data"] = tuple(MB_area_fk_area_type) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + area_type_query = text(""" SELECT DISTINCT area_type.id, area_type.name, @@ -122,10 +173,11 @@ def load_area_type(connection, gids_in_AB): INNER JOIN artist_credit ON artist.id = artist_credit.id INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids - """) - result = connection.execute(area_type_query, {'gids': tuple(gids_in_AB)}) + ON artist_credit.id = recording.artist_credit + {filterstr} + """.format(filterstr=filterstr) + ) + result = connection.execute(area_type_query, filter_data) MB_area_type_data = result.fetchall() return MB_area_type_data @@ -199,15 +251,34 @@ def load_end_area_type(connection, gids_in_AB): return MB_end_area_type_data -def load_release_status(connection, gids_in_AB): +def load_release_status(connection, gids_in_AB, MB_release_data): """Fetch release_status table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. Args: connection: database connection to execute the query. + MB_release_data: release data fetched from MusicBrainz database. Returns: release_status data fetched from MusicBrainz database. 
""" + filters = [] + filter_data = {} + + # Get data corresponding to status column in release table + MB_release_fk_status = list({value['status'] for value in MB_release_data}) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_release_data: + filters.append("release_status.id in :data") + filter_data["data"] = tuple(MB_release_fk_status) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + release_status_query = text(""" SELECT DISTINCT release_status.id, release_status.name, @@ -218,11 +289,12 @@ def load_release_status(connection, gids_in_AB): FROM release_status INNER JOIN release ON release.status = release_status.id - INNER JOIN recording + INNER JOIN recording ON recording.artist_credit = release.artist_credit - WHERE recording.gid in :gids - """) - result = connection.execute(release_status_query, {'gids': tuple(gids_in_AB)}) + {filterstr} + """.format(filterstr=filterstr) + ) + result = connection.execute(release_status_query, filter_data) MB_release_status_data = result.fetchall() return MB_release_status_data @@ -244,7 +316,7 @@ def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_dat filter_data = {} # Get data corresponding to release_group_primary_type column in release_group table - MB_release_group_fk_type = list({value['type'] for value in MB_release_group_data if value['type'] == value[4]}) + MB_release_group_fk_type = list({value['type'] for value in MB_release_group_data}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -311,7 +383,7 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): filter_data = {} # Get data corresponding to release_packaging column in release table - MB_release_fk_packaging = list({value['packaging'] for value in MB_release_data if value['packaging'] == value[6]}) + MB_release_fk_packaging = list({value['packaging'] for value in MB_release_data}) if gids_in_AB: 
filters.append("recording.gid in :gids") @@ -347,15 +419,36 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): return MB_release_packaging_data -def load_language(connection, gids_in_AB): +def load_language(connection, gids_in_AB, MB_release_data): """Fetch language table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + Also fetch data corresponding to release table. + Args: connection: database connection to execute the query. + MB_release_data: release data fetched from MusicBrainz database. Returns: language data fetched from MusicBrainz database. """ + filters = [] + filter_data = {} + + # Get data corresponding to language column in release table + MB_release_fk_language = list({value['language'] for value in MB_release_data}) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_release_data: + filters.append("language.id in :data") + filter_data["data"] = tuple(MB_release_fk_language) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + language_query = text(""" SELECT DISTINCT language.id, language.iso_code_2t, @@ -368,24 +461,46 @@ def load_language(connection, gids_in_AB): INNER JOIN release ON release.language = language.id INNER JOIN recording - ON recording.artist_credit=release.artist_credit - WHERE recording.gid in :gids - """) - result = connection.execute(language_query, {'gids': tuple(gids_in_AB)}) + ON recording.artist_credit = release.artist_credit + {filterstr} + """.format(filterstr=filterstr) + ) + result = connection.execute(language_query, filter_data) MB_language_data = result.fetchall() return MB_language_data -def load_script(connection, gids_in_AB): +def load_script(connection, gids_in_AB, MB_release_data): """Fetch script table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + Also fetch data corresponding to release table. 
+ Args: connection: database connection to execute the query. + MB_release_data: release data fetched from MusicBrainz database. Returns: script data fetched from MusicBrainz database. """ + filters = [] + filter_data = {} + + # Get data corresponding to language column in release table + MB_release_fk_script = list({value['script'] for value in MB_release_data}) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_release_data: + filters.append("script.id in :data") + filter_data["data"] = tuple(MB_release_fk_script) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + script_query = text(""" SELECT DISTINCT script.id, script.iso_code, @@ -397,9 +512,10 @@ def load_script(connection, gids_in_AB): ON release.script = script.id INNER JOIN recording ON recording.artist_credit = release.artist_credit - WHERE recording.gid in :gids - """) - result = connection.execute(script_query, {'gids': tuple(gids_in_AB)}) + {filterstr} + """.format(filterstr=filterstr) + ) + result = connection.execute(script_query, filter_data) MB_script_data = result.fetchall() return MB_script_data @@ -452,7 +568,7 @@ def load_area(connection, gids_in_AB, MB_artist_data): filter_data = {} # Get data corresponding to area column in artist table - MB_artist_fk_area = list({value['area'] for value in MB_artist_data if value['area'] == value[11]}) + MB_artist_fk_area = list({value['area'] for value in MB_artist_data}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -514,7 +630,7 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): filter_data = {} # Get data corresponding to begin_area column in artist table - MB_artist_fk_begin_area = list({value['begin_area'] for value in MB_artist_data if value['begin_area'] == value[17]}) + MB_artist_fk_begin_area = list({value['begin_area'] for value in MB_artist_data}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -560,15 
+676,35 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): return MB_begin_area_data -def load_end_area(connection, gids_in_AB): +def load_end_area(connection, gids_in_AB, MB_artist_data): """Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for end area column. + Also fetch data corresponding to artist table. + Args: connection: database connection to execute the query. Returns: end_area data fetched from MusicBrainz database. """ + filters = [] + filter_data = {} + + # Get data corresponding to end_area column in artist table + MB_artist_fk_end_area = list({value['end_area'] for value in MB_artist_data}) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_artist_data: + filters.append("area.id in :data") + filter_data["data"] = tuple(MB_artist_fk_end_area) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + end_area_query = text(""" SELECT DISTINCT area.id, area.gid, @@ -591,9 +727,10 @@ def load_end_area(connection, gids_in_AB): ON artist.id = artist_credit.id INNER JOIN recording ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids - """) - result = connection.execute(end_area_query, {'gids': tuple(gids_in_AB)}) + {filterstr} + """.format(filterstr=filterstr) + ) + result = connection.execute(end_area_query, filter_data) MB_end_area_data = result.fetchall() return MB_end_area_data @@ -627,7 +764,7 @@ def load_artist_credit_name(connection, gids_in_AB): return MB_artist_credit_name_data -def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): +def load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gid_redirect_data): """Fetch artist table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -636,6 +773,7 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): Args: connection: database connection to execute the query. MB_artist_credit_name_data: artist credit name data fetched from MusicBrainz database. + MB_artist_gid_redirect_data: artist_gid_redirect_data fetched from MusicBrainz database. Returns: artist data fetched from MusicBrainz database. """ @@ -643,7 +781,10 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): filter_data = {} # Get data corresponding to artist column in artist_credit_name table. - MB_artist_credit_name_fk_artist = list({value['artist'] for value in MB_artist_credit_name_data if value['artist'] == value[2]}) + MB_artist_credit_name_fk_artist = list({value['artist'] for value in MB_artist_credit_name_data}) + + # Get data corresponding to new_id column in artist_gid_redirect table. + MB_artist_gid_redirect_fk_artist = list({value['new_id'] for value in MB_artist_gid_redirect_data}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -653,6 +794,10 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data): filters.append("artist.id in :data") filter_data["data"] = tuple(MB_artist_credit_name_fk_artist) + if MB_artist_gid_redirect_data: + filters.append("artist.id in :redirect_data") + filter_data["redirect_data"] = tuple(MB_artist_gid_redirect_fk_artist) + filterstr = " OR ".join(filters) if filterstr: filterstr = " WHERE " + filterstr @@ -705,7 +850,7 @@ def load_artist_gid_redirect(connection, gids_in_AB): return MB_artist_gid_redirect_data -def load_recording(connection, gids_in_AB): +def load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data): """Fetch recording table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -714,14 +859,33 @@ def load_recording(connection, gids_in_AB): Returns: recording data fetched from MusicBrainz database. 
""" + filters = [] + filter_data = {} + + # Get data corresponding to new_id column in recording_gid_redirect table. + MB_recording_gid_redirect_fk_recording = list({value['new_id'] for value in MB_recording_gid_redirect_data}) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_recording_gid_redirect_data: + filters.append("recording.id in :redirect_data") + filter_data["redirect_data"] = tuple(MB_recording_gid_redirect_fk_recording) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + recording_query = text(""" SELECT DISTINCT recording.id, recording.gid, recording.name, recording.artist_credit, recording.length, recording.comment, recording.edits_pending, recording.last_updated, recording.video FROM recording - WHERE recording.gid in :gids - """) - result = connection.execute(recording_query, {'gids': tuple(gids_in_AB)}) + {filterstr} + """.format(filterstr=filterstr) + ) + result = connection.execute(recording_query, filter_data) MB_recording_data = result.fetchall() return MB_recording_data @@ -769,10 +933,10 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat filter_data = {} # Get data corresponding to release_group column in release_group_gid_redirect table. - MB_release_group_gid_redirect_fk_release_group = list({value['new_id'] for value in MB_release_group_gid_redirect_data if value['new_id'] == value[1]}) + MB_release_group_gid_redirect_fk_release_group = list({value['new_id'] for value in MB_release_group_gid_redirect_data}) # Get data corresponding to release_group column in release table. 
- MB_release_fk_release_group = list({value['release_group'] for value in MB_release_data if value['release_group'] == value[4]}) + MB_release_fk_release_group = list({value['release_group'] for value in MB_release_data}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -855,10 +1019,10 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect filter_data = {} # Get data corresponding to release column in medium table. - MB_medium_fk_release = list({value['release'] for value in MB_medium_data if value['release'] == value[1]}) + MB_medium_fk_release = list({value['release'] for value in MB_medium_data}) - # Get data corresponding to release column in release_gid_redirect table. - MB_release_gid_redirect_fk_release = list({value['new_id'] for value in MB_release_gid_redirect_data if value['new_id'] == value[1]}) + # Get data corresponding to new_id column in release_gid_redirect table. + MB_release_gid_redirect_fk_release = list({value['new_id'] for value in MB_release_gid_redirect_data}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -946,7 +1110,7 @@ def load_medium(connection, gids_in_AB, MB_track_data): filter_data = {} # Get data corresponding to medium column in track table. 
- MB_track_fk_medium = list({value['medium'] for value in MB_track_data if value['medium'] == value[3]}) + MB_track_fk_medium = list({value['medium'] for value in MB_track_data}) if gids_in_AB: filters.append("recording.gid in :gids") @@ -1762,24 +1926,39 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): except ValueError: logging.info("No Data found from artist credit name table for the recordings") + # artist_gid_redirect + try: + logging.info('Getting artist gid redirect data...') + MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, gids_in_AB) + except ValueError: + logging.info("No Data found from artist gid redirect table for the recordings") + # artist try: logging.info('Getting artist data...') - MB_artist_data = load_artist(connection, gids_in_AB, MB_artist_credit_name_data) + MB_artist_data = load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gid_redirect_data) except ValueError: logging.info("No Data found from artist table for the recordings") # artist_type try: logging.info('Getting artist type data...') - MB_artist_type_data = load_artist_type(connection, gids_in_AB) + MB_artist_type_data = load_artist_type(connection, gids_in_AB, MB_artist_data) except ValueError: logging.info("No Data found from artist type table for the recordings") + + # recording_gid_redirect + try: + logging.info('Getting recording gid redirect data...') + MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB) + except ValueError: + logging.info("No Data found from recording gid redirect table for the recordings") + # recording try: logging.info('Getting recording data...') - MB_recording_data = load_recording(connection, gids_in_AB) + MB_recording_data = load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data) except ValueError: logging.info("No Data found from recording table for the recordings") @@ -1800,14 +1979,14 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # end_area try: 
logging.info('Getting end area data...') - MB_end_area_data = load_end_area(connection, gids_in_AB) + MB_end_area_data = load_end_area(connection, gids_in_AB, MB_artist_data) except ValueError: logging.info("No Data found from area table for the recordings") # area_type try: logging.info('Getting area type data...') - MB_area_type_data = load_area_type(connection, gids_in_AB) + MB_area_type_data = load_area_type(connection, gids_in_AB, MB_area_data) except ValueError: logging.info("No Data found from area type table for the recordings") @@ -1825,13 +2004,6 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): except ValueError: logging.info("No Data found from area type table for the recordings") - # artist_gid_redirect - try: - logging.info('Getting artist gid redirect data...') - MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, gids_in_AB) - except ValueError: - logging.info("No Data found from artist gid redirect table for the recordings") - # gender try: logging.info('Getting gender data...') @@ -1842,7 +2014,7 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # language try: logging.info('Getting language data...') - MB_language_data = load_language(connection, gids_in_AB) + MB_language_data = load_language(connection, gids_in_AB, MB_release_data) except ValueError: logging.info("No Data found from language table for the recordings") @@ -1853,13 +2025,6 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): except ValueError: logging.info("No Data found from medium format table for the recordings") - # recording_gid_redirect - try: - logging.info('Getting recording gid redirect data...') - MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB) - except ValueError: - logging.info("No Data found from recording gid redirect table for the recordings") - # release_group gid redirect try: logging.info('Getting release group gid redirect data...') @@ -1877,7 +2042,7 @@ def 
fetch_and_insert_musicbrainz_data(gids_in_AB): # artist_credit try: logging.info('Getting artist credit data...') - MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data) + MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data) except ValueError: logging.info("No Data found from artist credit table for the recordings") @@ -1898,14 +2063,14 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # release_status try: logging.info('Getting release status data...') - MB_release_status_data = load_release_status(connection, gids_in_AB) + MB_release_status_data = load_release_status(connection, gids_in_AB, MB_release_data) except ValueError: logging.info("No Data found from release status table for the recordings") # script try: logging.info('Getting script data...') - MB_script_data = load_script(connection, gids_in_AB) + MB_script_data = load_script(connection, gids_in_AB, MB_release_data) except ValueError: logging.info("No Data found from script table for the recordings") From 5042cba2e30335ebf968ce7253eb98d68baf9d77 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 5 Jul 2018 14:38:02 +0530 Subject: [PATCH 042/125] Functions to import and write data for track gid redirect table --- db/import_mb_data.py | 58 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 06b1553ee..59b2631b3 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1181,6 +1181,32 @@ def load_track(connection, gids_in_AB): return MB_track_data +def load_track_gid_redirect(connection, gids_in_AB): + """Fetch track_gid_redirect table data from MusicBrainz database for the + recording MBIDs in AcousticBrainz database. + + Args: + connection: database connection to execute the query. 
+ Returns: + track_gid_redirect data fetched from MusicBrainz database. + """ + track_gid_redirect_query = text(""" + SELECT DISTINCT track_gid_redirect.gid, + track_gid_redirect.new_id, + track_gid_redirect.created + FROM track_gid_redirect + INNER JOIN track + ON track.id = track_gid_redirect.new_id + INNER JOIN recording + ON recording.id = track.recording + WHERE recording.gid in :gids + """) + result = connection.execute(track_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + MB_track_gid_redirect_data = result.fetchall() + + return MB_track_gid_redirect_data + + def write_artist_credit(connection, MB_artist_credit_data): """Insert data into artist_credit table in musicbrainz schema in AcousticBrainz database. @@ -1887,10 +1913,39 @@ def write_track(connection, MB_track_data): logging.info('Inserted %d rows in track table!' % len(MB_track_data)) +def write_track_gid_redirect(connection, MB_track_gid_redirect_data): + """Insert data in track_gid_redirect table in musicbrainz schema in + AcousticBrainz database. + + Args: + connection: database connection to execute the query. + MB_track_gid_redirect_data: track_gid_redirect data fetched from MusicBrainz database. + """ + track_gid_redirect_query = text(""" + INSERT INTO musicbrainz.track_gid_redirect + VALUES (:gid, :new_id, :created) + ON CONFLICT (gid) DO NOTHING + """) + values = [{ + "gid": value[0], + "new_id": value[1], + "created": value[2]} for value in MB_track_gid_redirect_data + ] + connection.execute(track_gid_redirect_query, values) + logging.info('Inserted %d rows in track gid redirect table!' 
% len(MB_track_gid_redirect_data)) + + def fetch_and_insert_musicbrainz_data(gids_in_AB): # Get MusicBrainz data logging.info('\nGetting %d recordings data at a time...\n' % (len(gids_in_AB))) with musicbrainz_db.engine.begin() as connection: + # track_gid_redirect + try: + logging.info('Getting track gid redirect data...') + MB_track_gid_redirect_data = load_track_gid_redirect(connection, gids_in_AB) + except ValueError: + logging.info("No Data found from track gid redirect table for the recordings") + # track try: logging.info('Getting track data...') @@ -2155,6 +2210,9 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): if MB_track_data: write_track(connection, MB_track_data) + if MB_track_gid_redirect_data: + write_track_gid_redirect(connection, MB_track_gid_redirect_data) + def start_import(): with db.engine.begin() as connection: From 8ff1ecb16baaa6729fc57c8f7ce9b5f3061681be Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 5 Jul 2018 15:02:19 +0530 Subject: [PATCH 043/125] Import data in track table for every new_id in track gid redirect data --- db/import_mb_data.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 59b2631b3..57a0cba1c 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1148,15 +1148,36 @@ def load_medium(connection, gids_in_AB, MB_track_data): return MB_medium_data -def load_track(connection, gids_in_AB): +def load_track(connection, gids_in_AB, MB_track_gid_redirect_data): """Fetch track table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. + Also fetch data corresponding to track_gid_redirect table. + Args: connection: database connection to execute the query. + MB_track_gid_redirect_data: track gid redirect data fetched from MusicBrainz database Returns: track data fetched from MusicBrainz database. 
""" + filters = [] + filter_data = {} + + # Get data corresponding to new_id column in track_gid_redirect table. + MB_track_gid_redirect_fk_track = list({value['new_id'] for value in MB_track_gid_redirect_data}) + + if gids_in_AB: + filters.append("recording.gid in :gids") + filter_data["gids"] = tuple(gids_in_AB) + + if MB_track_gid_redirect_data: + filters.append("track.id in :redirect_data") + filter_data["redirect_data"] = tuple(MB_track_gid_redirect_fk_track) + + filterstr = " OR ".join(filters) + if filterstr: + filterstr = " WHERE " + filterstr + track_query = text(""" SELECT DISTINCT track.id, track.gid, @@ -1173,9 +1194,10 @@ def load_track(connection, gids_in_AB): FROM track INNER JOIN recording ON track.recording = recording.id - WHERE recording.gid in :gids - """) - result = connection.execute(track_query, {'gids': tuple(gids_in_AB)}) + {filterstr} + """.format(filterstr=filterstr) + ) + result = connection.execute(track_query, filter_data) MB_track_data = result.fetchall() return MB_track_data @@ -1949,7 +1971,7 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # track try: logging.info('Getting track data...') - MB_track_data = load_track(connection, gids_in_AB) + MB_track_data = load_track(connection, gids_in_AB, MB_track_gid_redirect_data) except ValueError: logging.info("No Data found from track table for the recordings") From 24eaf6c3877bd5f3b9e79fcd9cfae03f6b1dcf2c Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 5 Jul 2018 20:04:11 +0530 Subject: [PATCH 044/125] Reorder the import of a module --- db/import_mb_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 57a0cba1c..b935bf094 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1,9 +1,9 @@ import db from brainzutils import musicbrainz_db from sqlalchemy import text -from flask import current_app import time import logging +from flask import current_app def load_artist_credit(connection, gids_in_AB, 
MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data): """Fetch artist_credit table data from MusicBrainz database for the From 8ffcf1e0d9705942a0679556883b3977f44a92b4 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Mon, 9 Jul 2018 01:07:31 +0530 Subject: [PATCH 045/125] Add more explainations about the arguments in docstrings --- db/import_mb_data.py | 63 ++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index b935bf094..b7da6cd74 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -13,9 +13,12 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group Args: connection: database connection to execute the query. - MB_release_data: release data fetched from MusicBrainz database. - MB_release_group_data: release_group data fetched from MusicBrainz database. - MB_track_data: track data fetched from MusicBrainz database. + MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the release table + of the MusicBrainz database (should contain artist credit values). + MB_release_group_data (of type - sqlalchemy.resultproxy): data retrieved from the release_group + table of the MusicBrainz database (should contain artist credit values). + MB_track_data (of type - sqlalchemy.resultproxy): data retrieved from the track table of the + MusicBrainz database (should contain artist credit values). MB_artist_credit_name_data: artist_credit_name_data fetched from MusicBrainz database. Returns: artist_credit data fetched from MusicBrainz database. @@ -83,7 +86,8 @@ def load_artist_type(connection, gids_in_AB, MB_artist_data): Args: connection: database connection to execute the query. - MB_artist_data: artist data fetched from MusicBrainz database. + MB_artist_data (of type - sqlalchemy.resultproxy): data retrieved from the artist + table of the MusicBrainz database (should contain artist type values). 
Returns: artist_type data fetched from MusicBrainz database. """ @@ -136,7 +140,8 @@ def load_area_type(connection, gids_in_AB, MB_area_data): Args: connection: database connection to execute the query. - MB_area_data: area data fetched from MusicBrainz database. + MB_area_data (of type - sqlalchemy.resultproxy): data retrieved from the area table + of the MusicBrainz database(should contain area type values). Returns: area_type data fetched from MusicBrainz database. """ @@ -257,7 +262,8 @@ def load_release_status(connection, gids_in_AB, MB_release_data): Args: connection: database connection to execute the query. - MB_release_data: release data fetched from MusicBrainz database. + MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the release table + of the MusicBrainz database (should contain release_status values). Returns: release_status data fetched from MusicBrainz database. """ @@ -308,7 +314,9 @@ def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_dat Args: connection: database connection to execute the query. - MB_release_group_data: release group data fetched from MusicBrainz database. + MB_release_group_data (of type - sqlalchemy.resultproxy): data retrieved from the + release_group table of the MusicBrainz database + (should contain release_group_primary_type values). Returns: release_group_primary_type data fetched from MusicBrainz database. """ @@ -375,7 +383,8 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): Args: connection: database connection to execute the query. - MB_release_data: release data fetched from MusicBrainz database. + MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the + release table of the MusicBrainz database (should contain release_packaging values) Returns: release_packaging data fetched from MusicBrainz database. 
""" @@ -427,7 +436,8 @@ def load_language(connection, gids_in_AB, MB_release_data): Args: connection: database connection to execute the query. - MB_release_data: release data fetched from MusicBrainz database. + MB_release_data(of type - sqlalchemy.resultproxy): data retrieved from the + release table of the MusicBrainz database (should contain language values). Returns: language data fetched from MusicBrainz database. """ @@ -479,7 +489,8 @@ def load_script(connection, gids_in_AB, MB_release_data): Args: connection: database connection to execute the query. - MB_release_data: release data fetched from MusicBrainz database. + MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the + release table of the MusicBrainz database (should contain script values). Returns: script data fetched from MusicBrainz database. """ @@ -560,7 +571,8 @@ def load_area(connection, gids_in_AB, MB_artist_data): Args: connection: database connection to execute the query. - MB_artist_data: artist data fetched from MusicBrainz database. + MB_artist_data(of type - sqlalchemy.resultproxy): data retrieved from the + artist table of the MusicBrainz database (should contain area values). Returns: area data fetched from MusicBrainz database. """ @@ -622,7 +634,8 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): Args: connection: database connection to execute the query. - MB_artist_data: artist data fetched from MusicBrainz database. + MB_artist_data (of type - sqlalchemy.resultproxy): data retrieved from the + artist table of the MusicBrainz database (should contain begin_area values). Returns: begin_area data fetched from MusicBrainz database. """ @@ -772,8 +785,10 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gi Args: connection: database connection to execute the query. - MB_artist_credit_name_data: artist credit name data fetched from MusicBrainz database. 
- MB_artist_gid_redirect_data: artist_gid_redirect_data fetched from MusicBrainz database. + MB_artist_credit_name_data(of type - sqlalchemy.resultproxy): data retrieved from the + artist_credit_name table of the MusicBrainz database (should contain artist values). + MB_artist_gid_redirect_data(of type - sqlalchemy.resultproxy): data retrieved from the + artist_gid_redirect table of the MusicBrainz database (should contain artist values). Returns: artist data fetched from MusicBrainz database. """ @@ -924,8 +939,11 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat Args: connection: database connection to execute the query. - MB_release_group_gid_redirect_data: release group gid redirect data fetched from MusicBrainz database. - MB_release_data: release data fetched from MusicBrainz database. + MB_release_group_gid_redirect_data (of type - sqlalchemy.resultproxy): data retrieved from the + release_group_gid_redirect table of the MusicBrainz database + (should contain release_group values). + MB_release_data(of type - sqlalchemy.resultproxy): data retrieved from the + release table of the MusicBrainz database (should contain release_group values). Returns: release_group data fetched from MusicBrainz database. """ @@ -1010,8 +1028,11 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect Args: connection: database connection to execute the query. - MB_medium_data: medium data fetched from MusicBrainz database. - MB_release_gid_redirect_data: release_gid_redirect data fetched from MusicBrainz database. + MB_medium_data(of type - sqlalchemy.resultproxy): data retrieved from the + medium table of the MusicBrainz database (should contain release values). + MB_release_gid_redirect_data (of type - sqlalchemy.resultproxy): data retrieved from the + release_gid_redirect table of the MusicBrainz database + (should contain release values). Returns: release data fetched from MusicBrainz database. 
""" @@ -1102,7 +1123,8 @@ def load_medium(connection, gids_in_AB, MB_track_data): Args: connection: database connection to execute the query. - MB_track_data: track data fetched from MusicBrainz database. + MB_track_data (of type - sqlalchemy.resultproxy): data retrieved from the + track table of the MusicBrainz database (should contain medium values). Returns: medium data fetched from MusicBrainz database. """ @@ -1156,7 +1178,8 @@ def load_track(connection, gids_in_AB, MB_track_gid_redirect_data): Args: connection: database connection to execute the query. - MB_track_gid_redirect_data: track gid redirect data fetched from MusicBrainz database + MB_track_gid_redirect_data (of type - sqlalchemy.resultproxy): data retrieved from the + track_gid_redirect table of the MusicBrainz database (should contain track values). Returns: track data fetched from MusicBrainz database. """ From a0fc75c05827d1e2a872ae2a68d2ccb2a3392259 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 11 Jul 2018 23:36:07 +0530 Subject: [PATCH 046/125] Enable logging.info to log messages in the mb importer script --- db/import_mb_data.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index b7da6cd74..8f67c2c74 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -5,6 +5,8 @@ import logging from flask import current_app +logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) + def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data): """Fetch artist_credit table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -1982,7 +1984,7 @@ def write_track_gid_redirect(connection, MB_track_gid_redirect_data): def fetch_and_insert_musicbrainz_data(gids_in_AB): # Get MusicBrainz data - logging.info('\nGetting %d recordings data at a time...\n' % (len(gids_in_AB))) + logging.info('Getting %d recordings data at a time...\n' % (len(gids_in_AB))) with musicbrainz_db.engine.begin() as connection: # track_gid_redirect try: @@ -2169,13 +2171,13 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # script try: - logging.info('Getting script data...') - MB_script_data = load_script(connection, gids_in_AB, MB_release_data) + logging.info('Getting script data...\n') + MB_script_data = load_script(connection, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from script table for the recordings") # Write MusicBrainz data into AcousticBrainz database - logging.info('\nInserting %d recordings data at a time...\n' % (len(gids_in_AB))) + logging.info('Inserting %d recordings data at a time...\n' % (len(gids_in_AB))) with db.engine.begin() as connection: if MB_artist_credit_data: write_artist_credit(connection, MB_artist_credit_data) From 37bad8e2d1c3b121c008b74eae48d8cd7b4606d9 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 12 Jul 2018 01:00:22 +0530 Subject: [PATCH 047/125] Write correct arguments for load_script function --- db/import_mb_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 8f67c2c74..209b84933 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -2172,7 +2172,7 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # script try: logging.info('Getting script data...\n') - MB_script_data = load_script(connection, MB_release_data, artist_credit_from_recording) + MB_script_data = load_script(connection, gids_in_AB, MB_release_data) except ValueError: logging.info("No Data found from script table for the recordings") From 
4946b31b015c4ce28275992b6f5876d662e37381 Mon Sep 17 00:00:00 2001 From: RashiSah Date: Thu, 28 Jun 2018 19:07:02 +0530 Subject: [PATCH 048/125] Reduce import time by removing inner joins with recording table and remove distinct from query --- db/import_mb_data.py | 341 +++++++++++++++++++------------------------ 1 file changed, 151 insertions(+), 190 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 209b84933..a66c4bebe 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -7,7 +7,7 @@ logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) -def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data): +def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, artist_credit_from_recording): """Fetch artist_credit table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -40,9 +40,9 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group # Get data corresponding to artist_credit column in artist_credit_name table MB_artist_credit_name_fk_artist_credit = list({value['artist_credit'] for value in MB_artist_credit_name_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("artist_credit.id in :ids") + filter_data["ids"] = tuple(artist_credit_from_recording) if MB_release_data: filters.append("artist_credit.id in :release_data") @@ -65,11 +65,9 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group filterstr = " WHERE " + filterstr artist_credit_query = text(""" - SELECT DISTINCT artist_credit.id, artist_credit.name, artist_credit.artist_count, + SELECT artist_credit.id, artist_credit.name, artist_credit.artist_count, artist_credit.ref_count, artist_credit.created FROM artist_credit - INNER JOIN recording - ON artist_credit.id = recording.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -80,7 +78,7 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group return MB_artist_credit_data -def load_artist_type(connection, gids_in_AB, MB_artist_data): +def load_artist_type(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording): """Fetch artist_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -99,9 +97,9 @@ def load_artist_type(connection, gids_in_AB, MB_artist_data): # Get data corresponding to type column in artist table MB_artist_fk_artist_type = list({value['type'] for value in MB_artist_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("artist_credit.id in :ids") + filter_data["ids"] = tuple(artist_credit_from_recording) if MB_artist_data: filters.append("artist_type.id in :artist_data") @@ -112,7 +110,7 @@ def load_artist_type(connection, gids_in_AB, MB_artist_data): filterstr = " WHERE " + filterstr artist_type_query = text(""" - SELECT DISTINCT artist_type.id, + SELECT artist_type.id, artist_type.name, artist_type.parent, artist_type.child_order, @@ -123,8 +121,6 @@ def load_artist_type(connection, gids_in_AB, MB_artist_data): ON artist.type = artist_type.id INNER JOIN artist_credit ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -134,7 +130,7 @@ def load_artist_type(connection, gids_in_AB, MB_artist_data): return MB_artist_type_data -def load_area_type(connection, gids_in_AB, MB_area_data): +def load_area_type(connection, gids_in_AB, MB_area_data, artist_credit_from_recording): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -153,9 +149,9 @@ def load_area_type(connection, gids_in_AB, MB_area_data): # Get data corresponding to type column in area table MB_area_fk_area_type = list({value['type'] for value in MB_area_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("artist_credit.id in :ids") + filter_data["ids"] = tuple(artist_credit_from_recording) if MB_area_data: filters.append("area_type.id in :area_data") @@ -166,7 +162,7 @@ def load_area_type(connection, gids_in_AB, MB_area_data): filterstr = " WHERE " + filterstr area_type_query = text(""" - SELECT DISTINCT area_type.id, + SELECT area_type.id, area_type.name, area_type.parent, area_type.child_order, @@ -179,8 +175,6 @@ def load_area_type(connection, gids_in_AB, MB_area_data): ON area.id = artist.area INNER JOIN artist_credit ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -190,7 +184,7 @@ def load_area_type(connection, gids_in_AB, MB_area_data): return MB_area_type_data -def load_begin_area_type(connection, gids_in_AB): +def load_begin_area_type(connection, gids_in_AB, artist_credit_from_recording): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for the begin area column in artist table. @@ -201,7 +195,7 @@ def load_begin_area_type(connection, gids_in_AB): begin_area_type data fetched from MusicBrainz database. 
""" begin_area_type_query = text(""" - SELECT DISTINCT area_type.id, + SELECT area_type.id, area_type.name, area_type.parent, area_type.child_order, @@ -214,17 +208,15 @@ def load_begin_area_type(connection, gids_in_AB): ON area.id = artist.begin_area INNER JOIN artist_credit ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids + WHERE artist_credit.id in :data """) - result = connection.execute(begin_area_type_query, {'gids': tuple(gids_in_AB)}) + result = connection.execute(begin_area_type_query, {'data': tuple(artist_credit_from_recording)}) MB_begin_area_type_data = result.fetchall() return MB_begin_area_type_data -def load_end_area_type(connection, gids_in_AB): +def load_end_area_type(connection, gids_in_AB, artist_credit_from_recording): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for the end area column in artist table. @@ -235,7 +227,7 @@ def load_end_area_type(connection, gids_in_AB): end_area_type data fetched from MusicBrainz database. 
""" end_area_type_query = text(""" - SELECT DISTINCT area_type.id, + SELECT area_type.id, area_type.name, area_type.parent, area_type.child_order, @@ -248,17 +240,15 @@ def load_end_area_type(connection, gids_in_AB): ON area.id = artist.end_area INNER JOIN artist_credit ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids + WHERE artist_credit.id in :data """) - result = connection.execute(end_area_type_query, {'gids': tuple(gids_in_AB)}) + result = connection.execute(end_area_type_query, {'data': tuple(artist_credit_from_recording)}) MB_end_area_type_data = result.fetchall() return MB_end_area_type_data -def load_release_status(connection, gids_in_AB, MB_release_data): +def load_release_status(connection, gids_in_AB, MB_release_data, artist_credit_from_recording): """Fetch release_status table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -275,9 +265,9 @@ def load_release_status(connection, gids_in_AB, MB_release_data): # Get data corresponding to status column in release table MB_release_fk_status = list({value['status'] for value in MB_release_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("release.artist_credit in :ids") + filter_data["ids"] = tuple(artist_credit_from_recording) if MB_release_data: filters.append("release_status.id in :data") @@ -288,7 +278,7 @@ def load_release_status(connection, gids_in_AB, MB_release_data): filterstr = " WHERE " + filterstr release_status_query = text(""" - SELECT DISTINCT release_status.id, + SELECT release_status.id, release_status.name, release_status.parent, release_status.child_order, @@ -297,8 +287,6 @@ def load_release_status(connection, gids_in_AB, MB_release_data): FROM release_status INNER JOIN release ON release.status = release_status.id - INNER JOIN recording - ON 
recording.artist_credit = release.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -308,7 +296,7 @@ def load_release_status(connection, gids_in_AB, MB_release_data): return MB_release_status_data -def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data): +def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data, artist_credit_from_recording): """Fetch release_group_primary_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -328,9 +316,9 @@ def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_dat # Get data corresponding to release_group_primary_type column in release_group table MB_release_group_fk_type = list({value['type'] for value in MB_release_group_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("release_group.artist_credit in :credit") + filter_data["credit"] = tuple(artist_credit_from_recording) if MB_release_group_data: filters.append("release_group_primary_type.id in :data") @@ -341,13 +329,12 @@ def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_dat filterstr = " WHERE " + filterstr release_group_primary_type_query = text(""" - SELECT DISTINCT release_group_primary_type.id, release_group_primary_type.name, + SELECT release_group_primary_type.id, release_group_primary_type.name, release_group_primary_type.parent, release_group_primary_type.child_order, release_group_primary_type.description, release_group_primary_type.gid - FROM release_group_primary_type INNER JOIN release_group + FROM release_group_primary_type + INNER JOIN release_group ON release_group_primary_type.id = release_group.type - INNER JOIN recording - ON recording.artist_credit = release_group.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -377,7 +364,7 @@ def load_medium_format(connection, gids_in_AB): 
return MB_medium_format_data -def load_release_packaging(connection, gids_in_AB, MB_release_data): +def load_release_packaging(connection, gids_in_AB, MB_release_data, artist_credit_from_recording): """Fetch release_packaging table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -396,9 +383,9 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): # Get data corresponding to release_packaging column in release table MB_release_fk_packaging = list({value['packaging'] for value in MB_release_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("release.artist_credit in :credit") + filter_data["credit"] = tuple(artist_credit_from_recording) if MB_release_data: filters.append("release_packaging.id in :data") @@ -409,7 +396,7 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): filterstr = " WHERE " + filterstr release_packaging_query = text(""" - SELECT DISTINCT release_packaging.id, + SELECT release_packaging.id, release_packaging.name, release_packaging.parent, release_packaging.child_order, @@ -418,8 +405,6 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): FROM release_packaging INNER JOIN release ON release.packaging = release_packaging.id - INNER JOIN recording - ON recording.artist_credit = release.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -430,7 +415,7 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data): return MB_release_packaging_data -def load_language(connection, gids_in_AB, MB_release_data): +def load_language(connection, gids_in_AB, MB_release_data, artist_credit_from_recording): """Fetch language table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -449,9 +434,9 @@ def load_language(connection, gids_in_AB, MB_release_data): # Get data corresponding to language column in release table MB_release_fk_language = list({value['language'] for value in MB_release_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("release.artist_credit in :ids") + filter_data["ids"] = tuple(artist_credit_from_recording) if MB_release_data: filters.append("language.id in :data") @@ -462,7 +447,7 @@ def load_language(connection, gids_in_AB, MB_release_data): filterstr = " WHERE " + filterstr language_query = text(""" - SELECT DISTINCT language.id, + SELECT language.id, language.iso_code_2t, language.iso_code_2b, language.iso_code_1, @@ -472,8 +457,6 @@ def load_language(connection, gids_in_AB, MB_release_data): FROM language INNER JOIN release ON release.language = language.id - INNER JOIN recording - ON recording.artist_credit = release.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -483,7 +466,7 @@ def load_language(connection, gids_in_AB, MB_release_data): return MB_language_data -def load_script(connection, gids_in_AB, MB_release_data): +def load_script(connection, gids_in_AB, MB_release_data, artist_credit_from_recording): """Fetch script table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -502,9 +485,9 @@ def load_script(connection, gids_in_AB, MB_release_data): # Get data corresponding to language column in release table MB_release_fk_script = list({value['script'] for value in MB_release_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("release.artist_credit in :ids") + filter_data["ids"] = tuple(artist_credit_from_recording) if MB_release_data: filters.append("script.id in :data") @@ -515,7 +498,7 @@ def load_script(connection, gids_in_AB, MB_release_data): filterstr = " WHERE " + filterstr script_query = text(""" - SELECT DISTINCT script.id, + SELECT script.id, script.iso_code, script.iso_number, script.name, @@ -523,8 +506,6 @@ def load_script(connection, gids_in_AB, MB_release_data): FROM script INNER JOIN release ON release.script = script.id - INNER JOIN recording - ON recording.artist_credit = release.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -534,7 +515,7 @@ def load_script(connection, gids_in_AB, MB_release_data): return MB_script_data -def load_gender(connection, gids_in_AB): +def load_gender(connection, gids_in_AB, artist_credit_from_recording): """ Fetch gender table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -544,7 +525,7 @@ def load_gender(connection, gids_in_AB): gender data fetched from MusicBrainz database. 
""" gender_query = text(""" - SELECT DISTINCT gender.id, + SELECT gender.id, gender.name, gender.parent, gender.child_order, @@ -555,17 +536,15 @@ def load_gender(connection, gids_in_AB): ON artist.gender = gender.id INNER JOIN artist_credit ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids + WHERE artist_credit.id in :data """) - result = connection.execute(gender_query, {'gids': tuple(gids_in_AB)}) + result = connection.execute(gender_query, {'data': tuple(artist_credit_from_recording)}) MB_gender_data = result.fetchall() return MB_gender_data -def load_area(connection, gids_in_AB, MB_artist_data): +def load_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording): """ Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -584,9 +563,9 @@ def load_area(connection, gids_in_AB, MB_artist_data): # Get data corresponding to area column in artist table MB_artist_fk_area = list({value['area'] for value in MB_artist_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("artist_credit.id in :credit") + filter_data["credit"] = tuple(artist_credit_from_recording) if MB_artist_data: filters.append("area.id in :data") @@ -597,7 +576,7 @@ def load_area(connection, gids_in_AB, MB_artist_data): filterstr = " WHERE " + filterstr area_query = text(""" - SELECT DISTINCT area.id, + SELECT area.id, area.gid, area.name, area.type, @@ -616,8 +595,6 @@ def load_area(connection, gids_in_AB, MB_artist_data): ON area.id = artist.area INNER JOIN artist_credit ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -628,7 +605,7 @@ def load_area(connection, gids_in_AB, MB_artist_data): return MB_area_data -def load_begin_area(connection, 
gids_in_AB, MB_artist_data): +def load_begin_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording): """Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for begin area column. @@ -647,9 +624,9 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): # Get data corresponding to begin_area column in artist table MB_artist_fk_begin_area = list({value['begin_area'] for value in MB_artist_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("artist_credit.id in :credit") + filter_data["credit"] = tuple(artist_credit_from_recording) if MB_artist_data: filters.append("area.id in :data") @@ -660,7 +637,7 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): filterstr = " WHERE " + filterstr begin_area_query = text(""" - SELECT DISTINCT area.id, + SELECT area.id, area.gid, area.name, area.type, @@ -679,8 +656,6 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): ON area.id = artist.begin_area INNER JOIN artist_credit ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -691,7 +666,7 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data): return MB_begin_area_data -def load_end_area(connection, gids_in_AB, MB_artist_data): +def load_end_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording): """Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for end area column. 
@@ -708,9 +683,9 @@ def load_end_area(connection, gids_in_AB, MB_artist_data): # Get data corresponding to end_area column in artist table MB_artist_fk_end_area = list({value['end_area'] for value in MB_artist_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("artist_credit.id in :ids") + filter_data["ids"] = tuple(artist_credit_from_recording) if MB_artist_data: filters.append("area.id in :data") @@ -721,7 +696,7 @@ def load_end_area(connection, gids_in_AB, MB_artist_data): filterstr = " WHERE " + filterstr end_area_query = text(""" - SELECT DISTINCT area.id, + SELECT area.id, area.gid, area.name, area.type, @@ -740,8 +715,6 @@ def load_end_area(connection, gids_in_AB, MB_artist_data): ON area.id = artist.end_area INNER JOIN artist_credit ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -751,7 +724,7 @@ def load_end_area(connection, gids_in_AB, MB_artist_data): return MB_end_area_data -def load_artist_credit_name(connection, gids_in_AB): +def load_artist_credit_name(connection, gids_in_AB, artist_credit_from_recording): """Fetch artist_credit_name table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -761,7 +734,7 @@ def load_artist_credit_name(connection, gids_in_AB): artist_credit_name data fetched from MusicBrainz database. 
""" artist_credit_name_query = text(""" - SELECT DISTINCT artist_credit_name.artist_credit, + SELECT artist_credit_name.artist_credit, artist_credit_name.position, artist_credit_name.artist, artist_credit_name.name, @@ -769,17 +742,15 @@ def load_artist_credit_name(connection, gids_in_AB): FROM artist_credit_name INNER JOIN artist_credit ON artist_credit_name.artist_credit = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids + WHERE artist_credit.id in :data """) - result = connection.execute(artist_credit_name_query, {'gids': tuple(gids_in_AB)}) + result = connection.execute(artist_credit_name_query, {'data': tuple(artist_credit_from_recording)}) MB_artist_credit_name_data = result.fetchall() return MB_artist_credit_name_data -def load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gid_redirect_data): +def load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gid_redirect_data, artist_credit_from_recording): """Fetch artist table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -803,9 +774,9 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gi # Get data corresponding to new_id column in artist_gid_redirect table. 
MB_artist_gid_redirect_fk_artist = list({value['new_id'] for value in MB_artist_gid_redirect_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("artist_credit.id in :credit") + filter_data["credit"] = tuple(artist_credit_from_recording) if MB_artist_credit_name_data: filters.append("artist.id in :data") @@ -820,15 +791,13 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gi filterstr = " WHERE " + filterstr artist_query = text(""" - SELECT DISTINCT artist.id, artist.gid, artist.name, artist.sort_name, artist.begin_date_year, + SELECT artist.id, artist.gid, artist.name, artist.sort_name, artist.begin_date_year, artist.begin_date_month, artist.begin_date_day, artist.end_date_year, artist.end_date_month, artist.end_date_day, artist.type, artist.area, artist.gender, artist.comment, artist.edits_pending, artist.last_updated, artist.ended, artist.begin_area, artist.end_area FROM artist INNER JOIN artist_credit ON artist_credit.id = artist.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -839,7 +808,7 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gi return MB_artist_data -def load_artist_gid_redirect(connection, gids_in_AB): +def load_artist_gid_redirect(connection, gids_in_AB, artist_credit_from_recording): """Fetch artist_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -849,7 +818,7 @@ def load_artist_gid_redirect(connection, gids_in_AB): artist_gid_redirect data fetched from MusicBrainz database. 
""" artist_gid_redirect_query = text(""" - SELECT DISTINCT artist_gid_redirect.gid, + SELECT artist_gid_redirect.gid, artist_gid_redirect.new_id, artist_gid_redirect.created FROM artist_gid_redirect @@ -857,11 +826,9 @@ def load_artist_gid_redirect(connection, gids_in_AB): ON artist.id = artist_gid_redirect.new_id INNER JOIN artist_credit ON artist.id = artist_credit.id - INNER JOIN recording - ON artist_credit.id = recording.artist_credit - WHERE recording.gid in :gids + WHERE artist_credit.id in :data """) - result = connection.execute(artist_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + result = connection.execute(artist_gid_redirect_query, {'data': tuple(artist_credit_from_recording)}) MB_artist_gid_redirect_data = result.fetchall() return MB_artist_gid_redirect_data @@ -895,7 +862,7 @@ def load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data): filterstr = " WHERE " + filterstr recording_query = text(""" - SELECT DISTINCT recording.id, recording.gid, recording.name, recording.artist_credit, + SELECT recording.id, recording.gid, recording.name, recording.artist_credit, recording.length, recording.comment, recording.edits_pending, recording.last_updated, recording.video FROM recording @@ -908,7 +875,7 @@ def load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data): return MB_recording_data -def load_recording_gid_redirect(connection, gids_in_AB): +def load_recording_gid_redirect(connection, gids_in_AB, id_from_recording): """Fetch recording_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -918,21 +885,19 @@ def load_recording_gid_redirect(connection, gids_in_AB): recording_gid_redirect data fetched from MusicBrainz database. 
""" recording_gid_redirect_query = text(""" - SELECT DISTINCT recording_gid_redirect.gid, + SELECT recording_gid_redirect.gid, recording_gid_redirect.new_id, recording_gid_redirect.created FROM recording_gid_redirect - INNER JOIN recording - ON recording.id = recording_gid_redirect.new_id - WHERE recording.gid in :gids + WHERE recording_gid_redirect.new_id in :data """) - result = connection.execute(recording_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + result = connection.execute(recording_gid_redirect_query, {'data': tuple(id_from_recording)}) MB_recording_gid_redirect_data = result.fetchall() return MB_recording_gid_redirect_data -def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data): +def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data, artist_credit_from_recording): """Fetch release_group table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -959,8 +924,8 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat MB_release_fk_release_group = list({value['release_group'] for value in MB_release_data}) if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + filters.append("release_group.artist_credit in :credit") + filter_data["credit"] = tuple(artist_credit_from_recording) if MB_release_group_gid_redirect_data: filters.append("release_group.id in :redirect_data") @@ -975,7 +940,7 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat filterstr = " WHERE " + filterstr release_group_query = text(""" - SELECT DISTINCT release_group.id, + SELECT release_group.id, release_group.gid, release_group.name, release_group.artist_credit, @@ -984,8 +949,6 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat release_group.edits_pending, release_group.last_updated FROM release_group - INNER JOIN recording - ON 
recording.artist_credit = release_group.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -996,7 +959,7 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat return MB_release_group_data -def load_release_group_gid_redirect(connection, gids_in_AB): +def load_release_group_gid_redirect(connection, gids_in_AB, artist_credit_from_recording): """Fetch release_group_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -1006,23 +969,21 @@ def load_release_group_gid_redirect(connection, gids_in_AB): release_group_gid_redirect data fetched from MusicBrainz database. """ release_group_gid_redirect_query = text(""" - SELECT DISTINCT release_group_gid_redirect.gid, + SELECT release_group_gid_redirect.gid, release_group_gid_redirect.new_id, release_group_gid_redirect.created FROM release_group_gid_redirect INNER JOIN release_group ON release_group.id = release_group_gid_redirect.new_id - INNER JOIN recording - ON recording.artist_credit = release_group.artist_credit - WHERE recording.gid in :gids + WHERE release_group.artist_credit in :data """) - result = connection.execute(release_group_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + result = connection.execute(release_group_gid_redirect_query, {'data': tuple(artist_credit_from_recording)}) MB_release_group_gid_redirect_data = result.fetchall() return MB_release_group_gid_redirect_data -def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data): +def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data, artist_credit_from_recording): """Fetch release table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -1047,9 +1008,9 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect # Get data corresponding to new_id column in release_gid_redirect table. 
MB_release_gid_redirect_fk_release = list({value['new_id'] for value in MB_release_gid_redirect_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("release.artist_credit in :credit") + filter_data["credit"] = tuple(artist_credit_from_recording) if MB_medium_data: filters.append("release.id in :medium_data") @@ -1064,7 +1025,7 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect filterstr = " WHERE " + filterstr release_query = text(""" - SELECT DISTINCT release.id, + SELECT release.id, release.gid, release.name, release.artist_credit, @@ -1079,8 +1040,6 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect release.quality, release.last_updated FROM release - INNER JOIN recording - ON recording.artist_credit = release.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -1091,7 +1050,7 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect return MB_release_data -def load_release_gid_redirect(connection, gids_in_AB): +def load_release_gid_redirect(connection, gids_in_AB, artist_credit_from_recording): """Fetch release_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -1101,23 +1060,21 @@ def load_release_gid_redirect(connection, gids_in_AB): release_gid_redirect data fetched from MusicBrainz database. 
""" release_gid_redirect_query = text(""" - SELECT DISTINCT release_gid_redirect.gid, + SELECT release_gid_redirect.gid, release_gid_redirect.new_id, release_gid_redirect.created FROM release_gid_redirect INNER JOIN release ON release.id = release_gid_redirect.new_id - INNER JOIN recording - ON recording.artist_credit = release.artist_credit - WHERE recording.gid in :gids + WHERE release.artist_credit in :data """) - result = connection.execute(release_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + result = connection.execute(release_gid_redirect_query, {'data': tuple(artist_credit_from_recording)}) MB_release_gid_redirect_data = result.fetchall() return MB_release_gid_redirect_data -def load_medium(connection, gids_in_AB, MB_track_data): +def load_medium(connection, gids_in_AB, MB_track_data, artist_credit_from_recording): """Fetch medium table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -1136,9 +1093,9 @@ def load_medium(connection, gids_in_AB, MB_track_data): # Get data corresponding to medium column in track table. 
MB_track_fk_medium = list({value['medium'] for value in MB_track_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if artist_credit_from_recording: + filters.append("release.artist_credit in :credit") + filter_data["credit"] = tuple(artist_credit_from_recording) if MB_track_data: filters.append("medium.id in :data") @@ -1149,7 +1106,7 @@ def load_medium(connection, gids_in_AB, MB_track_data): filterstr = " WHERE " + filterstr medium_query = text(""" - SELECT DISTINCT medium.id, + SELECT medium.id, medium.release, medium.position, medium.format, @@ -1160,8 +1117,6 @@ def load_medium(connection, gids_in_AB, MB_track_data): FROM medium INNER JOIN release ON release.id = medium.release - INNER JOIN recording - ON recording.artist_credit=release.artist_credit {filterstr} """.format(filterstr=filterstr) ) @@ -1172,7 +1127,7 @@ def load_medium(connection, gids_in_AB, MB_track_data): return MB_medium_data -def load_track(connection, gids_in_AB, MB_track_gid_redirect_data): +def load_track(connection, gids_in_AB, MB_track_gid_redirect_data, id_from_recording): """Fetch track table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -1191,9 +1146,9 @@ def load_track(connection, gids_in_AB, MB_track_gid_redirect_data): # Get data corresponding to new_id column in track_gid_redirect table. 
MB_track_gid_redirect_fk_track = list({value['new_id'] for value in MB_track_gid_redirect_data}) - if gids_in_AB: - filters.append("recording.gid in :gids") - filter_data["gids"] = tuple(gids_in_AB) + if id_from_recording: + filters.append("track.recording in :ids") + filter_data["ids"] = tuple(id_from_recording) if MB_track_gid_redirect_data: filters.append("track.id in :redirect_data") @@ -1204,7 +1159,7 @@ def load_track(connection, gids_in_AB, MB_track_gid_redirect_data): filterstr = " WHERE " + filterstr track_query = text(""" - SELECT DISTINCT track.id, + SELECT track.id, track.gid, track.recording, track.medium, @@ -1217,8 +1172,6 @@ def load_track(connection, gids_in_AB, MB_track_gid_redirect_data): track.last_updated, track.is_data_track FROM track - INNER JOIN recording - ON track.recording = recording.id {filterstr} """.format(filterstr=filterstr) ) @@ -1228,7 +1181,7 @@ def load_track(connection, gids_in_AB, MB_track_gid_redirect_data): return MB_track_data -def load_track_gid_redirect(connection, gids_in_AB): +def load_track_gid_redirect(connection, gids_in_AB, id_from_recording): """Fetch track_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -1244,11 +1197,9 @@ def load_track_gid_redirect(connection, gids_in_AB): FROM track_gid_redirect INNER JOIN track ON track.id = track_gid_redirect.new_id - INNER JOIN recording - ON recording.id = track.recording - WHERE recording.gid in :gids + WHERE track.recording in :ids """) - result = connection.execute(track_gid_redirect_query, {'gids': tuple(gids_in_AB)}) + result = connection.execute(track_gid_redirect_query, {'ids': tuple(id_from_recording)}) MB_track_gid_redirect_data = result.fetchall() return MB_track_gid_redirect_data @@ -1993,59 +1944,69 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): except ValueError: logging.info("No Data found from track gid redirect table for the recordings") + # recording + try: + logging.info('Getting recording data...') + MB_recording_data = load_recording(connection, gids_in_AB) + artist_credit_from_recording = [value[3] for value in MB_recording_data] + id_from_recording = [value[0] for value in MB_recording_data] + except ValueError: + logging.info("No Data found from recording table for the recordings") + # track try: logging.info('Getting track data...') - MB_track_data = load_track(connection, gids_in_AB, MB_track_gid_redirect_data) + MB_track_data = load_track(connection, gids_in_AB, id_from_recording, MB_track_gid_redirect_data) except ValueError: logging.info("No Data found from track table for the recordings") # medium try: logging.info('Getting medium data...') - MB_medium_data = load_medium(connection, gids_in_AB, MB_track_data) + MB_medium_data = load_medium(connection, gids_in_AB, MB_track_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from medium table for the recordings") # release_gid_redirect try: logging.info('Getting release gid redirect data...') - MB_release_gid_redirect_data = load_release_gid_redirect(connection, gids_in_AB) + MB_release_gid_redirect_data = load_release_gid_redirect(connection, gids_in_AB, artist_credit_from_recording) except ValueError: 
logging.info("No Data found from release gid redirect table for the recordings") # release try: logging.info('Getting release data...') - MB_release_data = load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data) + MB_release_data = load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release table for the recordings") # artist_credit_name try: logging.info('Getting artist credit name data...') - MB_artist_credit_name_data = load_artist_credit_name(connection, gids_in_AB) + MB_artist_credit_name_data = load_artist_credit_name(connection, gids_in_AB, artist_credit_from_recording) except ValueError: logging.info("No Data found from artist credit name table for the recordings") # artist_gid_redirect try: logging.info('Getting artist gid redirect data...') - MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, gids_in_AB) + MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, gids_in_AB, artist_credit_from_recording) except ValueError: logging.info("No Data found from artist gid redirect table for the recordings") # artist try: logging.info('Getting artist data...') - MB_artist_data = load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gid_redirect_data) + MB_artist_data = load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gid_redirect_data, artist_credit_from_recording) + artist_type_from_artist = [value[10] for value in MB_artist_data] except ValueError: logging.info("No Data found from artist table for the recordings") # artist_type try: logging.info('Getting artist type data...') - MB_artist_type_data = load_artist_type(connection, gids_in_AB, MB_artist_data) + MB_artist_type_data = load_artist_type(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from artist type table for the recordings") 
@@ -2053,7 +2014,7 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # recording_gid_redirect try: logging.info('Getting recording gid redirect data...') - MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB) + MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB, id_from_recording) except ValueError: logging.info("No Data found from recording gid redirect table for the recordings") @@ -2067,56 +2028,56 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # area try: logging.info('Getting area data...') - MB_area_data = load_area(connection, gids_in_AB, MB_artist_data) + MB_area_data = load_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from area table for the recordings") # begin_area try: logging.info('Getting begin area data...') - MB_begin_area_data = load_begin_area(connection, gids_in_AB, MB_artist_data) + MB_begin_area_data = load_begin_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from area table for the recordings") # end_area try: logging.info('Getting end area data...') - MB_end_area_data = load_end_area(connection, gids_in_AB, MB_artist_data) + MB_end_area_data = load_end_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from area table for the recordings") # area_type try: logging.info('Getting area type data...') - MB_area_type_data = load_area_type(connection, gids_in_AB, MB_area_data) + MB_area_type_data = load_area_type(connection, gids_in_AB, MB_area_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from area type table for the recordings") # begin_area_type try: logging.info('Getting begin area type data...') - MB_begin_area_type_data = load_begin_area_type(connection, gids_in_AB) + MB_begin_area_type_data = 
load_begin_area_type(connection, gids_in_AB, artist_credit_from_recording) except ValueError: logging.info("No Data found from area type table for the recordings") # end_area_type try: logging.info('Getting end area data...') - MB_end_area_type_data = load_end_area_type(connection, gids_in_AB) + MB_end_area_type_data = load_end_area_type(connection, gids_in_AB, artist_credit_from_recording) except ValueError: logging.info("No Data found from area type table for the recordings") # gender try: logging.info('Getting gender data...') - MB_gender_data = load_gender(connection, gids_in_AB) + MB_gender_data = load_gender(connection, gids_in_AB, artist_credit_from_recording) except ValueError: logging.info("No Data found from gender table for the recordings") # language try: logging.info('Getting language data...') - MB_language_data = load_language(connection, gids_in_AB, MB_release_data) + MB_language_data = load_language(connection, gids_in_AB, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from language table for the recordings") @@ -2130,49 +2091,49 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # release_group gid redirect try: logging.info('Getting release group gid redirect data...') - MB_release_group_gid_redirect_data = load_release_group_gid_redirect(connection, gids_in_AB) + MB_release_group_gid_redirect_data = load_release_group_gid_redirect(connection, gids_in_AB, artist_credit_from_recording) except ValueError: logging.info("No Data found from release group gid redirect table for the recordings") # release_group try: logging.info('Getting release group data...') - MB_release_group_data = load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data) + MB_release_group_data = load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release group table for 
the recordings") # artist_credit try: logging.info('Getting artist credit data...') - MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data) + MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from artist credit table for the recordings") # release_group_primary_type try: logging.info('Getting release group primary type data...') - MB_release_group_primary_type_data = load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data) + MB_release_group_primary_type_data = load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release group primary type table for the recordings") # release_packaging try: logging.info('Getting release packaging data...') - MB_release_packaging_data = load_release_packaging(connection, gids_in_AB, MB_release_data) + MB_release_packaging_data = load_release_packaging(connection, gids_in_AB, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release packaging table for the recordings") # release_status try: logging.info('Getting release status data...') - MB_release_status_data = load_release_status(connection, gids_in_AB, MB_release_data) + MB_release_status_data = load_release_status(connection, gids_in_AB, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release status table for the recordings") # script try: logging.info('Getting script data...\n') - MB_script_data = load_script(connection, gids_in_AB, MB_release_data) + MB_script_data = load_script(connection, gids_in_AB, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No 
Data found from script table for the recordings") From 8e2ea36d26b041a9206b8f074d63d47a89d981c1 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 5 Jul 2018 17:28:16 +0530 Subject: [PATCH 049/125] Remove DISTINCT keyword from a track gid redirect function and modify query for gender function --- db/import_mb_data.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index a66c4bebe..451ebb3e7 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -356,7 +356,7 @@ def load_medium_format(connection, gids_in_AB): """ medium_format_query = text(""" SELECT * FROM medium_format - ORDER BY id + ORDER BY id """) result = connection.execute(medium_format_query) MB_medium_format_data = result.fetchall() @@ -515,7 +515,7 @@ def load_script(connection, gids_in_AB, MB_release_data, artist_credit_from_reco return MB_script_data -def load_gender(connection, gids_in_AB, artist_credit_from_recording): +def load_gender(connection, gids_in_AB): """ Fetch gender table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -525,20 +525,10 @@ def load_gender(connection, gids_in_AB, artist_credit_from_recording): gender data fetched from MusicBrainz database. """ gender_query = text(""" - SELECT gender.id, - gender.name, - gender.parent, - gender.child_order, - gender.description, - gender.gid - FROM gender - INNER JOIN artist - ON artist.gender = gender.id - INNER JOIN artist_credit - ON artist.id = artist_credit.id - WHERE artist_credit.id in :data + SELECT * FROM gender + ORDER BY id """) - result = connection.execute(gender_query, {'data': tuple(artist_credit_from_recording)}) + result = connection.execute(gender_query) MB_gender_data = result.fetchall() return MB_gender_data @@ -1191,7 +1181,7 @@ def load_track_gid_redirect(connection, gids_in_AB, id_from_recording): track_gid_redirect data fetched from MusicBrainz database. 
""" track_gid_redirect_query = text(""" - SELECT DISTINCT track_gid_redirect.gid, + SELECT track_gid_redirect.gid, track_gid_redirect.new_id, track_gid_redirect.created FROM track_gid_redirect @@ -2070,7 +2060,7 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # gender try: logging.info('Getting gender data...') - MB_gender_data = load_gender(connection, gids_in_AB, artist_credit_from_recording) + MB_gender_data = load_gender(connection, gids_in_AB) except ValueError: logging.info("No Data found from gender table for the recordings") From 9cc47e91ddd862c7c0a6a5c3b949adfc167b5962 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 5 Jul 2018 19:37:07 +0530 Subject: [PATCH 050/125] Change order of calling functions to write the data into tables --- db/import_mb_data.py | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 451ebb3e7..da70d632d 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -865,7 +865,7 @@ def load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data): return MB_recording_data -def load_recording_gid_redirect(connection, gids_in_AB, id_from_recording): +def load_recording_gid_redirect(connection, gids_in_AB): """Fetch recording_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -879,9 +879,11 @@ def load_recording_gid_redirect(connection, gids_in_AB, id_from_recording): recording_gid_redirect.new_id, recording_gid_redirect.created FROM recording_gid_redirect - WHERE recording_gid_redirect.new_id in :data + INNER JOIN recording + ON recording.id = recording_gid_redirect.new_id + WHERE recording.gid in :gids """) - result = connection.execute(recording_gid_redirect_query, {'data': tuple(id_from_recording)}) + result = connection.execute(recording_gid_redirect_query, {'gids': tuple(gids_in_AB)}) MB_recording_gid_redirect_data = result.fetchall() return MB_recording_gid_redirect_data @@ -1451,6 +1453,7 @@ def write_script(connection, MB_script_data): """Insert data in script table in musicbrainz schema in AcousticBrainz database. + Args: connection: database connection to execute the query. MB_script_data: script data fetched from MusicBrainz database. @@ -1927,26 +1930,33 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # Get MusicBrainz data logging.info('Getting %d recordings data at a time...\n' % (len(gids_in_AB))) with musicbrainz_db.engine.begin() as connection: - # track_gid_redirect + # recording_gid_redirect try: - logging.info('Getting track gid redirect data...') - MB_track_gid_redirect_data = load_track_gid_redirect(connection, gids_in_AB) + logging.info('Getting recording gid redirect data...') + MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB) except ValueError: - logging.info("No Data found from track gid redirect table for the recordings") + logging.info("No Data found from recording gid redirect table for the recordings") # recording try: logging.info('Getting recording data...') - MB_recording_data = load_recording(connection, gids_in_AB) + MB_recording_data = load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data) artist_credit_from_recording = [value[3] for value in MB_recording_data] id_from_recording = [value[0] for value in MB_recording_data] except ValueError: 
logging.info("No Data found from recording table for the recordings") + # track_gid_redirect + try: + logging.info('Getting track gid redirect data...') + MB_track_gid_redirect_data = load_track_gid_redirect(connection, gids_in_AB, id_from_recording) + except ValueError: + logging.info("No Data found from track gid redirect table for the recordings") + # track try: logging.info('Getting track data...') - MB_track_data = load_track(connection, gids_in_AB, id_from_recording, MB_track_gid_redirect_data) + MB_track_data = load_track(connection, gids_in_AB, MB_track_gid_redirect_data, id_from_recording) except ValueError: logging.info("No Data found from track table for the recordings") @@ -2000,21 +2010,6 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): except ValueError: logging.info("No Data found from artist type table for the recordings") - - # recording_gid_redirect - try: - logging.info('Getting recording gid redirect data...') - MB_recording_gid_redirect_data = load_recording_gid_redirect(connection, gids_in_AB, id_from_recording) - except ValueError: - logging.info("No Data found from recording gid redirect table for the recordings") - - # recording - try: - logging.info('Getting recording data...') - MB_recording_data = load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data) - except ValueError: - logging.info("No Data found from recording table for the recordings") - # area try: logging.info('Getting area data...') From a7da54cdfe9f40251f14dcebffd7ec929b8fbabd Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 5 Jul 2018 19:46:20 +0530 Subject: [PATCH 051/125] Remove the redundant variable gids_in_Ab from all function's parameter --- db/import_mb_data.py | 102 +++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index da70d632d..1ca95aa89 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -7,7 +7,7 @@ 
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) -def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, artist_credit_from_recording): +def load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, artist_credit_from_recording): """Fetch artist_credit table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -78,7 +78,7 @@ def load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group return MB_artist_credit_data -def load_artist_type(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording): +def load_artist_type(connection, MB_artist_data, artist_credit_from_recording): """Fetch artist_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -130,7 +130,7 @@ def load_artist_type(connection, gids_in_AB, MB_artist_data, artist_credit_from_ return MB_artist_type_data -def load_area_type(connection, gids_in_AB, MB_area_data, artist_credit_from_recording): +def load_area_type(connection, MB_area_data, artist_credit_from_recording): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -184,7 +184,7 @@ def load_area_type(connection, gids_in_AB, MB_area_data, artist_credit_from_reco return MB_area_type_data -def load_begin_area_type(connection, gids_in_AB, artist_credit_from_recording): +def load_begin_area_type(connection, artist_credit_from_recording): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for the begin area column in artist table. 
@@ -216,7 +216,7 @@ def load_begin_area_type(connection, gids_in_AB, artist_credit_from_recording): return MB_begin_area_type_data -def load_end_area_type(connection, gids_in_AB, artist_credit_from_recording): +def load_end_area_type(connection, artist_credit_from_recording): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for the end area column in artist table. @@ -248,7 +248,7 @@ def load_end_area_type(connection, gids_in_AB, artist_credit_from_recording): return MB_end_area_type_data -def load_release_status(connection, gids_in_AB, MB_release_data, artist_credit_from_recording): +def load_release_status(connection, MB_release_data, artist_credit_from_recording): """Fetch release_status table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -296,7 +296,7 @@ def load_release_status(connection, gids_in_AB, MB_release_data, artist_credit_f return MB_release_status_data -def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data, artist_credit_from_recording): +def load_release_group_primary_type(connection, MB_release_group_data, artist_credit_from_recording): """Fetch release_group_primary_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -345,7 +345,7 @@ def load_release_group_primary_type(connection, gids_in_AB, MB_release_group_dat return MB_release_group_primary_type_data -def load_medium_format(connection, gids_in_AB): +def load_medium_format(connection): """Fetch medium_format table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -364,7 +364,7 @@ def load_medium_format(connection, gids_in_AB): return MB_medium_format_data -def load_release_packaging(connection, gids_in_AB, MB_release_data, artist_credit_from_recording): +def load_release_packaging(connection, MB_release_data, artist_credit_from_recording): """Fetch release_packaging table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -415,7 +415,7 @@ def load_release_packaging(connection, gids_in_AB, MB_release_data, artist_credi return MB_release_packaging_data -def load_language(connection, gids_in_AB, MB_release_data, artist_credit_from_recording): +def load_language(connection, MB_release_data, artist_credit_from_recording): """Fetch language table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -466,7 +466,7 @@ def load_language(connection, gids_in_AB, MB_release_data, artist_credit_from_re return MB_language_data -def load_script(connection, gids_in_AB, MB_release_data, artist_credit_from_recording): +def load_script(connection, MB_release_data, artist_credit_from_recording): """Fetch script table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -515,7 +515,7 @@ def load_script(connection, gids_in_AB, MB_release_data, artist_credit_from_reco return MB_script_data -def load_gender(connection, gids_in_AB): +def load_gender(connection): """ Fetch gender table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -534,7 +534,7 @@ def load_gender(connection, gids_in_AB): return MB_gender_data -def load_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording): +def load_area(connection, MB_artist_data, artist_credit_from_recording): """ Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -595,7 +595,7 @@ def load_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recordi return MB_area_data -def load_begin_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording): +def load_begin_area(connection, MB_artist_data, artist_credit_from_recording): """Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for begin area column. @@ -656,7 +656,7 @@ def load_begin_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_r return MB_begin_area_data -def load_end_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording): +def load_end_area(connection, MB_artist_data, artist_credit_from_recording): """Fetch area table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database for end area column. @@ -714,7 +714,7 @@ def load_end_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_rec return MB_end_area_data -def load_artist_credit_name(connection, gids_in_AB, artist_credit_from_recording): +def load_artist_credit_name(connection, artist_credit_from_recording): """Fetch artist_credit_name table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -740,7 +740,7 @@ def load_artist_credit_name(connection, gids_in_AB, artist_credit_from_recording return MB_artist_credit_name_data -def load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gid_redirect_data, artist_credit_from_recording): +def load_artist(connection, MB_artist_credit_name_data, MB_artist_gid_redirect_data, artist_credit_from_recording): """Fetch artist table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -798,7 +798,7 @@ def load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gi return MB_artist_data -def load_artist_gid_redirect(connection, gids_in_AB, artist_credit_from_recording): +def load_artist_gid_redirect(connection, artist_credit_from_recording): """Fetch artist_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -889,7 +889,7 @@ def load_recording_gid_redirect(connection, gids_in_AB): return MB_recording_gid_redirect_data -def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data, artist_credit_from_recording): +def load_release_group(connection, MB_release_group_gid_redirect_data, MB_release_data, artist_credit_from_recording): """Fetch release_group table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -915,7 +915,7 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat # Get data corresponding to release_group column in release table. MB_release_fk_release_group = list({value['release_group'] for value in MB_release_data}) - if gids_in_AB: + if artist_credit_from_recording: filters.append("release_group.artist_credit in :credit") filter_data["credit"] = tuple(artist_credit_from_recording) @@ -951,7 +951,7 @@ def load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_dat return MB_release_group_data -def load_release_group_gid_redirect(connection, gids_in_AB, artist_credit_from_recording): +def load_release_group_gid_redirect(connection, artist_credit_from_recording): """Fetch release_group_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -975,7 +975,7 @@ def load_release_group_gid_redirect(connection, gids_in_AB, artist_credit_from_r return MB_release_group_gid_redirect_data -def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect_data, artist_credit_from_recording): +def load_release(connection, MB_medium_data, MB_release_gid_redirect_data, artist_credit_from_recording): """Fetch release table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -1042,7 +1042,7 @@ def load_release(connection, gids_in_AB, MB_medium_data, MB_release_gid_redirect return MB_release_data -def load_release_gid_redirect(connection, gids_in_AB, artist_credit_from_recording): +def load_release_gid_redirect(connection, artist_credit_from_recording): """Fetch release_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -1066,7 +1066,7 @@ def load_release_gid_redirect(connection, gids_in_AB, artist_credit_from_recordi return MB_release_gid_redirect_data -def load_medium(connection, gids_in_AB, MB_track_data, artist_credit_from_recording): +def load_medium(connection, MB_track_data, artist_credit_from_recording): """Fetch medium table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -1119,7 +1119,7 @@ def load_medium(connection, gids_in_AB, MB_track_data, artist_credit_from_record return MB_medium_data -def load_track(connection, gids_in_AB, MB_track_gid_redirect_data, id_from_recording): +def load_track(connection, MB_track_gid_redirect_data, id_from_recording): """Fetch track table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
@@ -1173,7 +1173,7 @@ def load_track(connection, gids_in_AB, MB_track_gid_redirect_data, id_from_recor return MB_track_data -def load_track_gid_redirect(connection, gids_in_AB, id_from_recording): +def load_track_gid_redirect(connection, id_from_recording): """Fetch track_gid_redirect table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -1949,56 +1949,56 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # track_gid_redirect try: logging.info('Getting track gid redirect data...') - MB_track_gid_redirect_data = load_track_gid_redirect(connection, gids_in_AB, id_from_recording) + MB_track_gid_redirect_data = load_track_gid_redirect(connection, id_from_recording) except ValueError: logging.info("No Data found from track gid redirect table for the recordings") # track try: logging.info('Getting track data...') - MB_track_data = load_track(connection, gids_in_AB, MB_track_gid_redirect_data, id_from_recording) + MB_track_data = load_track(connection, MB_track_gid_redirect_data, id_from_recording) except ValueError: logging.info("No Data found from track table for the recordings") # medium try: logging.info('Getting medium data...') - MB_medium_data = load_medium(connection, gids_in_AB, MB_track_data, artist_credit_from_recording) + MB_medium_data = load_medium(connection, MB_track_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from medium table for the recordings") # release_gid_redirect try: logging.info('Getting release gid redirect data...') - MB_release_gid_redirect_data = load_release_gid_redirect(connection, gids_in_AB, artist_credit_from_recording) + MB_release_gid_redirect_data = load_release_gid_redirect(connection, artist_credit_from_recording) except ValueError: logging.info("No Data found from release gid redirect table for the recordings") # release try: logging.info('Getting release data...') - MB_release_data = load_release(connection, gids_in_AB, MB_medium_data, 
MB_release_gid_redirect_data, artist_credit_from_recording) + MB_release_data = load_release(connection, MB_medium_data, MB_release_gid_redirect_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release table for the recordings") # artist_credit_name try: logging.info('Getting artist credit name data...') - MB_artist_credit_name_data = load_artist_credit_name(connection, gids_in_AB, artist_credit_from_recording) + MB_artist_credit_name_data = load_artist_credit_name(connection, artist_credit_from_recording) except ValueError: logging.info("No Data found from artist credit name table for the recordings") # artist_gid_redirect try: logging.info('Getting artist gid redirect data...') - MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, gids_in_AB, artist_credit_from_recording) + MB_artist_gid_redirect_data = load_artist_gid_redirect(connection, artist_credit_from_recording) except ValueError: logging.info("No Data found from artist gid redirect table for the recordings") # artist try: logging.info('Getting artist data...') - MB_artist_data = load_artist(connection, gids_in_AB, MB_artist_credit_name_data, MB_artist_gid_redirect_data, artist_credit_from_recording) + MB_artist_data = load_artist(connection, MB_artist_credit_name_data, MB_artist_gid_redirect_data, artist_credit_from_recording) artist_type_from_artist = [value[10] for value in MB_artist_data] except ValueError: logging.info("No Data found from artist table for the recordings") @@ -2006,119 +2006,119 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # artist_type try: logging.info('Getting artist type data...') - MB_artist_type_data = load_artist_type(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording) + MB_artist_type_data = load_artist_type(connection, MB_artist_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from artist type table for the recordings") # area try: logging.info('Getting area 
data...') - MB_area_data = load_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording) + MB_area_data = load_area(connection, MB_artist_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from area table for the recordings") # begin_area try: logging.info('Getting begin area data...') - MB_begin_area_data = load_begin_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording) + MB_begin_area_data = load_begin_area(connection, MB_artist_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from area table for the recordings") # end_area try: logging.info('Getting end area data...') - MB_end_area_data = load_end_area(connection, gids_in_AB, MB_artist_data, artist_credit_from_recording) + MB_end_area_data = load_end_area(connection, MB_artist_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from area table for the recordings") # area_type try: logging.info('Getting area type data...') - MB_area_type_data = load_area_type(connection, gids_in_AB, MB_area_data, artist_credit_from_recording) + MB_area_type_data = load_area_type(connection, MB_area_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from area type table for the recordings") # begin_area_type try: logging.info('Getting begin area type data...') - MB_begin_area_type_data = load_begin_area_type(connection, gids_in_AB, artist_credit_from_recording) + MB_begin_area_type_data = load_begin_area_type(connection, artist_credit_from_recording) except ValueError: logging.info("No Data found from area type table for the recordings") # end_area_type try: logging.info('Getting end area data...') - MB_end_area_type_data = load_end_area_type(connection, gids_in_AB, artist_credit_from_recording) + MB_end_area_type_data = load_end_area_type(connection, artist_credit_from_recording) except ValueError: logging.info("No Data found from area type table for the 
recordings") # gender try: logging.info('Getting gender data...') - MB_gender_data = load_gender(connection, gids_in_AB) + MB_gender_data = load_gender(connection) except ValueError: logging.info("No Data found from gender table for the recordings") # language try: logging.info('Getting language data...') - MB_language_data = load_language(connection, gids_in_AB, MB_release_data, artist_credit_from_recording) + MB_language_data = load_language(connection, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from language table for the recordings") # medium_format try: logging.info('Getting medium format data...') - MB_medium_format_data = load_medium_format(connection, gids_in_AB) + MB_medium_format_data = load_medium_format(connection) except ValueError: logging.info("No Data found from medium format table for the recordings") # release_group gid redirect try: logging.info('Getting release group gid redirect data...') - MB_release_group_gid_redirect_data = load_release_group_gid_redirect(connection, gids_in_AB, artist_credit_from_recording) + MB_release_group_gid_redirect_data = load_release_group_gid_redirect(connection, artist_credit_from_recording) except ValueError: logging.info("No Data found from release group gid redirect table for the recordings") # release_group try: logging.info('Getting release group data...') - MB_release_group_data = load_release_group(connection, gids_in_AB, MB_release_group_gid_redirect_data, MB_release_data, artist_credit_from_recording) + MB_release_group_data = load_release_group(connection, MB_release_group_gid_redirect_data, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release group table for the recordings") # artist_credit try: logging.info('Getting artist credit data...') - MB_artist_credit_data = load_artist_credit(connection, gids_in_AB, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, 
artist_credit_from_recording) + MB_artist_credit_data = load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from artist credit table for the recordings") # release_group_primary_type try: logging.info('Getting release group primary type data...') - MB_release_group_primary_type_data = load_release_group_primary_type(connection, gids_in_AB, MB_release_group_data, artist_credit_from_recording) + MB_release_group_primary_type_data = load_release_group_primary_type(connection, MB_release_group_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release group primary type table for the recordings") # release_packaging try: logging.info('Getting release packaging data...') - MB_release_packaging_data = load_release_packaging(connection, gids_in_AB, MB_release_data, artist_credit_from_recording) + MB_release_packaging_data = load_release_packaging(connection, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release packaging table for the recordings") # release_status try: logging.info('Getting release status data...') - MB_release_status_data = load_release_status(connection, gids_in_AB, MB_release_data, artist_credit_from_recording) + MB_release_status_data = load_release_status(connection, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from release status table for the recordings") # script try: logging.info('Getting script data...\n') - MB_script_data = load_script(connection, gids_in_AB, MB_release_data, artist_credit_from_recording) + MB_script_data = load_script(connection, MB_release_data, artist_credit_from_recording) except ValueError: logging.info("No Data found from script table for the recordings") From 30e834829463163a345790f514fb56e4a8d85d40 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 5 
Jul 2018 20:01:34 +0530 Subject: [PATCH 052/125] Add more documentation for new function's parameters and update some docs --- db/import_mb_data.py | 81 +++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 1ca95aa89..6c64049ac 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -22,6 +22,7 @@ def load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_tr MB_track_data (of type - sqlalchemy.resultproxy): data retrieved from the track table of the MusicBrainz database (should contain artist credit values). MB_artist_credit_name_data: artist_credit_name_data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: artist_credit data fetched from MusicBrainz database. """ @@ -88,6 +89,7 @@ def load_artist_type(connection, MB_artist_data, artist_credit_from_recording): connection: database connection to execute the query. MB_artist_data (of type - sqlalchemy.resultproxy): data retrieved from the artist table of the MusicBrainz database (should contain artist type values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: artist_type data fetched from MusicBrainz database. """ @@ -140,6 +142,7 @@ def load_area_type(connection, MB_area_data, artist_credit_from_recording): connection: database connection to execute the query. MB_area_data (of type - sqlalchemy.resultproxy): data retrieved from the area table of the MusicBrainz database(should contain area type values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: area_type data fetched from MusicBrainz database. 
""" @@ -191,6 +194,7 @@ def load_begin_area_type(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: begin_area_type data fetched from MusicBrainz database. """ @@ -223,6 +227,7 @@ def load_end_area_type(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: end_area_type data fetched from MusicBrainz database. """ @@ -256,6 +261,7 @@ def load_release_status(connection, MB_release_data, artist_credit_from_recordin connection: database connection to execute the query. MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the release table of the MusicBrainz database (should contain release_status values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: release_status data fetched from MusicBrainz database. """ @@ -307,6 +313,7 @@ def load_release_group_primary_type(connection, MB_release_group_data, artist_cr MB_release_group_data (of type - sqlalchemy.resultproxy): data retrieved from the release_group table of the MusicBrainz database (should contain release_group_primary_type values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: release_group_primary_type data fetched from MusicBrainz database. """ @@ -374,6 +381,7 @@ def load_release_packaging(connection, MB_release_data, artist_credit_from_recor connection: database connection to execute the query. 
MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the release table of the MusicBrainz database (should contain release_packaging values) + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: release_packaging data fetched from MusicBrainz database. """ @@ -425,6 +433,7 @@ def load_language(connection, MB_release_data, artist_credit_from_recording): connection: database connection to execute the query. MB_release_data(of type - sqlalchemy.resultproxy): data retrieved from the release table of the MusicBrainz database (should contain language values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: language data fetched from MusicBrainz database. """ @@ -476,6 +485,7 @@ def load_script(connection, MB_release_data, artist_credit_from_recording): connection: database connection to execute the query. MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the release table of the MusicBrainz database (should contain script values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: script data fetched from MusicBrainz database. """ @@ -544,6 +554,7 @@ def load_area(connection, MB_artist_data, artist_credit_from_recording): connection: database connection to execute the query. MB_artist_data(of type - sqlalchemy.resultproxy): data retrieved from the artist table of the MusicBrainz database (should contain area values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: area data fetched from MusicBrainz database. """ @@ -605,6 +616,7 @@ def load_begin_area(connection, MB_artist_data, artist_credit_from_recording): connection: database connection to execute the query. 
MB_artist_data (of type - sqlalchemy.resultproxy): data retrieved from the artist table of the MusicBrainz database (should contain begin_area values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: begin_area data fetched from MusicBrainz database. """ @@ -664,6 +676,8 @@ def load_end_area(connection, MB_artist_data, artist_credit_from_recording): Args: connection: database connection to execute the query. + MB_artist_data: list of artist data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: end_area data fetched from MusicBrainz database. """ @@ -720,6 +734,7 @@ def load_artist_credit_name(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: artist_credit_name data fetched from MusicBrainz database. """ @@ -752,6 +767,7 @@ def load_artist(connection, MB_artist_credit_name_data, MB_artist_gid_redirect_d artist_credit_name table of the MusicBrainz database (should contain artist values). MB_artist_gid_redirect_data(of type - sqlalchemy.resultproxy): data retrieved from the artist_gid_redirect table of the MusicBrainz database (should contain artist values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: artist data fetched from MusicBrainz database. """ @@ -804,6 +820,7 @@ def load_artist_gid_redirect(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: artist_gid_redirect data fetched from MusicBrainz database. 
""" @@ -830,6 +847,8 @@ def load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data): Args: connection: database connection to execute the query. + gids_in_AB: list of recordings mbids present in lowlevel table in AB database. + MB_recording_gid_redirect_data: list of recording gid redirect data fetched from MusicBrainz database. Returns: recording data fetched from MusicBrainz database. """ @@ -871,6 +890,7 @@ def load_recording_gid_redirect(connection, gids_in_AB): Args: connection: database connection to execute the query. + gids_in_AB: list of recordings mbids present in lowlevel table in AB database. Returns: recording_gid_redirect data fetched from MusicBrainz database. """ @@ -903,6 +923,7 @@ def load_release_group(connection, MB_release_group_gid_redirect_data, MB_releas (should contain release_group values). MB_release_data(of type - sqlalchemy.resultproxy): data retrieved from the release table of the MusicBrainz database (should contain release_group values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: release_group data fetched from MusicBrainz database. """ @@ -957,6 +978,7 @@ def load_release_group_gid_redirect(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: release_group_gid_redirect data fetched from MusicBrainz database. """ @@ -988,6 +1010,7 @@ def load_release(connection, MB_medium_data, MB_release_gid_redirect_data, artis MB_release_gid_redirect_data (of type - sqlalchemy.resultproxy): data retrieved from the release_gid_redirect table of the MusicBrainz database (should contain release values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: release data fetched from MusicBrainz database. 
""" @@ -1048,6 +1071,7 @@ def load_release_gid_redirect(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: release_gid_redirect data fetched from MusicBrainz database. """ @@ -1076,6 +1100,7 @@ def load_medium(connection, MB_track_data, artist_credit_from_recording): connection: database connection to execute the query. MB_track_data (of type - sqlalchemy.resultproxy): data retrieved from the track table of the MusicBrainz database (should contain medium values). + artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: medium data fetched from MusicBrainz database. """ @@ -1129,6 +1154,7 @@ def load_track(connection, MB_track_gid_redirect_data, id_from_recording): connection: database connection to execute the query. MB_track_gid_redirect_data (of type - sqlalchemy.resultproxy): data retrieved from the track_gid_redirect table of the MusicBrainz database (should contain track values). + id_from_recording: list of recording ids from recording data fetched from MusicBrainz database. Returns: track data fetched from MusicBrainz database. """ @@ -1179,6 +1205,7 @@ def load_track_gid_redirect(connection, id_from_recording): Args: connection: database connection to execute the query. + id_from_recording: list of recording ids from recording data fetched from MusicBrainz database. Returns: track_gid_redirect data fetched from MusicBrainz database. """ @@ -1203,7 +1230,7 @@ def write_artist_credit(connection, MB_artist_credit_data): Args: connection: database connection to execute the query. - MB_artist_credit_data: artist_credit data fetched from MusicBrainz database. + MB_artist_credit_data: list of artist_credit data fetched from MusicBrainz database. 
""" artist_credit_query = text(""" INSERT INTO musicbrainz.artist_credit @@ -1227,7 +1254,7 @@ def write_artist_type(connection, MB_artist_type_data): Args: connection: database connection to execute the query. - MB_artist_type_data: artist_type data fetched from MusicBrainz database. + MB_artist_type_data: list of artist_type data fetched from MusicBrainz database. """ artist_type_query = text(""" INSERT INTO musicbrainz.artist_type(id, name, parent, child_order, description, gid) @@ -1252,7 +1279,7 @@ def write_area_type(connection, MB_area_type_data): Args: connection: database connection to execute the query. - MB_area_type_data: area_type data fetched from MusicBrainz database. + MB_area_type_data: list of area_type data fetched from MusicBrainz database. """ area_type_query = text(""" INSERT INTO musicbrainz.area_type @@ -1277,7 +1304,7 @@ def write_begin_area_type(connection, MB_begin_area_type_data): Args: connection: database connection to execute the query. - MB_begin_area_type_data: begin_area_type data fetched from MusicBrainz database. + MB_begin_area_type_data: list of begin_area_type data fetched from MusicBrainz database. """ begin_area_type_query = text(""" INSERT INTO musicbrainz.area_type @@ -1302,7 +1329,7 @@ def write_end_area_type(connection, MB_end_area_type_data): Args: connection: database connection to execute the query. - MB_end_area_type_data: end_area_type data fetched from MusicBrainz database. + MB_end_area_type_data: list of end_area_type data fetched from MusicBrainz database. """ end_area_type_query = text(""" INSERT INTO musicbrainz.area_type(id, name, parent, child_order, description, gid) @@ -1327,7 +1354,7 @@ def write_release_status(connection, MB_release_status_data): Args: connection: database connection to execute the query. - MB_release_status_data: release_status data fetched from MusicBrainz database. + MB_release_status_data: list of release_status data fetched from MusicBrainz database. 
""" release_status_query = text(""" INSERT INTO musicbrainz.release_status @@ -1352,7 +1379,7 @@ def write_release_group_primary_type(connection, MB_release_group_primary_type_d Args: connection: database connection to execute the query. - MB_release_group_primary_type_data: release_group_primary_type data fetched from MusicBrainz database. + MB_release_group_primary_type_data: list of release_group_primary_type data fetched from MusicBrainz database. """ release_group_primary_type_query = text(""" INSERT INTO musicbrainz.release_group_primary_type @@ -1377,7 +1404,7 @@ def write_medium_format(connection, MB_medium_format_data): Args: connection: database connection to execute the query. - MB_medium_format_data: medium_format data fetched from MusicBrainz database. + MB_medium_format_data: list of medium_format data fetched from MusicBrainz database. """ medium_format_query = text(""" INSERT INTO musicbrainz.medium_format @@ -1405,7 +1432,7 @@ def write_release_packaging(connection, MB_release_packaging_data): Args: connection: database connection to execute the query. - MB_release_packaging_data: release_packaging data fetched from MusicBrainz database. + MB_release_packaging_data: list of release_packaging data fetched from MusicBrainz database. """ release_packaging_query = text(""" INSERT INTO musicbrainz.release_packaging @@ -1430,7 +1457,7 @@ def write_language(connection, MB_language_data): Args: connection: database connection to execute the query. - MB_language_data: language data fetched from MusicBrainz database. + MB_language_data: list of language data fetched from MusicBrainz database. """ language_query = text(""" INSERT INTO musicbrainz.language @@ -1456,7 +1483,7 @@ def write_script(connection, MB_script_data): Args: connection: database connection to execute the query. - MB_script_data: script data fetched from MusicBrainz database. + MB_script_data: list of script data fetched from MusicBrainz database. 
""" script_query = text(""" INSERT INTO musicbrainz.script @@ -1480,7 +1507,7 @@ def write_gender(connection, MB_gender_data): Args: connection: database connection to execute the query. - MB_gender_data: gender data fetched from MusicBrainz database. + MB_gender_data: list of gender data fetched from MusicBrainz database. """ gender_query = text(""" INSERT INTO musicbrainz.gender @@ -1505,7 +1532,7 @@ def write_area(connection, MB_area_data): Args: connection: database connection to execute the query. - MB_area_data: area data fetched from MusicBrainz database. + MB_area_data: list of area data fetched from MusicBrainz database. """ area_query = text(""" INSERT INTO musicbrainz.area @@ -1541,7 +1568,7 @@ def write_begin_area(connection, MB_begin_area_data): Args: connection: database connection to execute the query. - MB_begin_area_data: begin_area data fetched from MusicBrainz database. + MB_begin_area_data: list of begin_area data fetched from MusicBrainz database. """ begin_area_query = text(""" INSERT INTO musicbrainz.area @@ -1576,7 +1603,7 @@ def write_end_area(connection, MB_end_area_data): Args: connection: database connection to execute the query. - MB_end_area_data: end_area data fetched from MusicBrainz database. + MB_end_area_data: list of end_area data fetched from MusicBrainz database. """ end_area_query = text(""" INSERT INTO musicbrainz.area @@ -1611,7 +1638,7 @@ def write_artist(connection, MB_artist_data): Args: connection: database connection to execute the query. - MB_artist_data: artist data fetched from MusicBrainz database. + MB_artist_data: list of artist data fetched from MusicBrainz database. """ artist_query = text(""" INSERT INTO musicbrainz.artist @@ -1651,7 +1678,7 @@ def write_artist_credit_name(connection, MB_artist_credit_name_data): Args: connection: database connection to execute the query. - MB_artist_credit_name_data: artist_credit_name data fetched from MusicBrainz database. 
+ MB_artist_credit_name_data: list of artist_credit_name data fetched from MusicBrainz database. """ artist_credit_name_query = text(""" INSERT INTO musicbrainz.artist_credit_name @@ -1675,7 +1702,7 @@ def write_artist_gid_redirect(connection, MB_artist_gid_redirect_data): Args: connection: database connection to execute the query. - MB_artist_gid_redirect_data: artist_gid_redirect data fetched from MusicBrainz database. + MB_artist_gid_redirect_data: list of artist_gid_redirect data fetched from MusicBrainz database. """ artist_gid_redirect_query = text(""" INSERT INTO musicbrainz.artist_gid_redirect @@ -1697,7 +1724,7 @@ def write_recording(connection, MB_recording_data): Args: connection: database connection to execute the query. - MB_recording_data: recording data fetched from MusicBrainz database. + MB_recording_data: list of recording data fetched from MusicBrainz database. """ recording_query = text(""" INSERT INTO musicbrainz.recording @@ -1725,7 +1752,7 @@ def write_recording_gid_redirect(connection, MB_recording_gid_redirect_data): Args: connection: database connection to execute the query. - MB_recording_gid_redirect_data: recording_gid_redirect data fetched from MusicBrainz database. + MB_recording_gid_redirect_data: list of recording_gid_redirect data fetched from MusicBrainz database. """ recording_gid_redirect_query = text(""" INSERT INTO musicbrainz.recording_gid_redirect @@ -1746,7 +1773,7 @@ def write_release_group(connection, MB_release_group_data): Args: connection: database connection to execute the query. - MB_release_group_data: release_group data fetched from MusicBrainz database. + MB_release_group_data: list of release_group data fetched from MusicBrainz database. """ release_group_query = text(""" INSERT INTO musicbrainz.release_group @@ -1773,7 +1800,7 @@ def write_release_group_gid_redirect(connection, MB_release_gid_redirect_data): Args: connection: database connection to execute the query. 
- MB_release_group_gid_redirect_data: release_group_gid_redirect data fetched from MusicBrainz database. + MB_release_group_gid_redirect_data: list of release_group_gid_redirect data fetched from MusicBrainz database. """ release_group_gid_redirect_query = text(""" INSERT INTO musicbrainz.release_group_gid_redirect @@ -1795,7 +1822,7 @@ def write_release(connection, MB_release_data): Args: connection: database connection to execute the query. - MB_release_data: release data fetched from MusicBrainz database. + MB_release_data: list of release data fetched from MusicBrainz database. """ release_query = text(""" INSERT INTO musicbrainz.release @@ -1829,7 +1856,7 @@ def write_release_gid_redirect(connection, MB_release_gid_redirect_data): Args: connection: database connection to execute the query. - MB_release_gid_redirect_data: release_gid_redirect data fetched from MusicBrainz database. + MB_release_gid_redirect_data: list of release_gid_redirect data fetched from MusicBrainz database. """ release_gid_redirect_query = text(""" INSERT INTO musicbrainz.release_gid_redirect @@ -1851,7 +1878,7 @@ def write_medium(connection, MB_medium_data): Args: connection: database connection to execute the query. - MB_medium_data: medium data fetched from MusicBrainz database. + MB_medium_data: list of medium data fetched from MusicBrainz database. """ medium_query = text(""" INSERT INTO musicbrainz.medium @@ -1878,7 +1905,7 @@ def write_track(connection, MB_track_data): Args: connection: database connection to execute the query. - MB_track_data: track data fetched from MusicBrainz database. + MB_track_data: list of track data fetched from MusicBrainz database. """ track_query = text(""" INSERT INTO musicbrainz.track @@ -1910,7 +1937,7 @@ def write_track_gid_redirect(connection, MB_track_gid_redirect_data): Args: connection: database connection to execute the query. - MB_track_gid_redirect_data: track_gid_redirect data fetched from MusicBrainz database. 
+ MB_track_gid_redirect_data: list of track_gid_redirect data fetched from MusicBrainz database. """ track_gid_redirect_query = text(""" INSERT INTO musicbrainz.track_gid_redirect From 6513180362314b0dee6c837fc6c444010785b277 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 6 Jul 2018 18:51:28 +0530 Subject: [PATCH 053/125] Update some docstrings --- db/import_mb_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 6c64049ac..fbe035f83 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -676,7 +676,7 @@ def load_end_area(connection, MB_artist_data, artist_credit_from_recording): Args: connection: database connection to execute the query. - MB_artist_data: list of artist data fetched from MusicBrainz database. + MB_artist_data: artist data fetched from MusicBrainz database. artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: end_area data fetched from MusicBrainz database. From 8d64b95afc6effa53a1e294cedb730c147deb88c Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 6 Jul 2018 21:27:14 +0530 Subject: [PATCH 054/125] Improve some queries whose number of rows in actual MB db is less than 15 --- db/import_mb_data.py | 228 ++++++------------------------------------- 1 file changed, 32 insertions(+), 196 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index fbe035f83..0e371b063 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -79,109 +79,39 @@ def load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_tr return MB_artist_credit_data -def load_artist_type(connection, MB_artist_data, artist_credit_from_recording): +def load_artist_type(connection): """Fetch artist_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. - Also fetch data corresponding to artist table. - Args: connection: database connection to execute the query. 
- MB_artist_data (of type - sqlalchemy.resultproxy): data retrieved from the artist - table of the MusicBrainz database (should contain artist type values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: artist_type data fetched from MusicBrainz database. """ - filters = [] - filter_data = {} - - # Get data corresponding to type column in artist table - MB_artist_fk_artist_type = list({value['type'] for value in MB_artist_data}) - - if artist_credit_from_recording: - filters.append("artist_credit.id in :ids") - filter_data["ids"] = tuple(artist_credit_from_recording) - - if MB_artist_data: - filters.append("artist_type.id in :artist_data") - filter_data["artist_data"] = tuple(MB_artist_fk_artist_type) - - filterstr = " OR ".join(filters) - if filterstr: - filterstr = " WHERE " + filterstr - artist_type_query = text(""" - SELECT artist_type.id, - artist_type.name, - artist_type.parent, - artist_type.child_order, - artist_type.description, - artist_type.gid - FROM artist_type - INNER JOIN artist - ON artist.type = artist_type.id - INNER JOIN artist_credit - ON artist.id = artist_credit.id - {filterstr} - """.format(filterstr=filterstr) - ) - result = connection.execute(artist_type_query, filter_data) + SELECT * FROM artist_type + ORDER BY id + """) + result = connection.execute(artist_type_query) MB_artist_type_data = result.fetchall() return MB_artist_type_data -def load_area_type(connection, MB_area_data, artist_credit_from_recording): +def load_area_type(connection): """Fetch area_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. - Also fetch data corresponding to area table. - Args: connection: database connection to execute the query. - MB_area_data (of type - sqlalchemy.resultproxy): data retrieved from the area table - of the MusicBrainz database(should contain area type values). 
- artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: area_type data fetched from MusicBrainz database. """ - filters = [] - filter_data = {} - - # Get data corresponding to type column in area table - MB_area_fk_area_type = list({value['type'] for value in MB_area_data}) - - if artist_credit_from_recording: - filters.append("artist_credit.id in :ids") - filter_data["ids"] = tuple(artist_credit_from_recording) - - if MB_area_data: - filters.append("area_type.id in :area_data") - filter_data["area_data"] = tuple(MB_area_fk_area_type) - - filterstr = " OR ".join(filters) - if filterstr: - filterstr = " WHERE " + filterstr - - area_type_query = text(""" - SELECT area_type.id, - area_type.name, - area_type.parent, - area_type.child_order, - area_type.description, - area_type.gid - FROM area_type - INNER JOIN area - ON area.type = area_type.id - INNER JOIN artist - ON area.id = artist.area - INNER JOIN artist_credit - ON artist.id = artist_credit.id - {filterstr} - """.format(filterstr=filterstr) - ) - result = connection.execute(area_type_query, filter_data) + area_type_query = text(""" + SELECT * FROM area_type + ORDER BY id + """) + result = connection.execute(area_type_query) MB_area_type_data = result.fetchall() return MB_area_type_data @@ -253,100 +183,39 @@ def load_end_area_type(connection, artist_credit_from_recording): return MB_end_area_type_data -def load_release_status(connection, MB_release_data, artist_credit_from_recording): +def load_release_status(connection): """Fetch release_status table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. Args: connection: database connection to execute the query. - MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the release table - of the MusicBrainz database (should contain release_status values). 
- artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: release_status data fetched from MusicBrainz database. """ - filters = [] - filter_data = {} - - # Get data corresponding to status column in release table - MB_release_fk_status = list({value['status'] for value in MB_release_data}) - - if artist_credit_from_recording: - filters.append("release.artist_credit in :ids") - filter_data["ids"] = tuple(artist_credit_from_recording) - - if MB_release_data: - filters.append("release_status.id in :data") - filter_data["data"] = tuple(MB_release_fk_status) - - filterstr = " OR ".join(filters) - if filterstr: - filterstr = " WHERE " + filterstr - release_status_query = text(""" - SELECT release_status.id, - release_status.name, - release_status.parent, - release_status.child_order, - release_status.description, - release_status.gid - FROM release_status - INNER JOIN release - ON release.status = release_status.id - {filterstr} - """.format(filterstr=filterstr) - ) - result = connection.execute(release_status_query, filter_data) + SELECT * FROM release_status + ORDER BY id + """) + result = connection.execute(release_status_query) MB_release_status_data = result.fetchall() return MB_release_status_data -def load_release_group_primary_type(connection, MB_release_group_data, artist_credit_from_recording): +def load_release_group_primary_type(connection): """Fetch release_group_primary_type table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. - Also fetch data corresponding to release_group table. - Args: connection: database connection to execute the query. - MB_release_group_data (of type - sqlalchemy.resultproxy): data retrieved from the - release_group table of the MusicBrainz database - (should contain release_group_primary_type values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. 
Returns: release_group_primary_type data fetched from MusicBrainz database. """ - filters = [] - filter_data = {} - - # Get data corresponding to release_group_primary_type column in release_group table - MB_release_group_fk_type = list({value['type'] for value in MB_release_group_data}) - - if artist_credit_from_recording: - filters.append("release_group.artist_credit in :credit") - filter_data["credit"] = tuple(artist_credit_from_recording) - - if MB_release_group_data: - filters.append("release_group_primary_type.id in :data") - filter_data["data"] = tuple(MB_release_group_fk_type) - - filterstr = " OR ".join(filters) - if filterstr: - filterstr = " WHERE " + filterstr - - release_group_primary_type_query = text(""" - SELECT release_group_primary_type.id, release_group_primary_type.name, - release_group_primary_type.parent, release_group_primary_type.child_order, - release_group_primary_type.description, release_group_primary_type.gid - FROM release_group_primary_type - INNER JOIN release_group - ON release_group_primary_type.id = release_group.type - {filterstr} - """.format(filterstr=filterstr) - ) - - result = connection.execute(release_group_primary_type_query, filter_data) + release_status_query = text(""" + SELECT * FROM release_group_primary_type + ORDER BY id + """) + result = connection.execute(release_status_query) MB_release_group_primary_type_data = result.fetchall() return MB_release_group_primary_type_data @@ -371,53 +240,20 @@ def load_medium_format(connection): return MB_medium_format_data -def load_release_packaging(connection, MB_release_data, artist_credit_from_recording): +def load_release_packaging(connection): """Fetch release_packaging table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. - Also fetch data corresponding to release table. - Args: connection: database connection to execute the query. 
- MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the - release table of the MusicBrainz database (should contain release_packaging values) - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. Returns: release_packaging data fetched from MusicBrainz database. """ - filters = [] - filter_data = {} - - # Get data corresponding to release_packaging column in release table - MB_release_fk_packaging = list({value['packaging'] for value in MB_release_data}) - - if artist_credit_from_recording: - filters.append("release.artist_credit in :credit") - filter_data["credit"] = tuple(artist_credit_from_recording) - - if MB_release_data: - filters.append("release_packaging.id in :data") - filter_data["data"] = tuple(MB_release_fk_packaging) - - filterstr = " OR ".join(filters) - if filterstr: - filterstr = " WHERE " + filterstr - release_packaging_query = text(""" - SELECT release_packaging.id, - release_packaging.name, - release_packaging.parent, - release_packaging.child_order, - release_packaging.description, - release_packaging.gid - FROM release_packaging - INNER JOIN release - ON release.packaging = release_packaging.id - {filterstr} - """.format(filterstr=filterstr) - ) - - result = connection.execute(release_packaging_query, filter_data) + SELECT * FROM release_packaging + ORDER BY id + """) + result = connection.execute(release_packaging_query) MB_release_packaging_data = result.fetchall() return MB_release_packaging_data @@ -2033,7 +1869,7 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # artist_type try: logging.info('Getting artist type data...') - MB_artist_type_data = load_artist_type(connection, MB_artist_data, artist_credit_from_recording) + MB_artist_type_data = load_artist_type(connection) except ValueError: logging.info("No Data found from artist type table for the recordings") @@ -2061,7 +1897,7 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # area_type try: 
logging.info('Getting area type data...') - MB_area_type_data = load_area_type(connection, MB_area_data, artist_credit_from_recording) + MB_area_type_data = load_area_type(connection) except ValueError: logging.info("No Data found from area type table for the recordings") @@ -2124,21 +1960,21 @@ def fetch_and_insert_musicbrainz_data(gids_in_AB): # release_group_primary_type try: logging.info('Getting release group primary type data...') - MB_release_group_primary_type_data = load_release_group_primary_type(connection, MB_release_group_data, artist_credit_from_recording) + MB_release_group_primary_type_data = load_release_group_primary_type(connection) except ValueError: logging.info("No Data found from release group primary type table for the recordings") # release_packaging try: logging.info('Getting release packaging data...') - MB_release_packaging_data = load_release_packaging(connection, MB_release_data, artist_credit_from_recording) + MB_release_packaging_data = load_release_packaging(connection) except ValueError: logging.info("No Data found from release packaging table for the recordings") # release_status try: logging.info('Getting release status data...') - MB_release_status_data = load_release_status(connection, MB_release_data, artist_credit_from_recording) + MB_release_status_data = load_release_status(connection) except ValueError: logging.info("No Data found from release status table for the recordings") From 9a0e0f23063f1dd092af7e3f6d97897334c9f9d9 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 6 Jul 2018 21:33:17 +0530 Subject: [PATCH 055/125] Add DISTINCT in queries, otherwise there was a lot duplications in the resultant data --- db/import_mb_data.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 0e371b063..1ea6973d8 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -66,8 +66,8 @@ def load_artist_credit(connection, 
MB_release_data, MB_release_group_data, MB_tr filterstr = " WHERE " + filterstr artist_credit_query = text(""" - SELECT artist_credit.id, artist_credit.name, artist_credit.artist_count, - artist_credit.ref_count, artist_credit.created + SELECT DISTINCT artist_credit.id, artist_credit.name, artist_credit.artist_count, + artist_credit.ref_count, artist_credit.created FROM artist_credit {filterstr} """.format(filterstr=filterstr) @@ -129,7 +129,7 @@ def load_begin_area_type(connection, artist_credit_from_recording): begin_area_type data fetched from MusicBrainz database. """ begin_area_type_query = text(""" - SELECT area_type.id, + SELECT DISTINCT area_type.id, area_type.name, area_type.parent, area_type.child_order, @@ -162,7 +162,7 @@ def load_end_area_type(connection, artist_credit_from_recording): end_area_type data fetched from MusicBrainz database. """ end_area_type_query = text(""" - SELECT area_type.id, + SELECT DISTINCT area_type.id, area_type.name, area_type.parent, area_type.child_order, @@ -292,7 +292,7 @@ def load_language(connection, MB_release_data, artist_credit_from_recording): filterstr = " WHERE " + filterstr language_query = text(""" - SELECT language.id, + SELECT DISTINCT language.id, language.iso_code_2t, language.iso_code_2b, language.iso_code_1, @@ -344,7 +344,7 @@ def load_script(connection, MB_release_data, artist_credit_from_recording): filterstr = " WHERE " + filterstr script_query = text(""" - SELECT script.id, + SELECT DISTINCT script.id, script.iso_code, script.iso_number, script.name, @@ -413,7 +413,7 @@ def load_area(connection, MB_artist_data, artist_credit_from_recording): filterstr = " WHERE " + filterstr area_query = text(""" - SELECT area.id, + SELECT DISTINCT area.id, area.gid, area.name, area.type, @@ -475,7 +475,7 @@ def load_begin_area(connection, MB_artist_data, artist_credit_from_recording): filterstr = " WHERE " + filterstr begin_area_query = text(""" - SELECT area.id, + SELECT DISTINCT area.id, area.gid, area.name, 
area.type, @@ -536,7 +536,7 @@ def load_end_area(connection, MB_artist_data, artist_credit_from_recording): filterstr = " WHERE " + filterstr end_area_query = text(""" - SELECT area.id, + SELECT DISTINCT area.id, area.gid, area.name, area.type, @@ -575,7 +575,7 @@ def load_artist_credit_name(connection, artist_credit_from_recording): artist_credit_name data fetched from MusicBrainz database. """ artist_credit_name_query = text(""" - SELECT artist_credit_name.artist_credit, + SELECT DISTINCT artist_credit_name.artist_credit, artist_credit_name.position, artist_credit_name.artist, artist_credit_name.name, @@ -633,7 +633,7 @@ def load_artist(connection, MB_artist_credit_name_data, MB_artist_gid_redirect_d filterstr = " WHERE " + filterstr artist_query = text(""" - SELECT artist.id, artist.gid, artist.name, artist.sort_name, artist.begin_date_year, + SELECT DISTINCT artist.id, artist.gid, artist.name, artist.sort_name, artist.begin_date_year, artist.begin_date_month, artist.begin_date_day, artist.end_date_year, artist.end_date_month, artist.end_date_day, artist.type, artist.area, artist.gender, artist.comment, artist.edits_pending, artist.last_updated, artist.ended, artist.begin_area, artist.end_area @@ -661,7 +661,7 @@ def load_artist_gid_redirect(connection, artist_credit_from_recording): artist_gid_redirect data fetched from MusicBrainz database. 
""" artist_gid_redirect_query = text(""" - SELECT artist_gid_redirect.gid, + SELECT DISTINCT artist_gid_redirect.gid, artist_gid_redirect.new_id, artist_gid_redirect.created FROM artist_gid_redirect @@ -707,7 +707,7 @@ def load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data): filterstr = " WHERE " + filterstr recording_query = text(""" - SELECT recording.id, recording.gid, recording.name, recording.artist_credit, + SELECT DISTINCT recording.id, recording.gid, recording.name, recording.artist_credit, recording.length, recording.comment, recording.edits_pending, recording.last_updated, recording.video FROM recording @@ -731,7 +731,7 @@ def load_recording_gid_redirect(connection, gids_in_AB): recording_gid_redirect data fetched from MusicBrainz database. """ recording_gid_redirect_query = text(""" - SELECT recording_gid_redirect.gid, + SELECT DISTINCT recording_gid_redirect.gid, recording_gid_redirect.new_id, recording_gid_redirect.created FROM recording_gid_redirect @@ -789,7 +789,7 @@ def load_release_group(connection, MB_release_group_gid_redirect_data, MB_releas filterstr = " WHERE " + filterstr release_group_query = text(""" - SELECT release_group.id, + SELECT DISTINCT release_group.id, release_group.gid, release_group.name, release_group.artist_credit, @@ -819,7 +819,7 @@ def load_release_group_gid_redirect(connection, artist_credit_from_recording): release_group_gid_redirect data fetched from MusicBrainz database. 
""" release_group_gid_redirect_query = text(""" - SELECT release_group_gid_redirect.gid, + SELECT DISTINCT release_group_gid_redirect.gid, release_group_gid_redirect.new_id, release_group_gid_redirect.created FROM release_group_gid_redirect @@ -876,7 +876,7 @@ def load_release(connection, MB_medium_data, MB_release_gid_redirect_data, artis filterstr = " WHERE " + filterstr release_query = text(""" - SELECT release.id, + SELECT DISTINCT release.id, release.gid, release.name, release.artist_credit, @@ -912,7 +912,7 @@ def load_release_gid_redirect(connection, artist_credit_from_recording): release_gid_redirect data fetched from MusicBrainz database. """ release_gid_redirect_query = text(""" - SELECT release_gid_redirect.gid, + SELECT DISTINCT release_gid_redirect.gid, release_gid_redirect.new_id, release_gid_redirect.created FROM release_gid_redirect @@ -959,7 +959,7 @@ def load_medium(connection, MB_track_data, artist_credit_from_recording): filterstr = " WHERE " + filterstr medium_query = text(""" - SELECT medium.id, + SELECT DISTINCT medium.id, medium.release, medium.position, medium.format, @@ -1013,7 +1013,7 @@ def load_track(connection, MB_track_gid_redirect_data, id_from_recording): filterstr = " WHERE " + filterstr track_query = text(""" - SELECT track.id, + SELECT DISTINCT track.id, track.gid, track.recording, track.medium, @@ -1046,7 +1046,7 @@ def load_track_gid_redirect(connection, id_from_recording): track_gid_redirect data fetched from MusicBrainz database. 
""" track_gid_redirect_query = text(""" - SELECT track_gid_redirect.gid, + SELECT DISTINCT track_gid_redirect.gid, track_gid_redirect.new_id, track_gid_redirect.created FROM track_gid_redirect From 6ab4a5ec1e6aa1dad430d05b04a44e01f9279bff Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 11 Jul 2018 16:13:40 +0530 Subject: [PATCH 056/125] Add detailed docs to artist_credit_from_recording argument in all functions --- db/import_mb_data.py | 70 ++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 1ea6973d8..e84f0f8dc 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -21,8 +21,10 @@ def load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_tr table of the MusicBrainz database (should contain artist credit values). MB_track_data (of type - sqlalchemy.resultproxy): data retrieved from the track table of the MusicBrainz database (should contain artist credit values). - MB_artist_credit_name_data: artist_credit_name_data fetched from MusicBrainz database. - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + MB_artist_credit_name_data (of type - sqlalchemy.resultproxy): data retrieved from the artist_credit_name + table of the MusicBrainz database (should contain artist credit values). + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: artist_credit data fetched from MusicBrainz database. """ @@ -124,7 +126,8 @@ def load_begin_area_type(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. 
Returns: begin_area_type data fetched from MusicBrainz database. """ @@ -157,7 +160,8 @@ def load_end_area_type(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: end_area_type data fetched from MusicBrainz database. """ @@ -267,9 +271,10 @@ def load_language(connection, MB_release_data, artist_credit_from_recording): Args: connection: database connection to execute the query. - MB_release_data(of type - sqlalchemy.resultproxy): data retrieved from the + MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the release table of the MusicBrainz database (should contain language values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: language data fetched from MusicBrainz database. """ @@ -321,7 +326,8 @@ def load_script(connection, MB_release_data, artist_credit_from_recording): connection: database connection to execute the query. MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the release table of the MusicBrainz database (should contain script values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: script data fetched from MusicBrainz database. 
""" @@ -388,9 +394,10 @@ def load_area(connection, MB_artist_data, artist_credit_from_recording): Args: connection: database connection to execute the query. - MB_artist_data(of type - sqlalchemy.resultproxy): data retrieved from the + MB_artist_data (of type - sqlalchemy.resultproxy): data retrieved from the artist table of the MusicBrainz database (should contain area values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: area data fetched from MusicBrainz database. """ @@ -452,7 +459,8 @@ def load_begin_area(connection, MB_artist_data, artist_credit_from_recording): connection: database connection to execute the query. MB_artist_data (of type - sqlalchemy.resultproxy): data retrieved from the artist table of the MusicBrainz database (should contain begin_area values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: begin_area data fetched from MusicBrainz database. """ @@ -512,8 +520,10 @@ def load_end_area(connection, MB_artist_data, artist_credit_from_recording): Args: connection: database connection to execute the query. - MB_artist_data: artist data fetched from MusicBrainz database. - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + MB_artist_data (of type - sqlalchemy.resultproxy): data retrieved from the + artist table of the MusicBrainz database (should contain end_area values). + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: end_area data fetched from MusicBrainz database. 
""" @@ -570,7 +580,8 @@ def load_artist_credit_name(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: artist_credit_name data fetched from MusicBrainz database. """ @@ -599,11 +610,12 @@ def load_artist(connection, MB_artist_credit_name_data, MB_artist_gid_redirect_d Args: connection: database connection to execute the query. - MB_artist_credit_name_data(of type - sqlalchemy.resultproxy): data retrieved from the + MB_artist_credit_name_data (of type - sqlalchemy.resultproxy): data retrieved from the artist_credit_name table of the MusicBrainz database (should contain artist values). - MB_artist_gid_redirect_data(of type - sqlalchemy.resultproxy): data retrieved from the + MB_artist_gid_redirect_data (of type - sqlalchemy.resultproxy): data retrieved from the artist_gid_redirect table of the MusicBrainz database (should contain artist values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: artist data fetched from MusicBrainz database. """ @@ -656,7 +668,8 @@ def load_artist_gid_redirect(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: artist_gid_redirect data fetched from MusicBrainz database. 
""" @@ -684,7 +697,8 @@ def load_recording(connection, gids_in_AB, MB_recording_gid_redirect_data): Args: connection: database connection to execute the query. gids_in_AB: list of recordings mbids present in lowlevel table in AB database. - MB_recording_gid_redirect_data: list of recording gid redirect data fetched from MusicBrainz database. + MB_recording_gid_redirect_data (of type - sqlalchemy.resultproxy): data retrieved from the + recording_gid_redirect table of the MusicBrainz database (should contain recording values). Returns: recording data fetched from MusicBrainz database. """ @@ -757,9 +771,10 @@ def load_release_group(connection, MB_release_group_gid_redirect_data, MB_releas MB_release_group_gid_redirect_data (of type - sqlalchemy.resultproxy): data retrieved from the release_group_gid_redirect table of the MusicBrainz database (should contain release_group values). - MB_release_data(of type - sqlalchemy.resultproxy): data retrieved from the + MB_release_data (of type - sqlalchemy.resultproxy): data retrieved from the release table of the MusicBrainz database (should contain release_group values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: release_group data fetched from MusicBrainz database. """ @@ -814,7 +829,8 @@ def load_release_group_gid_redirect(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: release_group_gid_redirect data fetched from MusicBrainz database. 
""" @@ -841,12 +857,13 @@ def load_release(connection, MB_medium_data, MB_release_gid_redirect_data, artis Args: connection: database connection to execute the query. - MB_medium_data(of type - sqlalchemy.resultproxy): data retrieved from the + MB_medium_data (of type - sqlalchemy.resultproxy): data retrieved from the medium table of the MusicBrainz database (should contain release values). MB_release_gid_redirect_data (of type - sqlalchemy.resultproxy): data retrieved from the release_gid_redirect table of the MusicBrainz database (should contain release values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: release data fetched from MusicBrainz database. """ @@ -907,7 +924,8 @@ def load_release_gid_redirect(connection, artist_credit_from_recording): Args: connection: database connection to execute the query. - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: release_gid_redirect data fetched from MusicBrainz database. """ @@ -936,7 +954,8 @@ def load_medium(connection, MB_track_data, artist_credit_from_recording): connection: database connection to execute the query. MB_track_data (of type - sqlalchemy.resultproxy): data retrieved from the track table of the MusicBrainz database (should contain medium values). - artist_credit_from_recording: list of artist_credit data from recording data fetched from MusicBrainz database. + artist_credit_from_recording: list of artist_credit data from recording data retrieved from + recording table of MusicBrainz database. Returns: medium data fetched from MusicBrainz database. 
""" @@ -1316,7 +1335,6 @@ def write_script(connection, MB_script_data): """Insert data in script table in musicbrainz schema in AcousticBrainz database. - Args: connection: database connection to execute the query. MB_script_data: list of script data fetched from MusicBrainz database. From ebf14b02c53e936d4fed1878f7c26a7a33183659 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 12 Jul 2018 15:00:34 +0530 Subject: [PATCH 057/125] Change format of select queries --- db/import_mb_data.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index e84f0f8dc..b88705bc9 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -91,8 +91,9 @@ def load_artist_type(connection): artist_type data fetched from MusicBrainz database. """ artist_type_query = text(""" - SELECT * FROM artist_type - ORDER BY id + SELECT * + FROM artist_type + ORDER BY id """) result = connection.execute(artist_type_query) MB_artist_type_data = result.fetchall() @@ -110,8 +111,9 @@ def load_area_type(connection): area_type data fetched from MusicBrainz database. """ area_type_query = text(""" - SELECT * FROM area_type - ORDER BY id + SELECT * + FROM area_type + ORDER BY id """) result = connection.execute(area_type_query) MB_area_type_data = result.fetchall() @@ -197,8 +199,9 @@ def load_release_status(connection): release_status data fetched from MusicBrainz database. """ release_status_query = text(""" - SELECT * FROM release_status - ORDER BY id + SELECT * + FROM release_status + ORDER BY id """) result = connection.execute(release_status_query) MB_release_status_data = result.fetchall() @@ -216,8 +219,9 @@ def load_release_group_primary_type(connection): release_group_primary_type data fetched from MusicBrainz database. 
""" release_status_query = text(""" - SELECT * FROM release_group_primary_type - ORDER BY id + SELECT * + FROM release_group_primary_type + ORDER BY id """) result = connection.execute(release_status_query) MB_release_group_primary_type_data = result.fetchall() @@ -235,8 +239,9 @@ def load_medium_format(connection): medium_format data fetched from MusicBrainz database. """ medium_format_query = text(""" - SELECT * FROM medium_format - ORDER BY id + SELECT * + FROM medium_format + ORDER BY id """) result = connection.execute(medium_format_query) MB_medium_format_data = result.fetchall() @@ -254,8 +259,9 @@ def load_release_packaging(connection): release_packaging data fetched from MusicBrainz database. """ release_packaging_query = text(""" - SELECT * FROM release_packaging - ORDER BY id + SELECT * + FROM release_packaging + ORDER BY id """) result = connection.execute(release_packaging_query) MB_release_packaging_data = result.fetchall() @@ -377,8 +383,9 @@ def load_gender(connection): gender data fetched from MusicBrainz database. """ gender_query = text(""" - SELECT * FROM gender - ORDER BY id + SELECT * + FROM gender + ORDER BY id """) result = connection.execute(gender_query) MB_gender_data = result.fetchall() From 34c6dcf41289296d1e092b3b25b41d8964a4938a Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 12 Jul 2018 15:14:43 +0530 Subject: [PATCH 058/125] Change docstrings to include details of fetching complete data for some tables --- db/import_mb_data.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index b88705bc9..2cd2cf022 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -82,8 +82,9 @@ def load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_tr def load_artist_type(connection): - """Fetch artist_type table data from MusicBrainz database for the - recording MBIDs in AcousticBrainz database. 
+ """Fetch artist_type table data from MusicBrainz database for the recording MBIDs in + AcousticBrainz database. Retrieving complete data because the rows in MusicBrainz database + for this table are much less in number. Args: connection: database connection to execute the query. @@ -102,8 +103,9 @@ def load_artist_type(connection): def load_area_type(connection): - """Fetch area_type table data from MusicBrainz database for the - recording MBIDs in AcousticBrainz database. + """Fetch area_type table data from MusicBrainz database for the recording MBIDs in + AcousticBrainz database. Retrieving complete data because the rows in MusicBrainz database + for this table are much less in number. Args: connection: database connection to execute the query. @@ -190,8 +192,9 @@ def load_end_area_type(connection, artist_credit_from_recording): def load_release_status(connection): - """Fetch release_status table data from MusicBrainz database for the - recording MBIDs in AcousticBrainz database. + """Fetch release_status table data from MusicBrainz database for the recording MBID + in AcousticBrainz database. Retrieving complete data because the rows in MusicBrainz database + for this table are much less in number. Args: connection: database connection to execute the query. @@ -211,7 +214,8 @@ def load_release_status(connection): def load_release_group_primary_type(connection): """Fetch release_group_primary_type table data from MusicBrainz database for the - recording MBIDs in AcousticBrainz database. + recording MBIDs in AcousticBrainz database. Retrieving complete data because the rows + in MusicBrainz database for this table are much less in number. Args: connection: database connection to execute the query. @@ -230,8 +234,9 @@ def load_release_group_primary_type(connection): def load_medium_format(connection): - """Fetch medium_format table data from MusicBrainz database for the - recording MBIDs in AcousticBrainz database. 
+ """Fetch medium_format table data from MusicBrainz database for the recording MBIDs in + AcousticBrainz database. Retrieving complete data because the rows in + MusicBrainz database for this table are much less in number. Args: connection: database connection to execute the query. @@ -250,8 +255,9 @@ def load_medium_format(connection): def load_release_packaging(connection): - """Fetch release_packaging table data from MusicBrainz database for the - recording MBIDs in AcousticBrainz database. + """Fetch release_packaging table data from MusicBrainz database for the recording MBIDs in + AcousticBrainz database. Retrieving complete data because the rows in MusicBrainz database + for this table are much less in number. Args: connection: database connection to execute the query. @@ -374,8 +380,9 @@ def load_script(connection, MB_release_data, artist_credit_from_recording): def load_gender(connection): - """ Fetch gender table data from MusicBrainz database for the - recording MBIDs in AcousticBrainz database. + """ Fetch gender table data from MusicBrainz database for the recording MBIDs in + AcousticBrainz database. Retrieving complete data because the rows in MusicBrainz + database for this table are much less in number. Args: connection: database connection to execute the query. 
From a1e2cf079e3afb1775e569e939ec4972209f76eb Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 25 Jul 2018 01:12:21 +0530 Subject: [PATCH 059/125] Include a sleep schedule of 5 seconds after every batch import --- musicbrainz_importer/musicbrainz_importer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/musicbrainz_importer/musicbrainz_importer.py b/musicbrainz_importer/musicbrainz_importer.py index f0be184e8..424f2ca44 100644 --- a/musicbrainz_importer/musicbrainz_importer.py +++ b/musicbrainz_importer/musicbrainz_importer.py @@ -4,6 +4,7 @@ import db.import_mb_data SLEEP_DURATION = 30 # number of seconds to wait between runs +BATCH_SLEEP_DURATION = 5 # number of seconds to wait between batches def main(): @@ -14,6 +15,8 @@ def main(): logging.info("Importing MusicBrainz data...") logging.info('Inserting data for %d recordings...' % (len(gids_in_AB))) db.import_mb_data.fetch_and_insert_musicbrainz_data(gids_in_AB) + logging.info("Sleeping %s seconds to start next batch of import." % BATCH_SLEEP_DURATION) + time.sleep(BATCH_SLEEP_DURATION) else: logging.info("No new recording found. Sleeping %s seconds." 
% SLEEP_DURATION) time.sleep(SLEEP_DURATION) From 70bc9383e2fefae4335ae0bb9c4c7d145a257c90 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 25 Jul 2018 01:14:37 +0530 Subject: [PATCH 060/125] Add a sleep for every batch import in import_mb_data script as well --- db/import_mb_data.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 2cd2cf022..dd5e8b023 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -5,8 +5,10 @@ import logging from flask import current_app +BATCH_SLEEP_DURATION = 5 # number of seconds to wait between batches logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) + def load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, artist_credit_from_recording): """Fetch artist_credit table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. @@ -2121,6 +2123,8 @@ def start_import(): if gids_in_AB: fetch_and_insert_musicbrainz_data(gids_in_AB) + logging.info("Sleeping %s seconds to start next batch of import." % BATCH_SLEEP_DURATION) + time.sleep(BATCH_SLEEP_DURATION) else: break logging.info('Done!') From 5d0c76e9b231c4adc76b7ceaf24b0997ca82b407 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 25 Jul 2018 01:19:03 +0530 Subject: [PATCH 061/125] Move sleep variables to config file --- config.py.example | 5 +++++ db/import_mb_data.py | 5 +++-- musicbrainz_importer/musicbrainz_importer.py | 11 +++++++---- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/config.py.example b/config.py.example index b8c4f7d69..ec0e6ec91 100644 --- a/config.py.example +++ b/config.py.example @@ -66,3 +66,8 @@ FEATURE_EVAL_LOCATION = False # Maximum number of recordings to fetch at a time for importing MusicBrainz metadata. 
RECORDINGS_FETCHED_PER_BATCH = 10000 + +# Sleep duration for musicbrainz importer to wait after a complete import and +# between every 2 batches +SLEEP_DURATION = 30 # number of seconds to wait between runs +BATCH_SLEEP_DURATION = 5 # number of seconds to wait between batches diff --git a/db/import_mb_data.py b/db/import_mb_data.py index dd5e8b023..c76ea2707 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -2123,8 +2123,9 @@ def start_import(): if gids_in_AB: fetch_and_insert_musicbrainz_data(gids_in_AB) - logging.info("Sleeping %s seconds to start next batch of import." % BATCH_SLEEP_DURATION) - time.sleep(BATCH_SLEEP_DURATION) + batch_sleep = current_app.config['BATCH_SLEEP_DURATION'] + logging.info("Sleeping %s seconds to start next batch of import." % batch_sleep) + time.sleep(batch_sleep) else: break logging.info('Done!') diff --git a/musicbrainz_importer/musicbrainz_importer.py b/musicbrainz_importer/musicbrainz_importer.py index 424f2ca44..11550a4ce 100644 --- a/musicbrainz_importer/musicbrainz_importer.py +++ b/musicbrainz_importer/musicbrainz_importer.py @@ -2,6 +2,7 @@ import time import db.data import db.import_mb_data +from flask import current_app SLEEP_DURATION = 30 # number of seconds to wait between runs BATCH_SLEEP_DURATION = 5 # number of seconds to wait between batches @@ -15,8 +16,10 @@ def main(): logging.info("Importing MusicBrainz data...") logging.info('Inserting data for %d recordings...' % (len(gids_in_AB))) db.import_mb_data.fetch_and_insert_musicbrainz_data(gids_in_AB) - logging.info("Sleeping %s seconds to start next batch of import." % BATCH_SLEEP_DURATION) - time.sleep(BATCH_SLEEP_DURATION) + batch_sleep = current_app.config['BATCH_SLEEP_DURATION'] + logging.info("Sleeping %s seconds to start next batch of import." % batch_sleep) + time.sleep(batch_sleep) else: - logging.info("No new recording found. Sleeping %s seconds." 
% SLEEP_DURATION) - time.sleep(SLEEP_DURATION) + sleep = current_app.config['SLEEP_DURATION'] + logging.info("No new recording found. Sleeping %s seconds." % sleep) + time.sleep(sleep) From 0ac8b9a57f59177c95d8cd6056b2ecdbc3fdac96 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 25 Jul 2018 01:20:23 +0530 Subject: [PATCH 062/125] Change the sleep info message in musicbrainz importer script --- musicbrainz_importer/musicbrainz_importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/musicbrainz_importer/musicbrainz_importer.py b/musicbrainz_importer/musicbrainz_importer.py index 11550a4ce..396f3a039 100644 --- a/musicbrainz_importer/musicbrainz_importer.py +++ b/musicbrainz_importer/musicbrainz_importer.py @@ -17,7 +17,7 @@ def main(): logging.info('Inserting data for %d recordings...' % (len(gids_in_AB))) db.import_mb_data.fetch_and_insert_musicbrainz_data(gids_in_AB) batch_sleep = current_app.config['BATCH_SLEEP_DURATION'] - logging.info("Sleeping %s seconds to start next batch of import." % batch_sleep) + logging.info("Sleeping %s seconds before starting next batch's import." % batch_sleep) time.sleep(batch_sleep) else: sleep = current_app.config['SLEEP_DURATION'] From 076c18f100f8bb465dca3e2a44b02f8d4b328bef Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 25 Jul 2018 01:21:45 +0530 Subject: [PATCH 063/125] Change the sleep info message in import-mb_data script --- db/import_mb_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index c76ea2707..336c182d0 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -2124,7 +2124,7 @@ def start_import(): if gids_in_AB: fetch_and_insert_musicbrainz_data(gids_in_AB) batch_sleep = current_app.config['BATCH_SLEEP_DURATION'] - logging.info("Sleeping %s seconds to start next batch of import." % batch_sleep) + logging.info("Sleeping %s seconds before starting next batch's import." 
% batch_sleep) time.sleep(batch_sleep) else: break From 03060487e55d2801626af981934f32f71bdeff0b Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sat, 21 Jul 2018 17:15:32 +0530 Subject: [PATCH 064/125] Tests for writing data into musicbrainz schema tables --- db/import_mb_data.py | 6 + db/test/test_import_mb_data.py | 252 +++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+) create mode 100644 db/test/test_import_mb_data.py diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 336c182d0..3b01fb1e4 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -9,6 +9,12 @@ logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) +def load_musicbrainz_schema_data(connection, table_name): + query = text("""SELECT * FROM musicbrainz.%s""" % (table_name)) + result = connection.execute(query) + return result.fetchall() + + def load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, artist_credit_from_recording): """Fetch artist_credit table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py new file mode 100644 index 000000000..24447d19c --- /dev/null +++ b/db/test/test_import_mb_data.py @@ -0,0 +1,252 @@ +from db.testing import DatabaseTestCase, TEST_DATA_PATH, gid_types +from brainzutils import musicbrainz_db +import db +import db.exceptions +import db.import_mb_data +import os.path +import mock +import uuid +import datetime +import psycopg2 + + +class DataMusicBrainzDBTestCase(DatabaseTestCase): + + def setUp(self): + super(DataMusicBrainzDBTestCase, self).setUp() + + + def test_write_to_musicbrainz_schema_tables(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # area_type + data = [(1, u'Country', None, 1, u'Country is used for areas included (or previously included) in ISO 3166-1, e.g. 
United States.', uuid.UUID('06dd0ae4-8c74-30bb-b43d-95dcedf961de')), + (3, u'City', None, 3, u'City is used for settlements of any size, including towns and villages.', uuid.UUID('6fd8f29a-3d0a-32fc-980d-ea697b69da78')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area_type')) + + # area + data = [(24482, uuid.UUID('915a5576-b30c-4160-93cd-e1185cebb6ac'), u'Smithville', 3, 0, + datetime.datetime(2013, 11, 14, 1, 33, 0, 377353, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (222, uuid.UUID('489ce91b-6658-3307-9877-795b68554c98'), u'United States', 1, 0, + datetime.datetime(2013, 6, 15, 18, 6, 39, 593230, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (5099, uuid.UUID('29a709d8-0320-493e-8d0c-f2c386662b7f'), u'Chicago', 3, 0, + datetime.datetime(2013, 5, 24, 20, 27, 13, 405462, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u'') + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area')) + + # artist_type + data = [(1, u'Person', None, 1, None, uuid.UUID('b6e035f4-3ce9-331c-97df-83397230b0df')), + (2, u'Group', None, 2, None, uuid.UUID('e431f5f6-b5d2-343d-8b36-72607fffb74b')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_type')) + + # gender + data = [(1, u'Male', None, 1, None, uuid.UUID('36d3d30a-839d-3eda-8cb3-29be4384e4a9')), + (2, u'Female', None, 2, None, uuid.UUID('93452b5a-a947-30c8-934f-6a4056b151c2')) + ] + + with db.engine.begin() as connection: + 
db.import_mb_data.write_gender(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'gender')) + + # artist + data = [(6747, uuid.UUID('1b62df85-00d2-464f-81bc-a5c0cdcad278'), u'Tampa Red', u'Tampa Red', 1904, 1, 8, 1981, 3, 19, 1, 222, 1, u'', 0, + datetime.datetime(2016, 8, 21, 5, 0, 58, 662928, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), True, 24482, 5099) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist')) + + # artist_gid_redirect + data = [(uuid.UUID('6873559d-8cb9-494d-9f78-4c1eeab1f851'), 6747, datetime.datetime(2016, 3, 13, 23, 0, 21, 981437, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_gid_redirect')) + + # artist_credit_name + data = [(6747, 0, 6747, u'Tampa Red', u'')] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit_name(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit_name')) + + # recording + data = [(11768371, uuid.UUID('d51cf7fb-97e1-4070-a40b-b03707f91c92'), u'(Rinaldo, HWV 7: Act I. 
"Combatti da forte" (Almirena)', 73502, 203000, u'', 0, None, False), + (8598260, uuid.UUID('9086b742-358b-4f73-9a14-84cb1a9ce4ce'), u'Love Story', 399541, 235000, u'', 0, None, False) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_recording(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording')) + + # recording_gid_redirect + data = [(uuid.UUID('05e1ab2e-f54f-464b-a1fd-fcc6bceaaa20'), 8598260, datetime.datetime(2011, 5, 16, 16, 8, 20, 288158, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + + with db.engine.begin() as connection: + db.import_mb_data.write_recording_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording_gid_redirect')) + + # release_group_primary_type + data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_primary_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) + + # release_group + data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, + datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, + datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) + + # release_group_gid_redirect + data = [(uuid.UUID('21f0a3e8-c37b-33a1-b769-daf16e4e252e'), 617137, datetime.datetime(2011, 5, 16, 
14, 57, 6, 530063, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_gid_redirect')) + + # language + data = [(120, u'eng', u'eng', u'en', u'English', 2, None)] + + with db.engine.begin() as connection: + db.import_mb_data.write_language(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'language')) + + # release_status + data = [(1, u'Official', None, 1, u'Any release officially sanctioned by the artist and/or their record company. Most releases will fit into this category.', + uuid.UUID('4e304316-386d-3409-af2e-78857eec5cfe'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_status(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_status')) + + # release_packaging + data = [(1, u'Jewel Case', None, 0, u'The traditional CD case, made of hard, brittle plastic.', + uuid.UUID('ec27701a-4a22-37f4-bfac-6616e0f9750a')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_packaging(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_packaging')) + + # script + data = [(28, u'Latn', u'215', u'Latin', 4)] + + with db.engine.begin() as connection: + db.import_mb_data.write_script(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'script')) + + # release + data = [(692283, uuid.UUID('a830f892-6be0-35f0-a392-b91d89d89a94'), u'The Masterworks', 847994, 631361, 1, None, 120, 28, u'5028421923901', u'', 0, -1, + datetime.datetime(2018, 5, 13, 11, 0, 22, 832493, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + 
db.import_mb_data.write_release(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) + + # release_gid_redirect + data = [(uuid.UUID('03c44c5d-cbe5-32b2-af20-376a30fd98a0'), 692283, datetime.datetime(2011, 5, 16, 15, 59, 0, 785958, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_gid_redirect')) + + # medium_format + data = [(1, u'CD', None, 0, 1982, True, None, uuid.UUID('9712d52a-4509-3d4b-a1a2-67c88c643e31'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_medium_format(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium_format')) + + # medium + data = [(1089027, 692283, 20, 1, u'Rinaldo, Part 1', 0, datetime.datetime(2011, 10, 24, 21, 0, 13, 19209, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 21)] + + with db.engine.begin() as connection: + db.import_mb_data.write_medium(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium')) + + # track + data = [(11261020, uuid.UUID('e0537cb9-4720-3eb3-a07a-d8a7477519ea'), 11768371, 1089027, 5, u'5', u'Rinaldo, HWV 7: Act I. 
"Combatti da forte" (Almirena)', + 831440, 203000, 0, datetime.datetime(2013, 7, 13, 11, 0, 38, 285946, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), False)] + + with db.engine.begin() as connection: + db.import_mb_data.write_track(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'track')) + + # track_gid_redirect + data = [(uuid.UUID('67a0d0cd-fd61-328d-80a2-ca888c5fd15c'), 11261020, datetime.datetime(2014, 10, 15, 0, 0, 9, 772435, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + + with db.engine.begin() as connection: + db.import_mb_data.write_track_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'track_gid_redirect')) + + + # def test_write_artist_type(self): + + # def test_write_area_type(self): + + # def test_write_release_status(self): + + # def test_write_release_group_primary_type(self): + + # def test_write_medium_format(self): + + # def test_write_release_packaging(self): + + # def test_write_language(self): + + # def test_write_script(self): + + # def test_write_gender(self): + + # def test_write_area_type_and_area(self): + + # def test_write_artist_type_and_artist_and_artist_gid_redirect(self): + + # def test_write_artist_credit_name(self): + + # def test_write_artist_gid_redirect(self): + + # def test_write_recording(sel + + # def test_write_release(self): + + # def test_write_release_gid_redirect(self): + + # def test_write_release_group_gid_redirect(self): + + # def test_write_track(self): + + # def test_write_track_gid_redirect(self): + + # def test_write_medium(self): From d4155ebb2e9a65193be0f77a6d1b55589834ae56 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 13:01:07 +0530 Subject: [PATCH 065/125] Add tests in modular form for area, artist & gid_redirect, artist_credit, artist_credit_name --- db/test/test_import_mb_data.py | 227 ++++++++++++++++++++++++++++++++- 1 file changed, 225 
insertions(+), 2 deletions(-) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index 24447d19c..79b6673c2 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -16,7 +16,158 @@ def setUp(self): super(DataMusicBrainzDBTestCase, self).setUp() - def test_write_to_musicbrainz_schema_tables(self): + def test_load_and_write_area(self): + + # area_type + data = [(1, u'Country', None, 1, u'Country is used for areas included (or previously included) in ISO 3166-1, e.g. United States.', uuid.UUID('06dd0ae4-8c74-30bb-b43d-95dcedf961de')), + (3, u'City', None, 3, u'City is used for settlements of any size, including towns and villages.', uuid.UUID('6fd8f29a-3d0a-32fc-980d-ea697b69da78')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area_type')) + + # area + data = [(24482, uuid.UUID('915a5576-b30c-4160-93cd-e1185cebb6ac'), u'Smithville', 3, 0, + datetime.datetime(2013, 11, 14, 1, 33, 0, 377353, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (222, uuid.UUID('489ce91b-6658-3307-9877-795b68554c98'), u'United States', 1, 0, + datetime.datetime(2013, 6, 15, 18, 6, 39, 593230, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (5099, uuid.UUID('29a709d8-0320-493e-8d0c-f2c386662b7f'), u'Chicago', 3, 0, + datetime.datetime(2013, 5, 24, 20, 27, 13, 405462, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u'') + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area')) + + + def test_load_and_write_artist(self): + + # area_type + data = [(1, u'Country', None, 1, u'Country is used for 
areas included (or previously included) in ISO 3166-1, e.g. United States.', uuid.UUID('06dd0ae4-8c74-30bb-b43d-95dcedf961de')), + (3, u'City', None, 3, u'City is used for settlements of any size, including towns and villages.', uuid.UUID('6fd8f29a-3d0a-32fc-980d-ea697b69da78')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area_type')) + + # area + data = [(24482, uuid.UUID('915a5576-b30c-4160-93cd-e1185cebb6ac'), u'Smithville', 3, 0, + datetime.datetime(2013, 11, 14, 1, 33, 0, 377353, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (222, uuid.UUID('489ce91b-6658-3307-9877-795b68554c98'), u'United States', 1, 0, + datetime.datetime(2013, 6, 15, 18, 6, 39, 593230, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (5099, uuid.UUID('29a709d8-0320-493e-8d0c-f2c386662b7f'), u'Chicago', 3, 0, + datetime.datetime(2013, 5, 24, 20, 27, 13, 405462, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u'') + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area')) + + # artist_type + data = [(1, u'Person', None, 1, None, uuid.UUID('b6e035f4-3ce9-331c-97df-83397230b0df')), + (2, u'Group', None, 2, None, uuid.UUID('e431f5f6-b5d2-343d-8b36-72607fffb74b')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_type')) + + # gender + data = [(1, u'Male', None, 1, None, uuid.UUID('36d3d30a-839d-3eda-8cb3-29be4384e4a9')), + (2, u'Female', None, 2, None, uuid.UUID('93452b5a-a947-30c8-934f-6a4056b151c2')) + 
] + + with db.engine.begin() as connection: + db.import_mb_data.write_gender(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'gender')) + + # artist + data = [(6747, uuid.UUID('1b62df85-00d2-464f-81bc-a5c0cdcad278'), u'Tampa Red', u'Tampa Red', 1904, 1, 8, 1981, 3, 19, 1, 222, 1, u'', 0, + datetime.datetime(2016, 8, 21, 5, 0, 58, 662928, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), True, 24482, 5099) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist')) + + + def test_load_and_write_artist_gid_redirect(self): + # area_type + data = [(1, u'Country', None, 1, u'Country is used for areas included (or previously included) in ISO 3166-1, e.g. United States.', uuid.UUID('06dd0ae4-8c74-30bb-b43d-95dcedf961de')), + (3, u'City', None, 3, u'City is used for settlements of any size, including towns and villages.', uuid.UUID('6fd8f29a-3d0a-32fc-980d-ea697b69da78')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area_type')) + + # area + data = [(24482, uuid.UUID('915a5576-b30c-4160-93cd-e1185cebb6ac'), u'Smithville', 3, 0, + datetime.datetime(2013, 11, 14, 1, 33, 0, 377353, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (222, uuid.UUID('489ce91b-6658-3307-9877-795b68554c98'), u'United States', 1, 0, + datetime.datetime(2013, 6, 15, 18, 6, 39, 593230, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (5099, uuid.UUID('29a709d8-0320-493e-8d0c-f2c386662b7f'), u'Chicago', 3, 0, + datetime.datetime(2013, 5, 24, 20, 27, 13, 405462, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, 
None, None, False, u'') + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area')) + + # artist_type + data = [(1, u'Person', None, 1, None, uuid.UUID('b6e035f4-3ce9-331c-97df-83397230b0df')), + (2, u'Group', None, 2, None, uuid.UUID('e431f5f6-b5d2-343d-8b36-72607fffb74b')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_type')) + + # gender + data = [(1, u'Male', None, 1, None, uuid.UUID('36d3d30a-839d-3eda-8cb3-29be4384e4a9')), + (2, u'Female', None, 2, None, uuid.UUID('93452b5a-a947-30c8-934f-6a4056b151c2')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_gender(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'gender')) + + # artist + data = [(6747, uuid.UUID('1b62df85-00d2-464f-81bc-a5c0cdcad278'), u'Tampa Red', u'Tampa Red', 1904, 1, 8, 1981, 3, 19, 1, 222, 1, u'', 0, + datetime.datetime(2016, 8, 21, 5, 0, 58, 662928, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), True, 24482, 5099) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist')) + + # artist_gid_redirect + data = [(uuid.UUID('6873559d-8cb9-494d-9f78-4c1eeab1f851'), 6747, datetime.datetime(2016, 3, 13, 23, 0, 21, 981437, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_gid_redirect')) + + + def test_load_and_write_artist_credit(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, 
datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + + def test_load_and_write_artist_credit_name(self): + # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -64,7 +215,79 @@ def test_write_to_musicbrainz_schema_tables(self): # gender data = [(1, u'Male', None, 1, None, uuid.UUID('36d3d30a-839d-3eda-8cb3-29be4384e4a9')), (2, u'Female', None, 2, None, uuid.UUID('93452b5a-a947-30c8-934f-6a4056b151c2')) - ] + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_gender(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'gender')) + + # artist + data = [(6747, uuid.UUID('1b62df85-00d2-464f-81bc-a5c0cdcad278'), u'Tampa 
Red', u'Tampa Red', 1904, 1, 8, 1981, 3, 19, 1, 222, 1, u'', 0, + datetime.datetime(2016, 8, 21, 5, 0, 58, 662928, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), True, 24482, 5099) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist')) + + # artist_credit_name + data = [(6747, 0, 6747, u'Tampa Red', u'')] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit_name(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit_name')) + + + def test_load_and_write_musicbrainz_schema_tables(self): + + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # area_type + data = [(1, u'Country', None, 1, u'Country is used for areas included (or previously included) in ISO 
3166-1, e.g. United States.', uuid.UUID('06dd0ae4-8c74-30bb-b43d-95dcedf961de')), + (3, u'City', None, 3, u'City is used for settlements of any size, including towns and villages.', uuid.UUID('6fd8f29a-3d0a-32fc-980d-ea697b69da78')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area_type')) + + # area + data = [(24482, uuid.UUID('915a5576-b30c-4160-93cd-e1185cebb6ac'), u'Smithville', 3, 0, + datetime.datetime(2013, 11, 14, 1, 33, 0, 377353, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (222, uuid.UUID('489ce91b-6658-3307-9877-795b68554c98'), u'United States', 1, 0, + datetime.datetime(2013, 6, 15, 18, 6, 39, 593230, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), + (5099, uuid.UUID('29a709d8-0320-493e-8d0c-f2c386662b7f'), u'Chicago', 3, 0, + datetime.datetime(2013, 5, 24, 20, 27, 13, 405462, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u'') + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_area(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area')) + + # artist_type + data = [(1, u'Person', None, 1, None, uuid.UUID('b6e035f4-3ce9-331c-97df-83397230b0df')), + (2, u'Group', None, 2, None, uuid.UUID('e431f5f6-b5d2-343d-8b36-72607fffb74b')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_type')) + + # gender + data = [(1, u'Male', None, 1, None, uuid.UUID('36d3d30a-839d-3eda-8cb3-29be4384e4a9')), + (2, u'Female', None, 2, None, uuid.UUID('93452b5a-a947-30c8-934f-6a4056b151c2')) + ] with db.engine.begin() as connection: 
db.import_mb_data.write_gender(connection, data) From a445334576b1ebbcd8ed7c271aeec84d9be5f398 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 13:12:42 +0530 Subject: [PATCH 066/125] Add tests to load and write recording and recording_gid_redirect tables --- db/test/test_import_mb_data.py | 55 ++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index 79b6673c2..8c515ad0f 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -238,6 +238,61 @@ def test_load_and_write_artist_credit_name(self): self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit_name')) + def test_load_and_write_recording(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # recording + data = [(11768371, 
uuid.UUID('d51cf7fb-97e1-4070-a40b-b03707f91c92'), u'(Rinaldo, HWV 7: Act I. "Combatti da forte" (Almirena)', 73502, 203000, u'', 0, None, False), + (8598260, uuid.UUID('9086b742-358b-4f73-9a14-84cb1a9ce4ce'), u'Love Story', 399541, 235000, u'', 0, None, False) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_recording(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording')) + + + def test_load_and_write_recording_gid_redirect(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # recording + data = [(11768371, uuid.UUID('d51cf7fb-97e1-4070-a40b-b03707f91c92'), u'(Rinaldo, HWV 7: Act I. 
"Combatti da forte" (Almirena)', 73502, 203000, u'', 0, None, False), + (8598260, uuid.UUID('9086b742-358b-4f73-9a14-84cb1a9ce4ce'), u'Love Story', 399541, 235000, u'', 0, None, False) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_recording(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording')) + + # recording_gid_redirect + data = [(uuid.UUID('05e1ab2e-f54f-464b-a1fd-fcc6bceaaa20'), 8598260, datetime.datetime(2011, 5, 16, 16, 8, 20, 288158, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + + with db.engine.begin() as connection: + db.import_mb_data.write_recording_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording_gid_redirect')) + + def test_load_and_write_musicbrainz_schema_tables(self): # artist_credit From 587c81f9497860645b6aaecc536df028e7c89c37 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 13:15:02 +0530 Subject: [PATCH 067/125] Add tests to load and write release and release_gid_redirect tables --- db/test/test_import_mb_data.py | 146 +++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index 8c515ad0f..f92b38ced 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -293,6 +293,152 @@ def test_load_and_write_recording_gid_redirect(self): self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording_gid_redirect')) + def test_load_and_write_release(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 
27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # release_group_primary_type + data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_primary_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) + + # release_group + data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, + datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, + datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) + + # language + data = [(120, u'eng', u'eng', u'en', u'English', 2, None)] + + with db.engine.begin() as connection: + db.import_mb_data.write_language(connection, data) + 
self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'language')) + + # release_status + data = [(1, u'Official', None, 1, u'Any release officially sanctioned by the artist and/or their record company. Most releases will fit into this category.', + uuid.UUID('4e304316-386d-3409-af2e-78857eec5cfe'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_status(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_status')) + + # release_packaging + data = [(1, u'Jewel Case', None, 0, u'The traditional CD case, made of hard, brittle plastic.', + uuid.UUID('ec27701a-4a22-37f4-bfac-6616e0f9750a')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_packaging(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_packaging')) + + # script + data = [(28, u'Latn', u'215', u'Latin', 4)] + + with db.engine.begin() as connection: + db.import_mb_data.write_script(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'script')) + + # release + data = [(692283, uuid.UUID('a830f892-6be0-35f0-a392-b91d89d89a94'), u'The Masterworks', 847994, 631361, 1, None, 120, 28, u'5028421923901', u'', 0, -1, + datetime.datetime(2018, 5, 13, 11, 0, 22, 832493, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) + + + def test_load_and_write_release_gid_redirect(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, 
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # release_group_primary_type + data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_primary_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) + + # release_group + data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, + datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, + datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) + + # language + data = [(120, u'eng', u'eng', u'en', u'English', 2, None)] 
+ + with db.engine.begin() as connection: + db.import_mb_data.write_language(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'language')) + + # release_status + data = [(1, u'Official', None, 1, u'Any release officially sanctioned by the artist and/or their record company. Most releases will fit into this category.', + uuid.UUID('4e304316-386d-3409-af2e-78857eec5cfe'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_status(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_status')) + + # release_packaging + data = [(1, u'Jewel Case', None, 0, u'The traditional CD case, made of hard, brittle plastic.', + uuid.UUID('ec27701a-4a22-37f4-bfac-6616e0f9750a')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_packaging(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_packaging')) + + # script + data = [(28, u'Latn', u'215', u'Latin', 4)] + + with db.engine.begin() as connection: + db.import_mb_data.write_script(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'script')) + + # release + data = [(692283, uuid.UUID('a830f892-6be0-35f0-a392-b91d89d89a94'), u'The Masterworks', 847994, 631361, 1, None, 120, 28, u'5028421923901', u'', 0, -1, + datetime.datetime(2018, 5, 13, 11, 0, 22, 832493, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) + + def test_load_and_write_musicbrainz_schema_tables(self): # artist_credit From 5ac347f09508352f53c64327059fc35cb8ac59ba Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 13:18:42 +0530 Subject: [PATCH 068/125] Add tests to load and write 
release_group and release_group_gid_redirect tables --- db/test/test_import_mb_data.py | 73 ++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index f92b38ced..e80f5f9ec 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -439,6 +439,79 @@ def test_load_and_write_release_gid_redirect(self): self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) + def test_load_and_write_release_group(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # release_group_primary_type + data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_primary_type(connection, data) + self.assertEqual(data, 
db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) + + # release_group + data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, + datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, + datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) + + + def test_load_and_write_release_group_gid_redirect(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # 
release_group_primary_type + data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_primary_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) + + # release_group + data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, + datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, + datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) + + # release_group_gid_redirect + data = [(uuid.UUID('21f0a3e8-c37b-33a1-b769-daf16e4e252e'), 617137, datetime.datetime(2011, 5, 16, 14, 57, 6, 530063, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_gid_redirect')) + + def test_load_and_write_musicbrainz_schema_tables(self): # artist_credit From 293a36b65039ec1b37d0047aa47392def8937f9f Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 13:21:26 +0530 Subject: [PATCH 069/125] Add tests to load and write medium table --- db/test/test_import_mb_data.py | 87 ++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index e80f5f9ec..bb7d35906 100644 --- 
a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -512,6 +512,93 @@ def test_load_and_write_release_group_gid_redirect(self): self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_gid_redirect')) + def test_load_and_write_medium(self): + # medium_format + data = [(1, u'CD', None, 0, 1982, True, None, uuid.UUID('9712d52a-4509-3d4b-a1a2-67c88c643e31'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_medium_format(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium_format')) + + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # release_group_primary_type + data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_primary_type(connection, 
data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) + + # release_group + data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, + datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, + datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) + + # language + data = [(120, u'eng', u'eng', u'en', u'English', 2, None)] + + with db.engine.begin() as connection: + db.import_mb_data.write_language(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'language')) + + # release_status + data = [(1, u'Official', None, 1, u'Any release officially sanctioned by the artist and/or their record company. 
Most releases will fit into this category.', + uuid.UUID('4e304316-386d-3409-af2e-78857eec5cfe'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_status(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_status')) + + # release_packaging + data = [(1, u'Jewel Case', None, 0, u'The traditional CD case, made of hard, brittle plastic.', + uuid.UUID('ec27701a-4a22-37f4-bfac-6616e0f9750a')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_packaging(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_packaging')) + + # script + data = [(28, u'Latn', u'215', u'Latin', 4)] + + with db.engine.begin() as connection: + db.import_mb_data.write_script(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'script')) + + # release + data = [(692283, uuid.UUID('a830f892-6be0-35f0-a392-b91d89d89a94'), u'The Masterworks', 847994, 631361, 1, None, 120, 28, u'5028421923901', u'', 0, -1, + datetime.datetime(2018, 5, 13, 11, 0, 22, 832493, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) + + # medium + data = [(1089027, 692283, 20, 1, u'Rinaldo, Part 1', 0, datetime.datetime(2011, 10, 24, 21, 0, 13, 19209, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 21)] + + with db.engine.begin() as connection: + db.import_mb_data.write_medium(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium')) + + def test_load_and_write_musicbrainz_schema_tables(self): # artist_credit From 9ada2b52ae415aaa050a529969b22b774e29f4f0 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 13:26:25 +0530 Subject: 
[PATCH 070/125] Add tests to load and write track and track_gid_redirect tables --- db/test/test_import_mb_data.py | 216 +++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index bb7d35906..b6da7d487 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -599,6 +599,222 @@ def test_load_and_write_medium(self): self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium')) + def test_and_write_track(self): + + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # recording + data = [(11768371, uuid.UUID('d51cf7fb-97e1-4070-a40b-b03707f91c92'), u'(Rinaldo, HWV 7: Act I. 
"Combatti da forte" (Almirena)', 73502, 203000, u'', 0, None, False), + (8598260, uuid.UUID('9086b742-358b-4f73-9a14-84cb1a9ce4ce'), u'Love Story', 399541, 235000, u'', 0, None, False) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_recording(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording')) + + # medium_format + data = [(1, u'CD', None, 0, 1982, True, None, uuid.UUID('9712d52a-4509-3d4b-a1a2-67c88c643e31'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_medium_format(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium_format')) + + # release_group_primary_type + data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_primary_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) + + # release_group + data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, + datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, + datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) + + # language + data = [(120, u'eng', u'eng', u'en', u'English', 2, None)] + + with db.engine.begin() as connection: + db.import_mb_data.write_language(connection, data) + self.assertEqual(data, 
db.import_mb_data.load_musicbrainz_schema_data(connection, 'language')) + + # release_status + data = [(1, u'Official', None, 1, u'Any release officially sanctioned by the artist and/or their record company. Most releases will fit into this category.', + uuid.UUID('4e304316-386d-3409-af2e-78857eec5cfe'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_status(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_status')) + + # release_packaging + data = [(1, u'Jewel Case', None, 0, u'The traditional CD case, made of hard, brittle plastic.', + uuid.UUID('ec27701a-4a22-37f4-bfac-6616e0f9750a')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_packaging(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_packaging')) + + # script + data = [(28, u'Latn', u'215', u'Latin', 4)] + + with db.engine.begin() as connection: + db.import_mb_data.write_script(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'script')) + + # release + data = [(692283, uuid.UUID('a830f892-6be0-35f0-a392-b91d89d89a94'), u'The Masterworks', 847994, 631361, 1, None, 120, 28, u'5028421923901', u'', 0, -1, + datetime.datetime(2018, 5, 13, 11, 0, 22, 832493, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) + + # medium + data = [(1089027, 692283, 20, 1, u'Rinaldo, Part 1', 0, datetime.datetime(2011, 10, 24, 21, 0, 13, 19209, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 21)] + + with db.engine.begin() as connection: + db.import_mb_data.write_medium(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium')) 
+ + # track + data = [(11261020, uuid.UUID('e0537cb9-4720-3eb3-a07a-d8a7477519ea'), 11768371, 1089027, 5, u'5', u'Rinaldo, HWV 7: Act I. "Combatti da forte" (Almirena)', + 831440, 203000, 0, datetime.datetime(2013, 7, 13, 11, 0, 38, 285946, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), False)] + + with db.engine.begin() as connection: + db.import_mb_data.write_track(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'track')) + + + def test_load_and_write_track_gid_redirect(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + + # recording + data = [(11768371, uuid.UUID('d51cf7fb-97e1-4070-a40b-b03707f91c92'), u'(Rinaldo, HWV 7: Act I. 
"Combatti da forte" (Almirena)', 73502, 203000, u'', 0, None, False), + (8598260, uuid.UUID('9086b742-358b-4f73-9a14-84cb1a9ce4ce'), u'Love Story', 399541, 235000, u'', 0, None, False) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_recording(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording')) + + # medium_format + data = [(1, u'CD', None, 0, 1982, True, None, uuid.UUID('9712d52a-4509-3d4b-a1a2-67c88c643e31'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_medium_format(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium_format')) + + # release_group_primary_type + data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group_primary_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) + + # release_group + data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, + datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, + datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_group(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) + + # language + data = [(120, u'eng', u'eng', u'en', u'English', 2, None)] + + with db.engine.begin() as connection: + db.import_mb_data.write_language(connection, data) + self.assertEqual(data, 
db.import_mb_data.load_musicbrainz_schema_data(connection, 'language')) + + # release_status + data = [(1, u'Official', None, 1, u'Any release officially sanctioned by the artist and/or their record company. Most releases will fit into this category.', + uuid.UUID('4e304316-386d-3409-af2e-78857eec5cfe'))] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_status(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_status')) + + # release_packaging + data = [(1, u'Jewel Case', None, 0, u'The traditional CD case, made of hard, brittle plastic.', + uuid.UUID('ec27701a-4a22-37f4-bfac-6616e0f9750a')) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_packaging(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_packaging')) + + # script + data = [(28, u'Latn', u'215', u'Latin', 4)] + + with db.engine.begin() as connection: + db.import_mb_data.write_script(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'script')) + + # release + data = [(692283, uuid.UUID('a830f892-6be0-35f0-a392-b91d89d89a94'), u'The Masterworks', 847994, 631361, 1, None, 120, 28, u'5028421923901', u'', 0, -1, + datetime.datetime(2018, 5, 13, 11, 0, 22, 832493, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) + + # medium + data = [(1089027, 692283, 20, 1, u'Rinaldo, Part 1', 0, datetime.datetime(2011, 10, 24, 21, 0, 13, 19209, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 21)] + + with db.engine.begin() as connection: + db.import_mb_data.write_medium(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium')) 
+ + # track + data = [(11261020, uuid.UUID('e0537cb9-4720-3eb3-a07a-d8a7477519ea'), 11768371, 1089027, 5, u'5', u'Rinaldo, HWV 7: Act I. "Combatti da forte" (Almirena)', + 831440, 203000, 0, datetime.datetime(2013, 7, 13, 11, 0, 38, 285946, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), False)] + + with db.engine.begin() as connection: + db.import_mb_data.write_track(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'track')) + + # track_gid_redirect + data = [(uuid.UUID('67a0d0cd-fd61-328d-80a2-ca888c5fd15c'), 11261020, datetime.datetime(2014, 10, 15, 0, 0, 9, 772435, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + + with db.engine.begin() as connection: + db.import_mb_data.write_track_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'track_gid_redirect')) + + def test_load_and_write_musicbrainz_schema_tables(self): # artist_credit From 6ce054dd599eda82c72e1358e18a99a429f56e97 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 13:27:44 +0530 Subject: [PATCH 071/125] Change testing order of release and release_group tables --- db/test/test_import_mb_data.py | 114 ++++++++++++++++----------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index b6da7d487..1515e8f4a 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -293,7 +293,7 @@ def test_load_and_write_recording_gid_redirect(self): self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording_gid_redirect')) - def test_load_and_write_release(self): + def test_load_and_write_release_group(self): # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, 
datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -325,48 +325,48 @@ def test_load_and_write_release(self): db.import_mb_data.write_release_group(connection, data) self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) - # language - data = [(120, u'eng', u'eng', u'en', u'English', 2, None)] - - with db.engine.begin() as connection: - db.import_mb_data.write_language(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'language')) - # release_status - data = [(1, u'Official', None, 1, u'Any release officially sanctioned by the artist and/or their record company. Most releases will fit into this category.', - uuid.UUID('4e304316-386d-3409-af2e-78857eec5cfe'))] + def test_load_and_write_release_group_gid_redirect(self): + # artist_credit + data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] with db.engine.begin() as connection: - db.import_mb_data.write_release_status(connection, data) - self.assertEqual(data, 
db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_status')) + db.import_mb_data.write_artist_credit(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) - # release_packaging - data = [(1, u'Jewel Case', None, 0, u'The traditional CD case, made of hard, brittle plastic.', - uuid.UUID('ec27701a-4a22-37f4-bfac-6616e0f9750a')) - ] + # release_group_primary_type + data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] with db.engine.begin() as connection: - db.import_mb_data.write_release_packaging(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_packaging')) + db.import_mb_data.write_release_group_primary_type(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) - # script - data = [(28, u'Latn', u'215', u'Latin', 4)] + # release_group + data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, + datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), + (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, + datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] with db.engine.begin() as connection: - db.import_mb_data.write_script(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'script')) + db.import_mb_data.write_release_group(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) - # release - data = [(692283, uuid.UUID('a830f892-6be0-35f0-a392-b91d89d89a94'), u'The Masterworks', 847994, 631361, 1, None, 120, 28, u'5028421923901', u'', 0, -1, - 
datetime.datetime(2018, 5, 13, 11, 0, 22, 832493, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) - ] + # release_group_gid_redirect + data = [(uuid.UUID('21f0a3e8-c37b-33a1-b769-daf16e4e252e'), 617137, datetime.datetime(2011, 5, 16, 14, 57, 6, 530063, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] with db.engine.begin() as connection: - db.import_mb_data.write_release(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) + db.import_mb_data.write_release_group_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_gid_redirect')) - def test_load_and_write_release_gid_redirect(self): + def test_load_and_write_release(self): # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -439,7 +439,7 @@ def test_load_and_write_release_gid_redirect(self): self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) - def test_load_and_write_release_group(self): + def test_load_and_write_release_gid_redirect(self): # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -471,45 +471,45 @@ def test_load_and_write_release_group(self): db.import_mb_data.write_release_group(connection, data) self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) - - def test_load_and_write_release_group_gid_redirect(self): - 
# artist_credit - data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (831440, u'George Frideric Handel', 1, 24955, datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) - ] + # language + data = [(120, u'eng', u'eng', u'en', u'English', 2, None)] with db.engine.begin() as connection: - db.import_mb_data.write_artist_credit(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) + db.import_mb_data.write_language(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'language')) - # release_group_primary_type - data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] + # release_status + data = [(1, u'Official', None, 1, u'Any release officially sanctioned by the artist and/or their record company. 
Most releases will fit into this category.', + uuid.UUID('4e304316-386d-3409-af2e-78857eec5cfe'))] with db.engine.begin() as connection: - db.import_mb_data.write_release_group_primary_type(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) + db.import_mb_data.write_release_status(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_status')) - # release_group - data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, - datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, - datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + # release_packaging + data = [(1, u'Jewel Case', None, 0, u'The traditional CD case, made of hard, brittle plastic.', + uuid.UUID('ec27701a-4a22-37f4-bfac-6616e0f9750a')) ] with db.engine.begin() as connection: - db.import_mb_data.write_release_group(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) + db.import_mb_data.write_release_packaging(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_packaging')) - # release_group_gid_redirect - data = [(uuid.UUID('21f0a3e8-c37b-33a1-b769-daf16e4e252e'), 617137, datetime.datetime(2011, 5, 16, 14, 57, 6, 530063, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] + # script + data = [(28, u'Latn', u'215', u'Latin', 4)] with db.engine.begin() as connection: - db.import_mb_data.write_release_group_gid_redirect(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 
'release_group_gid_redirect')) + db.import_mb_data.write_script(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'script')) + + # release + data = [(692283, uuid.UUID('a830f892-6be0-35f0-a392-b91d89d89a94'), u'The Masterworks', 847994, 631361, 1, None, 120, 28, u'5028421923901', u'', 0, -1, + datetime.datetime(2018, 5, 13, 11, 0, 22, 832493, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) def test_load_and_write_medium(self): From 57a705c533a1966029e0be6321989a974c76b33a Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 13:29:14 +0530 Subject: [PATCH 072/125] Remove a big function for load and write as it is broken into modules already --- db/test/test_import_mb_data.py | 196 --------------------------------- 1 file changed, 196 deletions(-) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index 1515e8f4a..74003a3f6 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -815,202 +815,6 @@ def test_load_and_write_track_gid_redirect(self): self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'track_gid_redirect')) - def test_load_and_write_musicbrainz_schema_tables(self): - - # artist_credit - data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (73502, u'Georg Friedrich Handel', 1, 27041, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (831440, u'George Frideric Handel', 1, 24955, 
datetime.datetime(2011, 6, 19, 7, 36, 56, 8576, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (847994, u'Handel', 1, 717, datetime.datetime(2011, 8, 11, 19, 43, 1, 279447, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (6747, u'Tampa Red', 1, 1600, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_artist_credit(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit')) - - # area_type - data = [(1, u'Country', None, 1, u'Country is used for areas included (or previously included) in ISO 3166-1, e.g. United States.', uuid.UUID('06dd0ae4-8c74-30bb-b43d-95dcedf961de')), - (3, u'City', None, 3, u'City is used for settlements of any size, including towns and villages.', uuid.UUID('6fd8f29a-3d0a-32fc-980d-ea697b69da78')) - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_area_type(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area_type')) - - # area - data = [(24482, uuid.UUID('915a5576-b30c-4160-93cd-e1185cebb6ac'), u'Smithville', 3, 0, - datetime.datetime(2013, 11, 14, 1, 33, 0, 377353, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), - (222, uuid.UUID('489ce91b-6658-3307-9877-795b68554c98'), u'United States', 1, 0, - datetime.datetime(2013, 6, 15, 18, 6, 39, 593230, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u''), - (5099, uuid.UUID('29a709d8-0320-493e-8d0c-f2c386662b7f'), u'Chicago', 3, 0, - datetime.datetime(2013, 5, 24, 20, 27, 13, 405462, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), None, None, None, None, None, None, False, u'') - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_area(connection, 
data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'area')) - - # artist_type - data = [(1, u'Person', None, 1, None, uuid.UUID('b6e035f4-3ce9-331c-97df-83397230b0df')), - (2, u'Group', None, 2, None, uuid.UUID('e431f5f6-b5d2-343d-8b36-72607fffb74b')) - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_artist_type(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_type')) - - # gender - data = [(1, u'Male', None, 1, None, uuid.UUID('36d3d30a-839d-3eda-8cb3-29be4384e4a9')), - (2, u'Female', None, 2, None, uuid.UUID('93452b5a-a947-30c8-934f-6a4056b151c2')) - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_gender(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'gender')) - - # artist - data = [(6747, uuid.UUID('1b62df85-00d2-464f-81bc-a5c0cdcad278'), u'Tampa Red', u'Tampa Red', 1904, 1, 8, 1981, 3, 19, 1, 222, 1, u'', 0, - datetime.datetime(2016, 8, 21, 5, 0, 58, 662928, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), True, 24482, 5099) - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_artist(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist')) - - # artist_gid_redirect - data = [(uuid.UUID('6873559d-8cb9-494d-9f78-4c1eeab1f851'), 6747, datetime.datetime(2016, 3, 13, 23, 0, 21, 981437, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] - - with db.engine.begin() as connection: - db.import_mb_data.write_artist_gid_redirect(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_gid_redirect')) - - # artist_credit_name - data = [(6747, 0, 6747, u'Tampa Red', u'')] - - with db.engine.begin() as connection: - db.import_mb_data.write_artist_credit_name(connection, data) - self.assertEqual(data, 
db.import_mb_data.load_musicbrainz_schema_data(connection, 'artist_credit_name')) - - # recording - data = [(11768371, uuid.UUID('d51cf7fb-97e1-4070-a40b-b03707f91c92'), u'(Rinaldo, HWV 7: Act I. "Combatti da forte" (Almirena)', 73502, 203000, u'', 0, None, False), - (8598260, uuid.UUID('9086b742-358b-4f73-9a14-84cb1a9ce4ce'), u'Love Story', 399541, 235000, u'', 0, None, False) - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_recording(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording')) - - # recording_gid_redirect - data = [(uuid.UUID('05e1ab2e-f54f-464b-a1fd-fcc6bceaaa20'), 8598260, datetime.datetime(2011, 5, 16, 16, 8, 20, 288158, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] - - with db.engine.begin() as connection: - db.import_mb_data.write_recording_gid_redirect(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'recording_gid_redirect')) - - # release_group_primary_type - data = [(1, u'Album', None, 1, None, uuid.UUID('f529b476-6e62-324f-b0aa-1f3e33d313fc'))] - - with db.engine.begin() as connection: - db.import_mb_data.write_release_group_primary_type(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_primary_type')) - - # release_group - data = [(631361, uuid.UUID('2ec35fc4-6797-3324-b775-9a3df3d4723a'), u'The Masterworks', 73502, 1, u'', 0, - datetime.datetime(2012, 5, 15, 19, 1, 58, 718541, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), - (617137, uuid.UUID('c834f5ee-d362-3da7-966b-8915a86e808c'), u'1981-08-29: Tangerine Tree Volume 53: Berlin 1981', 1418, 1, u'', 0, - datetime.datetime(2016, 9, 21, 23, 0, 26, 94608, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_release_group(connection, data) - self.assertEqual(data, 
db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group')) - - # release_group_gid_redirect - data = [(uuid.UUID('21f0a3e8-c37b-33a1-b769-daf16e4e252e'), 617137, datetime.datetime(2011, 5, 16, 14, 57, 6, 530063, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] - - with db.engine.begin() as connection: - db.import_mb_data.write_release_group_gid_redirect(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_group_gid_redirect')) - - # language - data = [(120, u'eng', u'eng', u'en', u'English', 2, None)] - - with db.engine.begin() as connection: - db.import_mb_data.write_language(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'language')) - - # release_status - data = [(1, u'Official', None, 1, u'Any release officially sanctioned by the artist and/or their record company. Most releases will fit into this category.', - uuid.UUID('4e304316-386d-3409-af2e-78857eec5cfe'))] - - with db.engine.begin() as connection: - db.import_mb_data.write_release_status(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_status')) - - # release_packaging - data = [(1, u'Jewel Case', None, 0, u'The traditional CD case, made of hard, brittle plastic.', - uuid.UUID('ec27701a-4a22-37f4-bfac-6616e0f9750a')) - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_release_packaging(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_packaging')) - - # script - data = [(28, u'Latn', u'215', u'Latin', 4)] - - with db.engine.begin() as connection: - db.import_mb_data.write_script(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'script')) - - # release - data = [(692283, uuid.UUID('a830f892-6be0-35f0-a392-b91d89d89a94'), u'The Masterworks', 847994, 631361, 1, None, 120, 
28, u'5028421923901', u'', 0, -1, - datetime.datetime(2018, 5, 13, 11, 0, 22, 832493, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) - ] - - with db.engine.begin() as connection: - db.import_mb_data.write_release(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) - - # release_gid_redirect - data = [(uuid.UUID('03c44c5d-cbe5-32b2-af20-376a30fd98a0'), 692283, datetime.datetime(2011, 5, 16, 15, 59, 0, 785958, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] - - with db.engine.begin() as connection: - db.import_mb_data.write_release_gid_redirect(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_gid_redirect')) - - # medium_format - data = [(1, u'CD', None, 0, 1982, True, None, uuid.UUID('9712d52a-4509-3d4b-a1a2-67c88c643e31'))] - - with db.engine.begin() as connection: - db.import_mb_data.write_medium_format(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium_format')) - - # medium - data = [(1089027, 692283, 20, 1, u'Rinaldo, Part 1', 0, datetime.datetime(2011, 10, 24, 21, 0, 13, 19209, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 21)] - - with db.engine.begin() as connection: - db.import_mb_data.write_medium(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'medium')) - - # track - data = [(11261020, uuid.UUID('e0537cb9-4720-3eb3-a07a-d8a7477519ea'), 11768371, 1089027, 5, u'5', u'Rinaldo, HWV 7: Act I. 
"Combatti da forte" (Almirena)', - 831440, 203000, 0, datetime.datetime(2013, 7, 13, 11, 0, 38, 285946, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), False)] - - with db.engine.begin() as connection: - db.import_mb_data.write_track(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'track')) - - # track_gid_redirect - data = [(uuid.UUID('67a0d0cd-fd61-328d-80a2-ca888c5fd15c'), 11261020, datetime.datetime(2014, 10, 15, 0, 0, 9, 772435, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)))] - - with db.engine.begin() as connection: - db.import_mb_data.write_track_gid_redirect(connection, data) - self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'track_gid_redirect')) - - # def test_write_artist_type(self): # def test_write_area_type(self): From d125f89486e986a1f96037a67a02fc2dfffee144 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 13:30:14 +0530 Subject: [PATCH 073/125] Remove unnecessary comments and new lines --- db/test/test_import_mb_data.py | 41 ---------------------------------- 1 file changed, 41 deletions(-) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index 74003a3f6..13f76ab01 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -813,44 +813,3 @@ def test_load_and_write_track_gid_redirect(self): with db.engine.begin() as connection: db.import_mb_data.write_track_gid_redirect(connection, data) self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'track_gid_redirect')) - - - # def test_write_artist_type(self): - - # def test_write_area_type(self): - - # def test_write_release_status(self): - - # def test_write_release_group_primary_type(self): - - # def test_write_medium_format(self): - - # def test_write_release_packaging(self): - - # def test_write_language(self): - - # def test_write_script(self): - - # def test_write_gender(self): - - # def 
test_write_area_type_and_area(self): - - # def test_write_artist_type_and_artist_and_artist_gid_redirect(self): - - # def test_write_artist_credit_name(self): - - # def test_write_artist_gid_redirect(self): - - # def test_write_recording(sel - - # def test_write_release(self): - - # def test_write_release_gid_redirect(self): - - # def test_write_release_group_gid_redirect(self): - - # def test_write_track(self): - - # def test_write_track_gid_redirect(self): - - # def test_write_medium(self): From 51b827f21af721acd65adde6864cef4f313d1129 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 22:24:31 +0530 Subject: [PATCH 074/125] Add docstrings for all the test functions --- db/test/test_import_mb_data.py | 51 ++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index 13f76ab01..993c44425 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -17,6 +17,7 @@ def setUp(self): def test_load_and_write_area(self): + """Writing and loading data for area table using values from referenced area_type table""" # area_type data = [(1, u'Country', None, 1, u'Country is used for areas included (or previously included) in ISO 3166-1, e.g. United States.', uuid.UUID('06dd0ae4-8c74-30bb-b43d-95dcedf961de')), @@ -42,6 +43,9 @@ def test_load_and_write_area(self): def test_load_and_write_artist(self): + """Writing and loading data for artist table using values from referenced area_type, area, + artist_type & gender tables. + """ # area_type data = [(1, u'Country', None, 1, u'Country is used for areas included (or previously included) in ISO 3166-1, e.g. 
United States.', uuid.UUID('06dd0ae4-8c74-30bb-b43d-95dcedf961de')), @@ -94,6 +98,10 @@ def test_load_and_write_artist(self): def test_load_and_write_artist_gid_redirect(self): + """Writing and loading data for artist_gid_redirect table using values from referenced area_type, + area, artist_type, gender & artist tables. + """ + # area_type data = [(1, u'Country', None, 1, u'Country is used for areas included (or previously included) in ISO 3166-1, e.g. United States.', uuid.UUID('06dd0ae4-8c74-30bb-b43d-95dcedf961de')), (3, u'City', None, 3, u'City is used for settlements of any size, including towns and villages.', uuid.UUID('6fd8f29a-3d0a-32fc-980d-ea697b69da78')) @@ -152,6 +160,8 @@ def test_load_and_write_artist_gid_redirect(self): def test_load_and_write_artist_credit(self): + """Writing and loading data for artist_credit table.""" + # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -167,6 +177,9 @@ def test_load_and_write_artist_credit(self): def test_load_and_write_artist_credit_name(self): + """Writing and loading data for artist_gid_redirect table using values from referenced area_type, + area, artist_type, gender & artist tables. 
+ """ # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -239,6 +252,8 @@ def test_load_and_write_artist_credit_name(self): def test_load_and_write_recording(self): + """Writing and loading data for recording table using values from referenced artist_credit table.""" + # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -263,6 +278,10 @@ def test_load_and_write_recording(self): def test_load_and_write_recording_gid_redirect(self): + """Writing and loading data for recording_gid_redirect table using values from referenced\ + artist_credit & recording tables. + """ + # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -294,6 +313,10 @@ def test_load_and_write_recording_gid_redirect(self): def test_load_and_write_release_group(self): + """Writing and loading data for release_group table using values from referenced artist_credit + & release_group_primary_type tables. 
+ """ + # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -327,6 +350,10 @@ def test_load_and_write_release_group(self): def test_load_and_write_release_group_gid_redirect(self): + """Writing and loading data for release_group_gid_redirect table using values from referenced artist_credit, + release_group_primary_type & release_group tables. + """ + # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -367,6 +394,11 @@ def test_load_and_write_release_group_gid_redirect(self): def test_load_and_write_release(self): + """Writing and loading data for release table using values from referenced artist_credit, + release_group_primary_type, release_group, language, script, release_status & release_packaging + tables. + """ + # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -440,6 +472,11 @@ def test_load_and_write_release(self): def test_load_and_write_release_gid_redirect(self): + """Writing and loading data for release_gid_redirect table using values from referenced artist_credit, + release_group_primary_type, release_group, language, release_status, release_packaging, script + & release tables. 
+ """ + # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -513,6 +550,11 @@ def test_load_and_write_release_gid_redirect(self): def test_load_and_write_medium(self): + """Writing and loading data for medium table using values from referenced medium_format, + artist_credit, release_group_primary_type, release_group, language, release_status, release_packaging, + script & release tables. + """ + # medium_format data = [(1, u'CD', None, 0, 1982, True, None, uuid.UUID('9712d52a-4509-3d4b-a1a2-67c88c643e31'))] @@ -600,6 +642,10 @@ def test_load_and_write_medium(self): def test_and_write_track(self): + """Writing and loading data for track table using values from referenced artist_credit, + recording, medium_format, release_group_primary_type, release_group, language, release_status, + release_packaging, script, release & medium tables. + """ # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), @@ -705,6 +751,11 @@ def test_and_write_track(self): def test_load_and_write_track_gid_redirect(self): + """Writing and loading data for track_gid_redirect table using values from referenced artist_credit, + recording, medium_format, release_group_primary_type, release_group, language, release_status, + release_packaging, script, release, medium & track tables. 
+ """ + # artist_credit data = [(1418, u'Tangerine Dream', 1, 13729, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), (399541, u'Taylor Swift', 1, 3139, datetime.datetime(2011, 5, 16, 16, 32, 11, 963929, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))), From 7e7bda12d9a4d7c2e2dad2e9ef1e12be1958c4af Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 3 Aug 2018 22:26:24 +0530 Subject: [PATCH 075/125] Add release_gid_redirect fetch and write code, missed in the previous commits --- db/test/test_import_mb_data.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/db/test/test_import_mb_data.py b/db/test/test_import_mb_data.py index 993c44425..283e68b5b 100644 --- a/db/test/test_import_mb_data.py +++ b/db/test/test_import_mb_data.py @@ -470,6 +470,15 @@ def test_load_and_write_release(self): db.import_mb_data.write_release(connection, data) self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release')) + # release_gid_redirect + data = [(uuid.UUID('03c44c5d-cbe5-32b2-af20-376a30fd98a0'), 692283, datetime.datetime(2011, 5, 16, 15, 59, 0, 785958, + tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))) + ] + + with db.engine.begin() as connection: + db.import_mb_data.write_release_gid_redirect(connection, data) + self.assertEqual(data, db.import_mb_data.load_musicbrainz_schema_data(connection, 'release_gid_redirect')) + def test_load_and_write_release_gid_redirect(self): """Writing and loading data for release_gid_redirect table using values from referenced artist_credit, From 5c64ffa8d5051bdfd22807cba154f7712b4bf64e Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 29 Jul 2018 03:34:23 +0530 Subject: [PATCH 076/125] redirect mbids to their original entities and returns recordings ids along --- manage.py | 8 +++++ webserver/external/get_entities.py | 39 ++++++++++++++++++++++++ webserver/external/mbid_redirects.py | 44 ++++++++++++++++++++++++++++ 3 files 
changed, 91 insertions(+) create mode 100644 webserver/external/get_entities.py create mode 100644 webserver/external/mbid_redirects.py diff --git a/manage.py b/manage.py index 2f26655eb..3b4a6b773 100644 --- a/manage.py +++ b/manage.py @@ -20,6 +20,8 @@ from brainzutils import musicbrainz_db from db.testing import DatabaseTestCase +import webserver.external.get_entities + ADMIN_SQL_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'admin', 'sql') cli = FlaskGroup(add_default_commands=False, create_app=webserver.create_app_flaskgroup) @@ -193,6 +195,12 @@ def import_musicbrainz_db(): print("\nImporting MusicBrainz data...") db.import_mb_data.start_import() + +@cli.command() +def get_entities(): + print('Redirecting mbids to original entities...') + webserver.external.get_entities.get_mbids_from_gid_redirect_tables() + # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") diff --git a/webserver/external/get_entities.py b/webserver/external/get_entities.py new file mode 100644 index 000000000..f185145b5 --- /dev/null +++ b/webserver/external/get_entities.py @@ -0,0 +1,39 @@ +import db +from sqlalchemy import text +from brainzutils.musicbrainz_db import mb_session +from webserver.external.mbid_redirects import get_entities_by_gids +from mbdata.models import Recording + + +def get_original_entity(mbids): + with mb_session() as db: + query = db.query(Recording) + + recordings = get_entities_by_gids( + query=query, + entity_type='recording', + mbids=mbids, + ) + + recording_ids = [recording.id for recording in recordings.values()] + recording_gids = [key for key in recordings] + + gids_with_redirected_ids = dict(zip(recording_gids, recording_ids)) + + return gids_with_redirected_ids + + +def get_mbids_from_gid_redirect_tables(): + with db.engine.begin() as connection: + query = text(""" + SELECT gid + FROM musicbrainz.recording_gid_redirect + """) + result = connection.execute(query) + mbids = 
result.fetchall() + + recording_mbids = [] + for mbid in mbids: + recording_mbids.append(str(mbid[0])) + + gids_with_redirected_ids = get_original_entity(recording_mbids) diff --git a/webserver/external/mbid_redirects.py b/webserver/external/mbid_redirects.py new file mode 100644 index 000000000..a17e8b850 --- /dev/null +++ b/webserver/external/mbid_redirects.py @@ -0,0 +1,44 @@ +import brainzutils.musicbrainz_db.exceptions as mb_exceptions +from mbdata import models + + +# Entity models +ENTITY_MODELS = { + 'recording': models.Recording, +} + +# Redirect models +REDIRECT_MODELS = { + 'recording': models.RecordingGIDRedirect, +} + + +def get_entities_by_gids(query, entity_type, mbids): + """Get entities using their MBIDs. + An entity can have multiple MBIDs. This function may be passed another + MBID of an entity, in which case, it is redirected to the original entity. + Note that the query may be modified before passing it to this + function in order to save queries made to the database. + Args: + query (Query): SQLAlchemy Query object. + entity_type (str): Type of entity being queried. + mbids (list): IDs of the target entities. + Returns: + Dictionary of objects of target entities keyed by their MBID. 
+ """ + entity_model = ENTITY_MODELS[entity_type] + results = query.filter(entity_model.gid.in_(mbids)).all() + remaining_gids = list(set(mbids) - {entity.gid for entity in results}) + entities = {str(entity.gid): entity for entity in results} + if remaining_gids: + redirect_model = REDIRECT_MODELS[entity_type] + query = query.add_entity(redirect_model).join(redirect_model) + results = query.filter(redirect_model.gid.in_(remaining_gids)) + for entity, redirect_obj in results: + entities[redirect_obj.gid] = entity + remaining_gids = list(set(remaining_gids) - {redirect_obj.gid for entity, redirect_obj in results}) + + if remaining_gids: + raise mb_exceptions.NoDataFoundException("Couldn't find entities with IDs: {mbids}".format(mbids=remaining_gids)) + + return entities From 4532eb1957c5bc47345ce7c8ce020a1ed046e372 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 29 Jul 2018 03:55:59 +0530 Subject: [PATCH 077/125] Add docstrings to the function definitions in get_entities.py script --- webserver/external/get_entities.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/webserver/external/get_entities.py b/webserver/external/get_entities.py index f185145b5..d78194b18 100644 --- a/webserver/external/get_entities.py +++ b/webserver/external/get_entities.py @@ -6,6 +6,16 @@ def get_original_entity(mbids): + """Get original entity information after applying MBID redirect + to many mbids. + + Args: + mbids (list): list of uuid (MBID(gid)) of the recordings. + Returns: + Dictionary containing the redirected original entity ids with MBIDs as keys. + - mbid: Recording mbids of the entities + - id: Original redirected ids of the entities after mbid redirect + """ with mb_session() as db: query = db.query(Recording) @@ -24,6 +34,14 @@ def get_original_entity(mbids): def get_mbids_from_gid_redirect_tables(): + """Fetch mbids from recording gid redirect table and calls function + get_original_entity to get the redirected result. 
+ + Returns: + Dictionary containing the redirected original entity ids with MBIDs as keys. + - mbid: Recording mbids of the entities + - id: Original redirected ids of the entities after mbid redirect + """ with db.engine.begin() as connection: query = text(""" SELECT gid From 82708fe10ed73d17fd8c461d990a13c2be86534c Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 2 Aug 2018 16:11:45 +0530 Subject: [PATCH 078/125] Upgrade to recent BrainzUtils version and import utils.py in the script --- requirements.txt | 2 +- webserver/external/get_entities.py | 2 +- webserver/external/mbid_redirects.py | 44 ---------------------------- 3 files changed, 2 insertions(+), 46 deletions(-) delete mode 100644 webserver/external/mbid_redirects.py diff --git a/requirements.txt b/requirements.txt index e4883704f..563869aee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/metabrainz/brainzutils-python.git@v1.5.0 +git+https://github.com/metabrainz/brainzutils-python.git@v1.6.0 click == 6.7 coverage == 4.5.1 Fabric == 1.14.0 diff --git a/webserver/external/get_entities.py b/webserver/external/get_entities.py index d78194b18..63f363cad 100644 --- a/webserver/external/get_entities.py +++ b/webserver/external/get_entities.py @@ -1,7 +1,7 @@ import db from sqlalchemy import text from brainzutils.musicbrainz_db import mb_session -from webserver.external.mbid_redirects import get_entities_by_gids +from brainzutils.musicbrainz_db.utils import get_entities_by_gids from mbdata.models import Recording diff --git a/webserver/external/mbid_redirects.py b/webserver/external/mbid_redirects.py deleted file mode 100644 index a17e8b850..000000000 --- a/webserver/external/mbid_redirects.py +++ /dev/null @@ -1,44 +0,0 @@ -import brainzutils.musicbrainz_db.exceptions as mb_exceptions -from mbdata import models - - -# Entity models -ENTITY_MODELS = { - 'recording': models.Recording, -} - -# Redirect models -REDIRECT_MODELS = { - 'recording': 
models.RecordingGIDRedirect, -} - - -def get_entities_by_gids(query, entity_type, mbids): - """Get entities using their MBIDs. - An entity can have multiple MBIDs. This function may be passed another - MBID of an entity, in which case, it is redirected to the original entity. - Note that the query may be modified before passing it to this - function in order to save queries made to the database. - Args: - query (Query): SQLAlchemy Query object. - entity_type (str): Type of entity being queried. - mbids (list): IDs of the target entities. - Returns: - Dictionary of objects of target entities keyed by their MBID. - """ - entity_model = ENTITY_MODELS[entity_type] - results = query.filter(entity_model.gid.in_(mbids)).all() - remaining_gids = list(set(mbids) - {entity.gid for entity in results}) - entities = {str(entity.gid): entity for entity in results} - if remaining_gids: - redirect_model = REDIRECT_MODELS[entity_type] - query = query.add_entity(redirect_model).join(redirect_model) - results = query.filter(redirect_model.gid.in_(remaining_gids)) - for entity, redirect_obj in results: - entities[redirect_obj.gid] = entity - remaining_gids = list(set(remaining_gids) - {redirect_obj.gid for entity, redirect_obj in results}) - - if remaining_gids: - raise mb_exceptions.NoDataFoundException("Couldn't find entities with IDs: {mbids}".format(mbids=remaining_gids)) - - return entities From 96d5f1be85b93b350bd0a872f9e76991d637b20e Mon Sep 17 00:00:00 2001 From: rsh7 Date: Mon, 6 Aug 2018 21:47:33 +0530 Subject: [PATCH 079/125] Add a return statement to return gids_with_redirect_ids dict --- webserver/external/get_entities.py | 1 + 1 file changed, 1 insertion(+) diff --git a/webserver/external/get_entities.py b/webserver/external/get_entities.py index 63f363cad..a96fae56b 100644 --- a/webserver/external/get_entities.py +++ b/webserver/external/get_entities.py @@ -55,3 +55,4 @@ def get_mbids_from_gid_redirect_tables(): recording_mbids.append(str(mbid[0])) 
gids_with_redirected_ids = get_original_entity(recording_mbids) + return gids_with_redirected_ids From 5e215843ee984bb3f3c17b2d92049861b8d84ed5 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Mon, 6 Aug 2018 22:56:28 +0530 Subject: [PATCH 080/125] Move the query to the db module in data.py file --- db/data.py | 23 +++++++++++++++++++++++ manage.py | 2 +- webserver/external/get_entities.py | 29 +++-------------------------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/db/data.py b/db/data.py index b2a80e28c..d45477f64 100644 --- a/db/data.py +++ b/db/data.py @@ -544,3 +544,26 @@ def get_new_recordings_from_lowlevel(): gids_in_AB = [value[0] for value in gids] return gids_in_AB + + +def get_mbids_from_gid_redirect_tables(): + """Fetch mbids from recording gid redirect table and calls function + get_original_entity to get the redirected result. + + Returns: + Dictionary containing the redirected original entity ids with MBIDs as keys. + - mbid: Recording mbids of the entities + - id: Original redirected ids of the entities after mbid redirect + """ + with db.engine.begin() as connection: + query = text(""" + SELECT gid + FROM musicbrainz.recording_gid_redirect + """) + result = connection.execute(query) + mbids = result.fetchall() + + recording_mbids = [] + for mbid in mbids: + recording_mbids.append(str(mbid[0])) + return recording_mbids diff --git a/manage.py b/manage.py index 3b4a6b773..771bd7ed8 100644 --- a/manage.py +++ b/manage.py @@ -199,7 +199,7 @@ def import_musicbrainz_db(): @cli.command() def get_entities(): print('Redirecting mbids to original entities...') - webserver.external.get_entities.get_mbids_from_gid_redirect_tables() + webserver.external.get_entities.get_original_entity() # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") diff --git a/webserver/external/get_entities.py b/webserver/external/get_entities.py index a96fae56b..9b81ffdd7 100644 --- a/webserver/external/get_entities.py +++ 
b/webserver/external/get_entities.py @@ -1,11 +1,12 @@ import db +import db.data from sqlalchemy import text from brainzutils.musicbrainz_db import mb_session from brainzutils.musicbrainz_db.utils import get_entities_by_gids from mbdata.models import Recording -def get_original_entity(mbids): +def get_original_entity(): """Get original entity information after applying MBID redirect to many mbids. @@ -16,6 +17,7 @@ def get_original_entity(mbids): - mbid: Recording mbids of the entities - id: Original redirected ids of the entities after mbid redirect """ + mbids = db.data.get_mbids_from_gid_redirect_tables() with mb_session() as db: query = db.query(Recording) @@ -30,29 +32,4 @@ def get_original_entity(mbids): gids_with_redirected_ids = dict(zip(recording_gids, recording_ids)) - return gids_with_redirected_ids - - -def get_mbids_from_gid_redirect_tables(): - """Fetch mbids from recording gid redirect table and calls function - get_original_entity to get the redirected result. - - Returns: - Dictionary containing the redirected original entity ids with MBIDs as keys. 
- - mbid: Recording mbids of the entities - - id: Original redirected ids of the entities after mbid redirect - """ - with db.engine.begin() as connection: - query = text(""" - SELECT gid - FROM musicbrainz.recording_gid_redirect - """) - result = connection.execute(query) - mbids = result.fetchall() - - recording_mbids = [] - for mbid in mbids: - recording_mbids.append(str(mbid[0])) - - gids_with_redirected_ids = get_original_entity(recording_mbids) return gids_with_redirected_ids From bf133c0cb35fc83ac09ce6013c5260efcf7776c8 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Mon, 6 Aug 2018 22:58:51 +0530 Subject: [PATCH 081/125] Rename musicbrainz db variable to mb_db to avoid confusion --- webserver/external/get_entities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webserver/external/get_entities.py b/webserver/external/get_entities.py index 9b81ffdd7..e9ccb4bfc 100644 --- a/webserver/external/get_entities.py +++ b/webserver/external/get_entities.py @@ -18,8 +18,8 @@ def get_original_entity(): - id: Original redirected ids of the entities after mbid redirect """ mbids = db.data.get_mbids_from_gid_redirect_tables() - with mb_session() as db: - query = db.query(Recording) + with mb_session() as mb_db: + query = mb_db.query(Recording) recordings = get_entities_by_gids( query=query, From b11e2a7c5f535e57267275a08d5f6a7c269d2ae2 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Thu, 5 Jul 2018 23:59:57 +0530 Subject: [PATCH 082/125] Add replication_control table, config variables & call statement --- admin/sql/create_musicbrainz_tables.sql | 6 ++++++ config.py.example | 6 ++++++ manage.py | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/admin/sql/create_musicbrainz_tables.sql b/admin/sql/create_musicbrainz_tables.sql index fdf187422..706b338db 100644 --- a/admin/sql/create_musicbrainz_tables.sql +++ b/admin/sql/create_musicbrainz_tables.sql @@ -266,4 +266,10 @@ CREATE TABLE musicbrainz.artist_type ( gid uuid NOT NULL ); +CREATE TABLE 
musicbrainz.replication_control ( + id SERIAL, + current_replication_sequence INTEGER, + last_replication_date TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + COMMIT; diff --git a/config.py.example b/config.py.example index ec0e6ec91..5138c0234 100644 --- a/config.py.example +++ b/config.py.example @@ -71,3 +71,9 @@ RECORDINGS_FETCHED_PER_BATCH = 10000 # between every 2 batches SLEEP_DURATION = 30 # number of seconds to wait between runs BATCH_SLEEP_DURATION = 5 # number of seconds to wait between batches + +# Base url to download the replication packets +REPLICATION_PACKETS_URL = "https://metabrainz.org/api/musicbrainz/" + +# Token to access any MetaBrainz API +# ACCESS_TOKEN = "" diff --git a/manage.py b/manage.py index 771bd7ed8..4236998af 100644 --- a/manage.py +++ b/manage.py @@ -19,6 +19,7 @@ import webserver from brainzutils import musicbrainz_db from db.testing import DatabaseTestCase +import musicbrainz_importer.apply_replication_changes import webserver.external.get_entities @@ -201,6 +202,11 @@ def get_entities(): print('Redirecting mbids to original entities...') webserver.external.get_entities.get_original_entity() + +def apply_replication_changes(): + print("\nUpdating musicbrainz schema by applying replication packets...") + musicbrainz_importer.apply_replication_changes.main() + # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") From e0b6be7fd099d5639b6a94c70e740cdb5bc8c714 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sat, 7 Jul 2018 21:37:44 +0530 Subject: [PATCH 083/125] Add a script to apply replication packets in musicbrainz schema --- db/import_mb_data.py | 28 ++ .../apply_replication_changes.py | 264 ++++++++++++++++++ 2 files changed, 292 insertions(+) create mode 100644 musicbrainz_importer/apply_replication_changes.py diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 3b01fb1e4..a78eec24a 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -14,6 +14,34 @@ def 
load_musicbrainz_schema_data(connection, table_name): result = connection.execute(query) return result.fetchall() +def join_columns(columns): + columns[0] = ':' + columns[0] + return ',:'.join(columns) + +def insert_data_into_musicbrainz_schema(connection, transaction, table_name, columns, values): + trans = connection.begin() + query = text(""" + INSERT INTO musicbrainz.{table_name} ({columns}) + VALUES ({value_str}) + """.format(table_name=table_name, + columns=','.join(columns), + value_str=join_columns(columns))) + + result = connection.execute(query, values) + transaction.commit() + +def get_data_from_musicbrainz(table_name, data, column='id'): + with musicbrainz_db.engine.begin() as connection: + query = text(""" + SELECT * + FROM %s + WHERE %s=%s + """ % (table_name, column, data)) + + result = connection.execute(query) + values = dict(result.fetchone()) + columns = [key for key in values] + return table_name, columns, value def load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, artist_credit_from_recording): """Fetch artist_credit table data from MusicBrainz database for the diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py new file mode 100644 index 000000000..03a3a6243 --- /dev/null +++ b/musicbrainz_importer/apply_replication_changes.py @@ -0,0 +1,264 @@ +import tarfile +import os +import re +import urllib2 +import shutil +import tempfile +from flask import current_app +import db +from brainzutils import musicbrainz_db +from sqlalchemy import text +from sqlalchemy.exc import IntegrityError +import db.import_mb_data + +include_tables = ['language', 'artist_credit_name', 'artist', 'artist_gid_redirect', 'area', 'area_type', 'recording_gid_redirect', \ + 'script', 'release_gid_redirect', 'recording', 'track', 'artist_credit', 'release_group_primary_type', 'release_group', \ + 'release_group_gid_redirect', 'release', 'medium', 
'medium_format', 'release_status', 'release_packaging', 'gender', \ + 'artist_type'] + +ESCAPES = (('\\b', '\b'), ('\\f', '\f'), ('\\n', '\n'), ('\\r', '\r'), + ('\\t', '\t'), ('\\v', '\v'), ('\\\\', '\\')) + +def parse_name(table): + if '.' in table: + schema, table = table.split('.', 1) + schema = 'musicbrainz' + table = table.strip('"') + return schema, table + + +def parse_data_fields(s): + fields = {} + for name, value in re.findall(r'''"([^"]+)"=('(?:''|[^'])*')? ''', s): + if not value: + value = None + else: + value = value[1:-1].replace("''", "'").replace("\\\\", "\\") + fields[name] = value + return fields + + +def parse_bool(s): + return s == 't' + + +def unescape(s): + if s == '\\N': + return None + for orig, repl in ESCAPES: + s = s.replace(orig, repl) + return s + + +def read_psql_dump(fp, types): + for line in fp: + values = map(unescape, line.rstrip('\r\n').split('\t')) + for i, value in enumerate(values): + if value is not None: + values[i] = types[i](value) + yield values + + +def get_table_and_data(message): + mess = message.split(' ') + word = mess.index('Key') + 1 + column, data = mess[word].split('=') + column, data = column.strip('()'), data.strip('()') + return column, data + + +def insert_new_row(table, data, main_connection, main_transaction, sql, params, todo_list=None): + if todo_list is None: + todo_list = [] + table_name, columns, values = db.import_mb_data.get_data_from_musicbrainz(table, data) + with db.engine.connect() as conn: + trans = conn.begin() + try: + db.import_mb_data.insert_data_into_musicbrainz_schema(conn, trans, table_name, columns, values) + if len(todo_list): + todo_list.remove((table, data)) + table = todo_list[len(todo_list)-1][0] + data = todo_list[len(todo_list)-1][1] + insert_new_row(table, data, main_connection, main_transaction, sql, params, todo_list) + else: + update_row(sql, params, main_connection, main_transaction) + except IntegrityError as e: + trans.rollback() + table, data = 
get_table_and_data(e.message) + todo_list.append((table, data)) + insert_new_row(table, data, main_connection, main_transaction, sql, params, todo_list) + + +def update_row(sql, params, main_connection, main_transaction): + try: + main_connection.execute(sql, params) + main_transaction.commit() + except IntegrityError as e: + main_transaction.rollback() + table, data = get_table_and_data(e.message) + insert_new_row(table, data, main_connection, main_transaction, sql, params) + + +class PacketImporter(object): + + def __init__(self, replication_seq): + self._data = {} + self._transactions = {} + self._replication_seq = replication_seq + + def load_pending_data(self, fp): + dump = read_psql_dump(fp, [int, parse_bool, parse_data_fields]) + for id, key, values in dump: + self._data[(id, key)] = values + + def load_pending(self, fp): + dump = read_psql_dump(fp, [int, str, str, int]) + for id, table, type, xid in dump: + schema, table = parse_name(table) + transaction = self._transactions.setdefault(xid, []) + transaction.append((id, schema, table, type)) + + def process(self): + with db.engine.connect() as connection: + stats = {} + for xid in sorted(self._transactions.keys()): + transaction = self._transactions[xid] + print ' - Running transaction', xid + for id, schema, table, type in sorted(transaction): + trans = connection.begin() + if schema == 'musicbrainz' and table in include_tables: + fulltable = '%s.%s' % (schema, table) + if fulltable not in stats: + stats[fulltable] = {'d': 0, 'u': 0} + + if type == 'u' or type == 'd': + stats[fulltable][type] += 1 + keys = self._data.get((id, True), {}) + values = self._data.get((id, False), {}) + + params = [] + if type == 'd': + sql = 'DELETE FROM %s' % (fulltable,) + elif type == 'u': + sql_values = ', '.join('%s=%%s' % i for i in values) + sql = 'UPDATE %s SET %s' % (fulltable, sql_values) + params = values.values() + + if type == 'd' or type == 'u': + sql += ' WHERE ' + ' AND '.join('%s%s%%s' % (value, ' IS ' if 
keys[value] is None else '=') for value in keys.keys()) + params.extend(keys.values()) + + if type == 'd': + if keys or values: + try: + connection.execute(sql, params) + trans.commit() + print 'Deleted rows from ' + table + ' table' + except IntegrityError as e: + trans.rollback() + if type == 'u': + if keys or values: + update_row(sql, params, connection, trans) + print 'Updated rows in ' + table + ' table' + print 'COMMIT; --', xid + # print ' - Statistics:' + # for table in sorted(stats.keys()): + # print ' * %-30s\t%d\t%d' % (table, stats[table]['u'], stats[table]['d']) + print secsy + + +def process_tar(fileobj, expected_schema_seq, replication_seq): + print "Processing", fileobj.name + tar = tarfile.open(fileobj=fileobj, mode='r:bz2') + importer = PacketImporter(replication_seq) + for member in tar: + if member.name == 'SCHEMA_SEQUENCE': + schema_seq = int(tar.extractfile(member).read().strip()) + if schema_seq != expected_schema_seq: + raise Exception("Mismatched schema sequence, %d (database) vs %d (replication packet)" % (expected_schema_seq, schema_seq)) + elif member.name == 'TIMESTAMP': + ts = tar.extractfile(member).read().strip() + print ' - Packet was produced at', ts + elif member.name in ('mbdump/Pending', 'mbdump/dbmirror_pending'): + importer.load_pending(tar.extractfile(member)) + elif member.name in ('mbdump/PendingData', 'mbdump/dbmirror_pendingdata'): + importer.load_pending_data(tar.extractfile(member)) + importer.process() + + +def download_packet(base_url, token, replication_seq): + url = base_url.rstrip("/") + "/replication-%d.tar.bz2" % replication_seq + if token: + url += '?token=' + token + print "Downloading", url + try: + data = urllib2.urlopen(url, timeout=60) + except urllib2.HTTPError, e: + if e.code == 404: + return None + raise + tmp = tempfile.NamedTemporaryFile(suffix='.tar.bz2') + shutil.copyfileobj(data, tmp) + data.close() + tmp.seek(0) + return tmp + + +def update_replication_sequence(replication_seq): + with 
db.engine.begin() as connection: + query = text(""" + UPDATE musicbrainz.replication_control + SET current_replication_sequence = %s""" % (replication_seq) + ) + connection.execute(query) + + +def write_replication_control(replication_seq): + with db.engine.begin() as connection: + query = text(""" + INSERT INTO musicbrainz.replication_control (current_replication_sequence) + VALUES (:replication_seq) + """) + connection.execute(query, {'replication_seq': replication_seq}) + + +def main(): + base_url = current_app.config['REPLICATION_PACKETS_URL'] + if current_app.config['ACCESS_TOKEN']: + token = current_app.config['ACCESS_TOKEN'] + else: + token = None + + with musicbrainz_db.engine.begin() as connection: + query = text(""" + SELECT current_schema_sequence, current_replication_sequence + FROM replication_control + """) + result = connection.execute(query) + schema_seq, mb_replication_seq = result.fetchone() + print schema_seq, mb_replication_seq + + with db.engine.begin() as connection: + query = text(""" + SELECT current_replication_sequence + FROM musicbrainz.replication_control + """) + result = connection.execute(query) + sequence = result.fetchone() + ab_replication_seq = sequence[0] + + if ab_replication_seq is None or ab_replication_seq < mb_replication_seq: + replication_seq = mb_replication_seq + write_replication_control(replication_seq) + else: + replication_seq = ab_replication_seq + while True: + replication_seq += 1 + tmp = download_packet(base_url, token, replication_seq) + if tmp is None: + print 'Not found, stopping' + break + process_tar(tmp, schema_seq, replication_seq) + tmp.close() + update_replication_sequence(replication_seq) + print 'Done applying all the replication packets till last hour' From 088e6eb734d36c7f22a92c3937c71096ad9bcb4a Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 8 Jul 2018 00:48:19 +0530 Subject: [PATCH 084/125] Add a query to drop the replication control table for testing --- db/testing.py | 1 + 1 file changed, 1 
insertion(+) diff --git a/db/testing.py b/db/testing.py index 27eb3549f..3bfd009ff 100644 --- a/db/testing.py +++ b/db/testing.py @@ -89,6 +89,7 @@ def drop_tables(self): connection.execute('DROP TABLE IF EXISTS musicbrainz.script CASCADE;') connection.execute('DROP TABLE IF EXISTS musicbrainz.gender CASCADE;') connection.execute('DROP TABLE IF EXISTS musicbrainz.artist_type CASCADE;') + connection.execute('DROP TABLE IF EXISTS musicbrainz.replication_control CASCADE;') def drop_types(self): with db.engine.connect() as connection: From 81704b8dbe479ced3a4a269bd6d9a2dc3454c486 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 25 Jul 2018 02:02:16 +0530 Subject: [PATCH 085/125] Include one more new line between functions --- db/import_mb_data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index a78eec24a..36b86c8e9 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -18,6 +18,7 @@ def join_columns(columns): columns[0] = ':' + columns[0] return ',:'.join(columns) + def insert_data_into_musicbrainz_schema(connection, transaction, table_name, columns, values): trans = connection.begin() query = text(""" @@ -30,6 +31,7 @@ def insert_data_into_musicbrainz_schema(connection, transaction, table_name, col result = connection.execute(query, values) transaction.commit() + def get_data_from_musicbrainz(table_name, data, column='id'): with musicbrainz_db.engine.begin() as connection: query = text(""" @@ -43,6 +45,7 @@ def get_data_from_musicbrainz(table_name, data, column='id'): columns = [key for key in values] return table_name, columns, value + def load_artist_credit(connection, MB_release_data, MB_release_group_data, MB_track_data, MB_artist_credit_name_data, artist_credit_from_recording): """Fetch artist_credit table data from MusicBrainz database for the recording MBIDs in AcousticBrainz database. 
From 85cc90553dbc14e5ae4e09a8ae3c74ce6e7c544d Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 27 Jul 2018 15:23:46 +0530 Subject: [PATCH 086/125] Use print_function to ease future python3 changes --- .../apply_replication_changes.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index 03a3a6243..152195826 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -1,3 +1,4 @@ +from __future__ import print_function import tarfile import os import re @@ -123,7 +124,7 @@ def process(self): stats = {} for xid in sorted(self._transactions.keys()): transaction = self._transactions[xid] - print ' - Running transaction', xid + print (' - Running transaction', xid) for id, schema, table, type in sorted(transaction): trans = connection.begin() if schema == 'musicbrainz' and table in include_tables: @@ -153,22 +154,21 @@ def process(self): try: connection.execute(sql, params) trans.commit() - print 'Deleted rows from ' + table + ' table' + print ('Deleted rows from ' + table + ' table') except IntegrityError as e: trans.rollback() if type == 'u': if keys or values: update_row(sql, params, connection, trans) - print 'Updated rows in ' + table + ' table' - print 'COMMIT; --', xid + print ('Updated rows in ' + table + ' table') + print ('COMMIT; --', xid) # print ' - Statistics:' # for table in sorted(stats.keys()): # print ' * %-30s\t%d\t%d' % (table, stats[table]['u'], stats[table]['d']) - print secsy def process_tar(fileobj, expected_schema_seq, replication_seq): - print "Processing", fileobj.name + print ("Processing", fileobj.name) tar = tarfile.open(fileobj=fileobj, mode='r:bz2') importer = PacketImporter(replication_seq) for member in tar: @@ -178,7 +178,7 @@ def process_tar(fileobj, expected_schema_seq, replication_seq): raise Exception("Mismatched schema sequence, %d 
(database) vs %d (replication packet)" % (expected_schema_seq, schema_seq)) elif member.name == 'TIMESTAMP': ts = tar.extractfile(member).read().strip() - print ' - Packet was produced at', ts + print (' - Packet was produced at', ts) elif member.name in ('mbdump/Pending', 'mbdump/dbmirror_pending'): importer.load_pending(tar.extractfile(member)) elif member.name in ('mbdump/PendingData', 'mbdump/dbmirror_pendingdata'): @@ -190,7 +190,7 @@ def download_packet(base_url, token, replication_seq): url = base_url.rstrip("/") + "/replication-%d.tar.bz2" % replication_seq if token: url += '?token=' + token - print "Downloading", url + print ("Downloading", url) try: data = urllib2.urlopen(url, timeout=60) except urllib2.HTTPError, e: @@ -236,7 +236,7 @@ def main(): """) result = connection.execute(query) schema_seq, mb_replication_seq = result.fetchone() - print schema_seq, mb_replication_seq + print (schema_seq, mb_replication_seq) with db.engine.begin() as connection: query = text(""" @@ -256,9 +256,9 @@ def main(): replication_seq += 1 tmp = download_packet(base_url, token, replication_seq) if tmp is None: - print 'Not found, stopping' + print ('Not found, stopping') break process_tar(tmp, schema_seq, replication_seq) tmp.close() update_replication_sequence(replication_seq) - print 'Done applying all the replication packets till last hour' + print ('Done applying all the replication packets till last hour') From 1a2fffe35e5e6c54e18461ac09122c8a06369aa5 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 27 Jul 2018 15:26:16 +0530 Subject: [PATCH 087/125] Remove un useful print statements --- musicbrainz_importer/apply_replication_changes.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index 152195826..24e69a897 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -162,9 +162,6 @@ def process(self): 
update_row(sql, params, connection, trans) print ('Updated rows in ' + table + ' table') print ('COMMIT; --', xid) - # print ' - Statistics:' - # for table in sorted(stats.keys()): - # print ' * %-30s\t%d\t%d' % (table, stats[table]['u'], stats[table]['d']) def process_tar(fileobj, expected_schema_seq, replication_seq): From e8760541ec29617a767a7374a25d5b6d2d007657 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 27 Jul 2018 15:35:08 +0530 Subject: [PATCH 088/125] passing variables in the query and getting values from corresponding dict --- db/import_mb_data.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 36b86c8e9..696e63eec 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -36,9 +36,13 @@ def get_data_from_musicbrainz(table_name, data, column='id'): with musicbrainz_db.engine.begin() as connection: query = text(""" SELECT * - FROM %s - WHERE %s=%s - """ % (table_name, column, data)) + FROM :table_name + WHERE :column = :data + """), { + 'table_name': table_name, + 'column': column, + 'data': data, + } result = connection.execute(query) values = dict(result.fetchone()) From 2db838be36654e0d1b1617aa0791ddb6b6123cad Mon Sep 17 00:00:00 2001 From: rsh7 Date: Fri, 27 Jul 2018 20:20:20 +0530 Subject: [PATCH 089/125] Add MIT license for apply_replication_script with copyright --- .../apply_replication_changes.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index 24e69a897..91a3ba200 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -1,3 +1,28 @@ +""" +The MIT License for apply_replication_changes script + +Copyright (c) 2018 Rashi Sah +Copyright (c) 2018 Lukas Lalinsky + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated 
documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + from __future__ import print_function import tarfile import os From 15f407f0f12eb847c15eb0c94599a0d80f6ce900 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Tue, 31 Jul 2018 01:56:09 +0530 Subject: [PATCH 090/125] Add docstrings containing the explanations for the working of functions --- .../apply_replication_changes.py | 99 ++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index 91a3ba200..d600fc8ea 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -46,6 +46,14 @@ ('\\t', '\t'), ('\\v', '\v'), ('\\\\', '\\')) def parse_name(table): + """Store schema name and table name separately in different variables. + + Args: + table: A combined schema and table name of the form - schema.table + + Returns: + separate schema and table names. + """ if '.' 
in table: schema, table = table.split('.', 1) schema = 'musicbrainz' @@ -86,6 +94,15 @@ def read_psql_dump(fp, types): def get_table_and_data(message): + """Get table name and data values from the IntegrityError message (if any) due to + foreign key constraints. + + Args: + message: SqlAlchemy integrity error message. + + Returns: + column name and data values to be updated for a table. + """ mess = message.split(' ') word = mess.index('Key') + 1 column, data = mess[word].split('=') @@ -94,6 +111,19 @@ def get_table_and_data(message): def insert_new_row(table, data, main_connection, main_transaction, sql, params, todo_list=None): + """This function insert new rows in the tables after we get any IntegrityError due to foreign + key constraints. + + Args: + table: name of the table in which the data is to be inserted. + data: values to be inserted. + main_connection: sql connection to write into the database. + main_transaction: transaction for every write operation. + sql: insert query. + params: values for the query. + todo_list: a list of tuples of type (table, data) used to insert new data + in the respective tables. + """ if todo_list is None: todo_list = [] table_name, columns, values = db.import_mb_data.get_data_from_musicbrainz(table, data) @@ -116,6 +146,15 @@ def insert_new_row(table, data, main_connection, main_transaction, sql, params, def update_row(sql, params, main_connection, main_transaction): + """This function is a part of processing the replication packet to update + the data present in database. + + Args: + sql: update query. + params: parameter values for the query. + main_connection: sql connection to write into the database. + main_transaction: transaction for every write operation. 
+ """ try: main_connection.execute(sql, params) main_transaction.commit() @@ -126,18 +165,34 @@ def update_row(sql, params, main_connection, main_transaction): class PacketImporter(object): - + """PacketImporter class to process the replication packets for proper changes + in the database. + """ def __init__(self, replication_seq): + """Initialization of the class objects. + """ self._data = {} self._transactions = {} self._replication_seq = replication_seq def load_pending_data(self, fp): + """Load id, key and values from dbmirror_pending data files + and stores them in data dictionary. + + Args: + fp: tar file of replication packet. + """ dump = read_psql_dump(fp, [int, parse_bool, parse_data_fields]) for id, key, values in dump: self._data[(id, key)] = values def load_pending(self, fp): + """Load schema name, table names from dbmirror_pending file and + maintain a transaction dictionary for the data specified in the files. + + Args: + fp: tar file of replication packet. + """ dump = read_psql_dump(fp, [int, str, str, int]) for id, table, type, xid in dump: schema, table = parse_name(table) @@ -145,6 +200,10 @@ def load_pending(self, fp): transaction.append((id, schema, table, type)) def process(self): + """Process a replication packet and apply update and deletion + for the data present in the database by running a acousticbrainz + db connection. 
+ """ with db.engine.connect() as connection: stats = {} for xid in sorted(self._transactions.keys()): @@ -152,6 +211,8 @@ def process(self): print (' - Running transaction', xid) for id, schema, table, type in sorted(transaction): trans = connection.begin() + # Applying the changes for the tables present in musicbrainz + # schema in acousticbrainz db if schema == 'musicbrainz' and table in include_tables: fulltable = '%s.%s' % (schema, table) if fulltable not in stats: @@ -190,6 +251,17 @@ def process(self): def process_tar(fileobj, expected_schema_seq, replication_seq): + """Processes the compressed replication packet, call the functions to load the data + from mbdump/dbmirror_pending and mbdump.dbmirror_pendingdata files. + Then call the 'process' function from PacketImporter class to apply the changes to + the database. + + Args: + fileobj: tar file of the replication packet. + expected_schema_seq: The expected schema sequence that should be matched with the + one listed in replication packets. + replication_seq: The number of the replication packet. + """ print ("Processing", fileobj.name) tar = tarfile.open(fileobj=fileobj, mode='r:bz2') importer = PacketImporter(replication_seq) @@ -209,6 +281,16 @@ def process_tar(fileobj, expected_schema_seq, replication_seq): def download_packet(base_url, token, replication_seq): + """Download the replication packet for the specified replication sequence + and convert the packet into a tar.bz2 file. + + Args: + base_url: The URL to download the replication packets from. + token: An access token to allow download of the packets from MetaBrainz + website. For more information, visit - https://metabrainz.org/api/ + + Returns: tar file of the downloaded replication packet. 
+ """ url = base_url.rstrip("/") + "/replication-%d.tar.bz2" % replication_seq if token: url += '?token=' + token @@ -227,6 +309,12 @@ def download_packet(base_url, token, replication_seq): def update_replication_sequence(replication_seq): + """Store new replication sequence into replication_control table for future + updates and deletes from replication packets. + + Args: + replication_seq: Current replication sequence to replace the old one. + """ with db.engine.begin() as connection: query = text(""" UPDATE musicbrainz.replication_control @@ -236,6 +324,11 @@ def update_replication_sequence(replication_seq): def write_replication_control(replication_seq): + """Insert first replication sequence into replication_control table. + + Args: + replication_seq: first replication sequence to start the download of packets from. + """ with db.engine.begin() as connection: query = text(""" INSERT INTO musicbrainz.replication_control (current_replication_sequence) @@ -245,6 +338,10 @@ def write_replication_control(replication_seq): def main(): + """Fetch the replication sequence from the database and call the function + to download all the replication packets from last replication sequence until + the previous hour. 
+ """ base_url = current_app.config['REPLICATION_PACKETS_URL'] if current_app.config['ACCESS_TOKEN']: token = current_app.config['ACCESS_TOKEN'] From ebca22b13e8c26a24fe7f3ecb65bf53bae1479f8 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 1 Aug 2018 03:04:17 +0530 Subject: [PATCH 091/125] change formatting style of get_data_from_musicbrainz function --- db/import_mb_data.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 696e63eec..3d7428c31 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -36,13 +36,12 @@ def get_data_from_musicbrainz(table_name, data, column='id'): with musicbrainz_db.engine.begin() as connection: query = text(""" SELECT * - FROM :table_name - WHERE :column = :data - """), { - 'table_name': table_name, - 'column': column, - 'data': data, - } + FROM {table_name} + WHERE {column} = {data} + """.format(table_name=table_name, + column=column, + data=data + )) result = connection.execute(query) values = dict(result.fetchone()) From 608782b08c372efb9a6fdca6cffb690004791a40 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 1 Aug 2018 03:07:27 +0530 Subject: [PATCH 092/125] Remove print statement for commit transacrtion message and add newlines --- musicbrainz_importer/apply_replication_changes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index d600fc8ea..d0b770bc1 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -211,6 +211,7 @@ def process(self): print (' - Running transaction', xid) for id, schema, table, type in sorted(transaction): trans = connection.begin() + # Applying the changes for the tables present in musicbrainz # schema in acousticbrainz db if schema == 'musicbrainz' and table in include_tables: @@ -247,7 +248,6 @@ def process(self): if keys or values: 
update_row(sql, params, connection, trans) print ('Updated rows in ' + table + ' table') - print ('COMMIT; --', xid) def process_tar(fileobj, expected_schema_seq, replication_seq): @@ -371,6 +371,7 @@ def main(): write_replication_control(replication_seq) else: replication_seq = ab_replication_seq + while True: replication_seq += 1 tmp = download_packet(base_url, token, replication_seq) From e5749dd7cb883e144c3fff66b0a621bb4874c998 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 1 Aug 2018 03:08:54 +0530 Subject: [PATCH 093/125] Move print message of replication sequence number just before download iteration --- musicbrainz_importer/apply_replication_changes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index d0b770bc1..d234a13ca 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -355,7 +355,7 @@ def main(): """) result = connection.execute(query) schema_seq, mb_replication_seq = result.fetchone() - print (schema_seq, mb_replication_seq) + print (schema_seq) with db.engine.begin() as connection: query = text(""" @@ -374,6 +374,7 @@ def main(): while True: replication_seq += 1 + print ("Replication Sequence:", replication_seq) tmp = download_packet(base_url, token, replication_seq) if tmp is None: print ('Not found, stopping') From cf1660086ecb1744239693bd7c257af554380d80 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 1 Aug 2018 03:10:40 +0530 Subject: [PATCH 094/125] Include a skip message for tables not in the AB database --- musicbrainz_importer/apply_replication_changes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index d234a13ca..e09530936 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py 
@@ -248,6 +248,8 @@ def process(self): if keys or values: update_row(sql, params, connection, trans) print ('Updated rows in ' + table + ' table') + else: + print ('Skipping changes, ' + table + ' table not found in the database') def process_tar(fileobj, expected_schema_seq, replication_seq): From d7089ea332b5f983af92df0794546c567809de38 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Wed, 1 Aug 2018 03:28:49 +0530 Subject: [PATCH 095/125] Add more documentation to the functions and improve a print message --- .../apply_replication_changes.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index e09530936..c6e26f1b4 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -62,6 +62,12 @@ def parse_name(table): def parse_data_fields(s): + """Parses the data present in mbdump files to specific variables for their use. + Removes useless quotes and other punctuations. + + Returns: + Proper string with names of the data and corresponding values. + """ fields = {} for name, value in re.findall(r'''"([^"]+)"=('(?:''|[^'])*')? ''', s): if not value: @@ -77,6 +83,11 @@ def parse_bool(s): def unescape(s): + """Remove extra escapes from the data. + + Returns: + unescaped string. + """ if s == '\\N': return None for orig, repl in ESCAPES: @@ -85,6 +96,13 @@ def unescape(s): def read_psql_dump(fp, types): + """Read mbdump data, split the values present in rows in mbdump/dbmirror_pending + and mbdata/dbmirror_pendingdata. + + Args: + fp: tar file of replication packet. + types: data types of all data of the rows. 
+ """ for line in fp: values = map(unescape, line.rstrip('\r\n').split('\t')) for i, value in enumerate(values): @@ -208,7 +226,7 @@ def process(self): stats = {} for xid in sorted(self._transactions.keys()): transaction = self._transactions[xid] - print (' - Running transaction', xid) + print ('Running transaction' + xid + '...') for id, schema, table, type in sorted(transaction): trans = connection.begin() From 90ff6eb74354e2e27a6661aa401e0fa8e4041d1c Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 5 Aug 2018 02:40:42 +0530 Subject: [PATCH 096/125] Add a new line between 2 functions --- db/import_mb_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 3d7428c31..c5fffff49 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -8,12 +8,12 @@ BATCH_SLEEP_DURATION = 5 # number of seconds to wait between batches logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) - def load_musicbrainz_schema_data(connection, table_name): query = text("""SELECT * FROM musicbrainz.%s""" % (table_name)) result = connection.execute(query) return result.fetchall() + def join_columns(columns): columns[0] = ':' + columns[0] return ',:'.join(columns) From 1e76653d084ceef632d1738a756e51482ebc6b1b Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 5 Aug 2018 02:55:46 +0530 Subject: [PATCH 097/125] Add docstrings for the general functions to load and insert data --- db/import_mb_data.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index c5fffff49..7de390633 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -9,17 +9,46 @@ logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) def load_musicbrainz_schema_data(connection, table_name): + """General function to load all the data from the specified + musicbrainz schema table name. 
+ + Args: + connection: database connection to execute the query. + table_name: Name of the table from musicbrainz schema. + + Returns: + Specified table data fetched from the database. + """ query = text("""SELECT * FROM musicbrainz.%s""" % (table_name)) result = connection.execute(query) return result.fetchall() def join_columns(columns): + """Join the column names of the tables by a comma in between and + a colon as prefix to pass the values in the insert query. + + Args: + columns: A list of all the columns of any table. + + Returns: + A string of column names separated by commas. + """ columns[0] = ':' + columns[0] return ',:'.join(columns) def insert_data_into_musicbrainz_schema(connection, transaction, table_name, columns, values): + """Insert data into musicbrainz schema tables whose table_name, column names and + data values are specified. + + Args: + connection: database connection to execute the query. + transaction: transaction for every write operation. + table_name: Name of the table to apply the insert query on. + columns: Name of all the columns of the given table. + values: Data values of the rows to insert into the tables. + """ trans = connection.begin() query = text(""" INSERT INTO musicbrainz.{table_name} ({columns}) @@ -33,6 +62,19 @@ def insert_data_into_musicbrainz_schema(connection, transaction, table_name, col def get_data_from_musicbrainz(table_name, data, column='id'): + """Fetch data from main MusicBrainz database for the given column name, + data value and table name. + + Args: + table_name: Table name whose data is to be fetched. + data: data value whose corresponding row is fetched. + column: Column names for the tables. Take default as 'id' if not + specified. + + Returns: + Table name, columns and data values fetched from the MusicBrainz + database. 
+ """ with musicbrainz_db.engine.begin() as connection: query = text(""" SELECT * From 2fdfd374a11ee1c6f4e5b1f842d8a9addde00040 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 5 Aug 2018 14:53:11 +0530 Subject: [PATCH 098/125] Correct indentation in insert function and convert int2str in a print message --- db/import_mb_data.py | 20 +++++++++---------- .../apply_replication_changes.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 7de390633..e17df1874 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -49,16 +49,16 @@ def insert_data_into_musicbrainz_schema(connection, transaction, table_name, col columns: Name of all the columns of the given table. values: Data values of the rows to insert into the tables. """ - trans = connection.begin() - query = text(""" - INSERT INTO musicbrainz.{table_name} ({columns}) - VALUES ({value_str}) - """.format(table_name=table_name, - columns=','.join(columns), - value_str=join_columns(columns))) - - result = connection.execute(query, values) - transaction.commit() + trans = connection.begin() + query = text(""" + INSERT INTO musicbrainz.{table_name} ({columns}) + VALUES ({value_str}) + """.format(table_name=table_name, + columns=','.join(columns), + value_str=join_columns(columns))) + + result = connection.execute(query, values) + transaction.commit() def get_data_from_musicbrainz(table_name, data, column='id'): diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index c6e26f1b4..e8e0f01ef 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -226,7 +226,7 @@ def process(self): stats = {} for xid in sorted(self._transactions.keys()): transaction = self._transactions[xid] - print ('Running transaction' + xid + '...') + print ('Running transaction' + str(xid) + '...') for id, schema, table, type in sorted(transaction): 
trans = connection.begin() From 0653a4b89822a148385ff9429191dbf48dbe393f Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 5 Aug 2018 14:54:12 +0530 Subject: [PATCH 099/125] Add a space in a print message --- musicbrainz_importer/apply_replication_changes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index e8e0f01ef..7d88015a2 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -226,7 +226,7 @@ def process(self): stats = {} for xid in sorted(self._transactions.keys()): transaction = self._transactions[xid] - print ('Running transaction' + str(xid) + '...') + print ('Running transaction ' + str(xid) + '...') for id, schema, table, type in sorted(transaction): trans = connection.begin() From 7e5190bc56d92f6a80da1e88404b0ef706458461 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 5 Aug 2018 14:55:16 +0530 Subject: [PATCH 100/125] Move skip changes message to the outer loop --- musicbrainz_importer/apply_replication_changes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index 7d88015a2..8181c0f25 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -266,8 +266,8 @@ def process(self): if keys or values: update_row(sql, params, connection, trans) print ('Updated rows in ' + table + ' table') - else: - print ('Skipping changes, ' + table + ' table not found in the database') + else: + print ('Skipping changes, ' + table + ' table not found in the database') def process_tar(fileobj, expected_schema_seq, replication_seq): From 9f190aacd57e1d911cb1718369e0b5ff291817cb Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 19:43:48 +0530 Subject: [PATCH 101/125] Remove a useless print 
statement --- musicbrainz_importer/apply_replication_changes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index 8181c0f25..9ba34b268 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -375,7 +375,6 @@ def main(): """) result = connection.execute(query) schema_seq, mb_replication_seq = result.fetchone() - print (schema_seq) with db.engine.begin() as connection: query = text(""" From 4b615bcfa7f319699d448fa7ae3242680103ba60 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 19:57:08 +0530 Subject: [PATCH 102/125] Move the queries to the db module --- db/data.py | 22 +++++++++++++++++++ .../apply_replication_changes.py | 17 ++------------ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/db/data.py b/db/data.py index d45477f64..fc955aa08 100644 --- a/db/data.py +++ b/db/data.py @@ -567,3 +567,25 @@ def get_mbids_from_gid_redirect_tables(): for mbid in mbids: recording_mbids.append(str(mbid[0])) return recording_mbids + + +def get_current_schema_and_replication_sequence(): + with musicbrainz_db.engine.begin() as connection: + query = text(""" + SELECT current_schema_sequence, current_replication_sequence + FROM replication_control + """) + result = connection.execute(query) + schema_seq, mb_replication_seq = result.fetchone() + return schema_seq, mb_replication_seq + + +def get_replication_sequence_from_mb_schema(): + with db.engine.begin() as connection: + query = text(""" + SELECT current_replication_sequence + FROM musicbrainz.replication_control + """) + result = connection.execute(query) + sequence = result.fetchone() + return sequence[0] diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index 9ba34b268..2b3f72d02 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ 
b/musicbrainz_importer/apply_replication_changes.py @@ -368,22 +368,9 @@ def main(): else: token = None - with musicbrainz_db.engine.begin() as connection: - query = text(""" - SELECT current_schema_sequence, current_replication_sequence - FROM replication_control - """) - result = connection.execute(query) - schema_seq, mb_replication_seq = result.fetchone() + schema_seq, mb_replication_seq = get_current_schema_and_replication_sequence() - with db.engine.begin() as connection: - query = text(""" - SELECT current_replication_sequence - FROM musicbrainz.replication_control - """) - result = connection.execute(query) - sequence = result.fetchone() - ab_replication_seq = sequence[0] + ab_replication_seq = get_replication_sequence_from_mb_schema() if ab_replication_seq is None or ab_replication_seq < mb_replication_seq: replication_seq = mb_replication_seq From 0a43c08d2085f99602b4573b05a3a4d9c768fe9e Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 20:00:51 +0530 Subject: [PATCH 103/125] Move update and write replication sequence queries to db module --- db/data.py | 29 +++++++++++++++ .../apply_replication_changes.py | 37 ++----------------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/db/data.py b/db/data.py index fc955aa08..9baf4c311 100644 --- a/db/data.py +++ b/db/data.py @@ -589,3 +589,32 @@ def get_replication_sequence_from_mb_schema(): result = connection.execute(query) sequence = result.fetchone() return sequence[0] + + +def update_replication_sequence(replication_seq): + """Store new replication sequence into replication_control table for future + updates and deletes from replication packets. + + Args: + replication_seq: Current replication sequence to replace the old one. 
+ """ + with db.engine.begin() as connection: + query = text(""" + UPDATE musicbrainz.replication_control + SET current_replication_sequence = %s""" % (replication_seq) + ) + connection.execute(query) + + +def write_replication_control(replication_seq): + """Insert first replication sequence into replication_control table. + + Args: + replication_seq: first replication sequence to start the download of packets from. + """ + with db.engine.begin() as connection: + query = text(""" + INSERT INTO musicbrainz.replication_control (current_replication_sequence) + VALUES (:replication_seq) + """) + connection.execute(query, {'replication_seq': replication_seq}) diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index 2b3f72d02..1aed3c850 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -328,35 +328,6 @@ def download_packet(base_url, token, replication_seq): return tmp -def update_replication_sequence(replication_seq): - """Store new replication sequence into replication_control table for future - updates and deletes from replication packets. - - Args: - replication_seq: Current replication sequence to replace the old one. - """ - with db.engine.begin() as connection: - query = text(""" - UPDATE musicbrainz.replication_control - SET current_replication_sequence = %s""" % (replication_seq) - ) - connection.execute(query) - - -def write_replication_control(replication_seq): - """Insert first replication sequence into replication_control table. - - Args: - replication_seq: first replication sequence to start the download of packets from. 
- """ - with db.engine.begin() as connection: - query = text(""" - INSERT INTO musicbrainz.replication_control (current_replication_sequence) - VALUES (:replication_seq) - """) - connection.execute(query, {'replication_seq': replication_seq}) - - def main(): """Fetch the replication sequence from the database and call the function to download all the replication packets from last replication sequence until @@ -368,13 +339,13 @@ def main(): else: token = None - schema_seq, mb_replication_seq = get_current_schema_and_replication_sequence() + schema_seq, mb_replication_seq = db.data.get_current_schema_and_replication_sequence() - ab_replication_seq = get_replication_sequence_from_mb_schema() + ab_replication_seq = db.data.get_replication_sequence_from_mb_schema() if ab_replication_seq is None or ab_replication_seq < mb_replication_seq: replication_seq = mb_replication_seq - write_replication_control(replication_seq) + db.data.write_replication_control(replication_seq) else: replication_seq = ab_replication_seq @@ -387,5 +358,5 @@ def main(): break process_tar(tmp, schema_seq, replication_seq) tmp.close() - update_replication_sequence(replication_seq) + db.data.update_replication_sequence(replication_seq) print ('Done applying all the replication packets till last hour') From 0566d77d9decb16e7fae9ebf9af1576fbac9ce5f Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 20:15:44 +0530 Subject: [PATCH 104/125] Close the tar file --- db/data.py | 1 + manage.py | 2 +- musicbrainz_importer/apply_replication_changes.py | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/db/data.py b/db/data.py index 9baf4c311..5e468ec0d 100644 --- a/db/data.py +++ b/db/data.py @@ -10,6 +10,7 @@ from sqlalchemy import text import sqlalchemy.exc +from brainzutils import musicbrainz_db _whitelist_file = os.path.join(os.path.dirname(__file__), "tagwhitelist.json") _whitelist_tags = set(json.load(open(_whitelist_file))) diff --git a/manage.py b/manage.py index 
4236998af..81788178c 100644 --- a/manage.py +++ b/manage.py @@ -202,7 +202,7 @@ def get_entities(): print('Redirecting mbids to original entities...') webserver.external.get_entities.get_original_entity() - +@cli.command() def apply_replication_changes(): print("\nUpdating musicbrainz schema by applying replication packets...") musicbrainz_importer.apply_replication_changes.main() diff --git a/musicbrainz_importer/apply_replication_changes.py b/musicbrainz_importer/apply_replication_changes.py index 1aed3c850..f7f4d8c09 100644 --- a/musicbrainz_importer/apply_replication_changes.py +++ b/musicbrainz_importer/apply_replication_changes.py @@ -36,6 +36,7 @@ from sqlalchemy import text from sqlalchemy.exc import IntegrityError import db.import_mb_data +import db.data include_tables = ['language', 'artist_credit_name', 'artist', 'artist_gid_redirect', 'area', 'area_type', 'recording_gid_redirect', \ 'script', 'release_gid_redirect', 'recording', 'track', 'artist_credit', 'release_group_primary_type', 'release_group', \ @@ -298,6 +299,7 @@ def process_tar(fileobj, expected_schema_seq, replication_seq): elif member.name in ('mbdump/PendingData', 'mbdump/dbmirror_pendingdata'): importer.load_pending_data(tar.extractfile(member)) importer.process() + tar.close() def download_packet(base_url, token, replication_seq): From acae8b742d30c414bad2b60bf56e037715527bcc Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 22:20:00 +0530 Subject: [PATCH 105/125] Passing the variables in query and their values in a dictionary --- db/data.py | 6 +++--- db/import_mb_data.py | 34 ++++++++++++++++++---------------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/db/data.py b/db/data.py index 5e468ec0d..3edcc48ce 100644 --- a/db/data.py +++ b/db/data.py @@ -602,9 +602,9 @@ def update_replication_sequence(replication_seq): with db.engine.begin() as connection: query = text(""" UPDATE musicbrainz.replication_control - SET current_replication_sequence = %s""" % 
(replication_seq) - ) - connection.execute(query) + SET current_replication_sequence = :replication_seq + """) + connection.execute(query, {'replication_seq': replication_seq}) def write_replication_control(replication_seq): diff --git a/db/import_mb_data.py b/db/import_mb_data.py index e17df1874..53aa409fc 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -19,8 +19,8 @@ def load_musicbrainz_schema_data(connection, table_name): Returns: Specified table data fetched from the database. """ - query = text("""SELECT * FROM musicbrainz.%s""" % (table_name)) - result = connection.execute(query) + query = text("""SELECT * FROM musicbrainz.:table_name""") + result = connection.execute(query, {'table_name': table_name}) return result.fetchall() @@ -51,13 +51,14 @@ def insert_data_into_musicbrainz_schema(connection, transaction, table_name, col """ trans = connection.begin() query = text(""" - INSERT INTO musicbrainz.{table_name} ({columns}) - VALUES ({value_str}) - """.format(table_name=table_name, - columns=','.join(columns), - value_str=join_columns(columns))) + INSERT INTO musicbrainz.:table_name (:columns) + VALUES (:column_values) + """) - result = connection.execute(query, values) + result = connection.execute(query, {'table_name': table_name, + 'columns': ','.join(columns), + 'column_values': join_columns(columns)} + ) transaction.commit() @@ -78,14 +79,15 @@ def get_data_from_musicbrainz(table_name, data, column='id'): with musicbrainz_db.engine.begin() as connection: query = text(""" SELECT * - FROM {table_name} - WHERE {column} = {data} - """.format(table_name=table_name, - column=column, - data=data - )) - - result = connection.execute(query) + FROM :table_name + WHERE :column = :data + """) + + result = connection.execute(query, {'table_name': table_name, + 'column': column, + 'data': data} + ) + values = dict(result.fetchone()) columns = [key for key in values] return table_name, columns, value From 302ab501a7f8b0680897f9f417aa9dffc1591d00 Mon Sep 
17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 22:25:07 +0530 Subject: [PATCH 106/125] Add docstrings to get sequence functions in data.py --- db/data.py | 14 ++++++++++++++ db/import_mb_data.py | 1 + 2 files changed, 15 insertions(+) diff --git a/db/data.py b/db/data.py index 3edcc48ce..f0c367d35 100644 --- a/db/data.py +++ b/db/data.py @@ -571,6 +571,13 @@ def get_mbids_from_gid_redirect_tables(): def get_current_schema_and_replication_sequence(): + """Fetch current schema sequence and current replication number + from the musicbrainz database. + + Returns: + schema_seq: last schema sequence number. + mb_replication_seq: last updated replication sequence. + """ with musicbrainz_db.engine.begin() as connection: query = text(""" SELECT current_schema_sequence, current_replication_sequence @@ -582,6 +589,13 @@ def get_current_schema_and_replication_sequence(): def get_replication_sequence_from_mb_schema(): + """Fetch current replication sequence last updated in replication + control table in musicbrainz schema in AB database. + + Returns: + sequence[0]: current replication sequence number from a sqlachemy + type object. + """ with db.engine.begin() as connection: query = text(""" SELECT current_replication_sequence diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 53aa409fc..43c1ca8fa 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -8,6 +8,7 @@ BATCH_SLEEP_DURATION = 5 # number of seconds to wait between batches logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) + def load_musicbrainz_schema_data(connection, table_name): """General function to load all the data from the specified musicbrainz schema table name. 
From 8a0794315a2473f15d64db9fb51cca61da0c5262 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 23:33:44 +0530 Subject: [PATCH 107/125] Add proper formatting in query functions --- db/import_mb_data.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 43c1ca8fa..11c6bbacd 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -20,7 +20,7 @@ def load_musicbrainz_schema_data(connection, table_name): Returns: Specified table data fetched from the database. """ - query = text("""SELECT * FROM musicbrainz.:table_name""") + query = text("""SELECT * FROM musicbrainz.{table_name}""".format(table_name=table_name)) result = connection.execute(query, {'table_name': table_name}) return result.fetchall() @@ -52,14 +52,13 @@ def insert_data_into_musicbrainz_schema(connection, transaction, table_name, col """ trans = connection.begin() query = text(""" - INSERT INTO musicbrainz.:table_name (:columns) - VALUES (:column_values) - """) + INSERT INTO musicbrainz.{table_name} ({columns}) + VALUES ({column_values}) + """.format(table_name=table_name, + columns=','.join(columns), + value_str=join_columns(columns))) - result = connection.execute(query, {'table_name': table_name, - 'columns': ','.join(columns), - 'column_values': join_columns(columns)} - ) + result = connection.execute(query) transaction.commit() From 13ada58506b933fb9546c30d7f5055d6eb4d73d6 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sat, 11 Aug 2018 18:03:40 +0530 Subject: [PATCH 108/125] Evaluate both db access methods by simply getting data from lowlevel and recording --- manage.py | 8 ++++ .../external/evaluate_mbdatabase_access.py | 43 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 webserver/external/evaluate_mbdatabase_access.py diff --git a/manage.py b/manage.py index 81788178c..43ee1e534 100644 --- a/manage.py +++ b/manage.py @@ -22,6 +22,7 @@ import 
musicbrainz_importer.apply_replication_changes import webserver.external.get_entities +import webserver.external.evaluate_db ADMIN_SQL_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'admin', 'sql') @@ -202,11 +203,18 @@ def get_entities(): print('Redirecting mbids to original entities...') webserver.external.get_entities.get_original_entity() + @cli.command() def apply_replication_changes(): print("\nUpdating musicbrainz schema by applying replication packets...") musicbrainz_importer.apply_replication_changes.main() + +@cli.command() +def evaluate_access_methods(): + print('Evaluating both MusicBrainz database access methods...') + webserver.external.evaluate_db.get() + # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py new file mode 100644 index 000000000..8d7b359cc --- /dev/null +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -0,0 +1,43 @@ +import db +import db.data +from sqlalchemy import text +from brainzutils import musicbrainz_db +import time +import logging + + +def get(): + s_t = time.time() + print "Query directly from AcousticBrainz database for Import db method" + with db.engine.begin() as connection: + query = text(""" + SELECT * + FROM lowlevel + INNER JOIN musicbrainz.recording + ON musicbrainz.recording.gid = lowlevel.gid""" + ) + result = connection.execute(query) + data = result.fetchall() + print time.time()-s_t + + print "Separate queries from AcousticBrainz and MusicBrainz database over the direct connection" + n_t = time.time() + lowlevel_data = 0 + with db.engine.begin() as connection: + query = text(""" + SELECT * + FROM lowlevel + """) + result = connection.execute(query) + lowlevel_data = result.fetchall() + + lowlevel_data = list({value['gid'] for value in lowlevel_data}) + with musicbrainz_db.engine.begin() as connection: + query = text(""" + SELECT 
* + FROM recording + WHERE recording.gid in :gids + """) + result = connection.execute(query, {"gids": tuple(lowlevel_data)}) + rec_data = result.fetchall() + print time.time() - n_t From 8884214d7a4b75d9704f51af3aa8dce6fc456501 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sat, 11 Aug 2018 18:55:50 +0530 Subject: [PATCH 109/125] Add a limit to the number of recordings fetched --- webserver/external/evaluate_mbdatabase_access.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py index 8d7b359cc..25f1d5399 100644 --- a/webserver/external/evaluate_mbdatabase_access.py +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -14,8 +14,9 @@ def get(): SELECT * FROM lowlevel INNER JOIN musicbrainz.recording - ON musicbrainz.recording.gid = lowlevel.gid""" - ) + ON musicbrainz.recording.gid = lowlevel.gid + LIMIT 10000 + """) result = connection.execute(query) data = result.fetchall() print time.time()-s_t @@ -27,6 +28,7 @@ def get(): query = text(""" SELECT * FROM lowlevel + LIMIT 10000 """) result = connection.execute(query) lowlevel_data = result.fetchall() From e58f15a23a9529e1c754488abf3a625b945e75ea Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 02:03:38 +0530 Subject: [PATCH 110/125] Add logging instead of print and change file name in manage.py --- manage.py | 4 ++-- webserver/external/evaluate_mbdatabase_access.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/manage.py b/manage.py index 43ee1e534..d03a7a21d 100644 --- a/manage.py +++ b/manage.py @@ -22,7 +22,7 @@ import musicbrainz_importer.apply_replication_changes import webserver.external.get_entities -import webserver.external.evaluate_db +import webserver.external.evaluate_mbdatabase_access ADMIN_SQL_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'admin', 'sql') @@ -213,7 +213,7 @@ def apply_replication_changes(): 
@cli.command() def evaluate_access_methods(): print('Evaluating both MusicBrainz database access methods...') - webserver.external.evaluate_db.get() + webserver.external.evaluate_mbdatabase_access.get() # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py index 25f1d5399..8405dd910 100644 --- a/webserver/external/evaluate_mbdatabase_access.py +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -7,8 +7,8 @@ def get(): - s_t = time.time() - print "Query directly from AcousticBrainz database for Import db method" + logging.info("Querying directly from AcousticBrainz database for import MB database method...") + start_time = time.time() with db.engine.begin() as connection: query = text(""" SELECT * @@ -19,10 +19,11 @@ def get(): """) result = connection.execute(query) data = result.fetchall() - print time.time()-s_t + first_time_taken = time.time() - start_time + logging.info('Data imported from AcousticBrainz database in %.2f seconds.' % first_time_taken) - print "Separate queries from AcousticBrainz and MusicBrainz database over the direct connection" - n_t = time.time() + logging.info("Separate queries from AcousticBrainz and MusicBrainz databases over the direct connection...") + start_time = time.time() lowlevel_data = 0 with db.engine.begin() as connection: query = text(""" @@ -42,4 +43,5 @@ def get(): """) result = connection.execute(query, {"gids": tuple(lowlevel_data)}) rec_data = result.fetchall() - print time.time() - n_t + second_time_taken = time.time() - start_time + logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' 
% second_time_taken) From 5adf1a5b33a11408fc427c6ff24ef5ff52340ec9 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 15:38:12 +0530 Subject: [PATCH 111/125] Move queries inside the db module --- db/data.py | 39 +++++++++++++++++++ .../external/evaluate_mbdatabase_access.py | 38 +++++------------- 2 files changed, 48 insertions(+), 29 deletions(-) diff --git a/db/data.py b/db/data.py index f0c367d35..f902f0ddf 100644 --- a/db/data.py +++ b/db/data.py @@ -7,6 +7,7 @@ import db import db.exceptions from flask import current_app +from brainzutils import musicbrainz_db from sqlalchemy import text import sqlalchemy.exc @@ -633,3 +634,41 @@ def write_replication_control(replication_seq): VALUES (:replication_seq) """) connection.execute(query, {'replication_seq': replication_seq}) + + +def load_lowlevel_and_recording_data(): + with db.engine.begin() as connection: + query = text(""" + SELECT * + FROM lowlevel + INNER JOIN musicbrainz.recording + ON musicbrainz.recording.gid = lowlevel.gid + LIMIT 10000 + """) + result = connection.execute(query) + data = result.fetchall() + return data + + +def load_lowlevel_data(): + with db.engine.begin() as connection: + query = text(""" + SELECT * + FROM lowlevel + LIMIT 10000 + """) + result = connection.execute(query) + lowlevel_data = result.fetchall() + return lowlevel_data + + +def load_recording_data_from_MB_db(lowlevel_data): + with musicbrainz_db.engine.begin() as connection: + query = text(""" + SELECT * + FROM recording + WHERE recording.gid in :gids + """) + result = connection.execute(query, {"gids": tuple(lowlevel_data)}) + rec_data = result.fetchall() + return rec_data diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py index 8405dd910..d20a633f7 100644 --- a/webserver/external/evaluate_mbdatabase_access.py +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -1,47 +1,27 @@ import db import db.data -from sqlalchemy import text -from brainzutils 
import musicbrainz_db import time import logging +from sqlalchemy import text +from brainzutils import musicbrainz_db def get(): logging.info("Querying directly from AcousticBrainz database for import MB database method...") start_time = time.time() - with db.engine.begin() as connection: - query = text(""" - SELECT * - FROM lowlevel - INNER JOIN musicbrainz.recording - ON musicbrainz.recording.gid = lowlevel.gid - LIMIT 10000 - """) - result = connection.execute(query) - data = result.fetchall() + + data = db.data.load_lowlevel_and_recording_data() + first_time_taken = time.time() - start_time logging.info('Data imported from AcousticBrainz database in %.2f seconds.' % first_time_taken) logging.info("Separate queries from AcousticBrainz and MusicBrainz databases over the direct connection...") start_time = time.time() - lowlevel_data = 0 - with db.engine.begin() as connection: - query = text(""" - SELECT * - FROM lowlevel - LIMIT 10000 - """) - result = connection.execute(query) - lowlevel_data = result.fetchall() + lowlevel_data = db.data.load_lowlevel_data() lowlevel_data = list({value['gid'] for value in lowlevel_data}) - with musicbrainz_db.engine.begin() as connection: - query = text(""" - SELECT * - FROM recording - WHERE recording.gid in :gids - """) - result = connection.execute(query, {"gids": tuple(lowlevel_data)}) - rec_data = result.fetchall() + + recording_data = db.data.load_recording_data_from_MB_db(lowlevel_data) + second_time_taken = time.time() - start_time logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' 
% second_time_taken) From bf6873a6c68029335b3b8392c7c0205576c16430 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 15:58:31 +0530 Subject: [PATCH 112/125] Add docstrings for the functions moved to db module --- db/data.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/db/data.py b/db/data.py index f902f0ddf..bac9f655c 100644 --- a/db/data.py +++ b/db/data.py @@ -637,6 +637,13 @@ def write_replication_control(replication_seq): def load_lowlevel_and_recording_data(): + """Fetch data in which gid column value is present in both lowlevel + and musicbrainz.recording table from AcousticBrainz database. + + Returns: + data (of type - sqlalchemy.resultproxy): data retrieved + from the lowlevel and recording table. + """ with db.engine.begin() as connection: query = text(""" SELECT * @@ -651,6 +658,12 @@ def load_lowlevel_and_recording_data(): def load_lowlevel_data(): + """Fetch lowlevel data from AcousticBrainz database. + + Returns: + lowlevel_data (of type - sqlalchemy.resultproxy): data retrieved + from lowlevel table. + """ with db.engine.begin() as connection: query = text(""" SELECT * @@ -663,6 +676,17 @@ def load_lowlevel_data(): def load_recording_data_from_MB_db(lowlevel_data): + """Fetch recording data from MusicBrainz database over the + direct connection whose gid matches with those in lowlevel + table in AB database. + + Args: + lowlevel_data: list of gids of the data present in lowlevel table. + + Returns: + rec_data (of type - sqlalchemy.resultproxy): data retrieved + from recording table of MusicBrainz database. 
+ """ with musicbrainz_db.engine.begin() as connection: query = text(""" SELECT * From f7fa0766f87243c3f7814191bc99010126df42e0 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 18:20:37 +0530 Subject: [PATCH 113/125] Evaluate a functionality of mbid redirects for both db access methods --- db/data.py | 24 ++++++++++++++++++ manage.py | 4 +-- .../external/evaluate_mbdatabase_access.py | 2 +- webserver/external/get_entities.py | 25 +++++++++++++++++-- 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/db/data.py b/db/data.py index bac9f655c..cfd167ae1 100644 --- a/db/data.py +++ b/db/data.py @@ -636,6 +636,30 @@ def write_replication_control(replication_seq): connection.execute(query, {'replication_seq': replication_seq}) +def get_mbids_from_gid_redirect_tables_from_MB_db(): + """Fetch mbids from recording gid redirect table of MusicBrainz + database over the direct connection and calls function + get_original_entity to get the redirected result. + + Returns: + Dictionary containing the redirected original entity ids with MBIDs as keys. + - mbid: Recording mbids of the entities + - id: Original redirected ids of the entities after mbid redirect + """ + with musicbrainz_db.engine.begin() as connection: + query = text(""" + SELECT gid + FROM musicbrainz.recording_gid_redirect + """) + result = connection.execute(query) + mbids = result.fetchall() + + recording_mbids = [] + for mbid in mbids: + recording_mbids.append(str(mbid[0])) + return recording_mbids + + def load_lowlevel_and_recording_data(): """Fetch data in which gid column value is present in both lowlevel and musicbrainz.recording table from AcousticBrainz database. 
diff --git a/manage.py b/manage.py index d03a7a21d..33e64f996 100644 --- a/manage.py +++ b/manage.py @@ -201,7 +201,7 @@ def import_musicbrainz_db(): @cli.command() def get_entities(): print('Redirecting mbids to original entities...') - webserver.external.get_entities.get_original_entity() + webserver.external.get_entities.main() @cli.command() @@ -213,7 +213,7 @@ def apply_replication_changes(): @cli.command() def evaluate_access_methods(): print('Evaluating both MusicBrainz database access methods...') - webserver.external.evaluate_mbdatabase_access.get() + webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_tables_data() # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py index d20a633f7..9027fd536 100644 --- a/webserver/external/evaluate_mbdatabase_access.py +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -6,7 +6,7 @@ from brainzutils import musicbrainz_db -def get(): +def get_AB_and_MB_tables_data(): logging.info("Querying directly from AcousticBrainz database for import MB database method...") start_time = time.time() diff --git a/webserver/external/get_entities.py b/webserver/external/get_entities.py index e9ccb4bfc..722c6c940 100644 --- a/webserver/external/get_entities.py +++ b/webserver/external/get_entities.py @@ -1,12 +1,14 @@ import db import db.data +import time +import logging from sqlalchemy import text from brainzutils.musicbrainz_db import mb_session from brainzutils.musicbrainz_db.utils import get_entities_by_gids from mbdata.models import Recording -def get_original_entity(): +def get_original_entity(database): """Get original entity information after applying MBID redirect to many mbids. 
@@ -17,7 +19,10 @@ def get_original_entity(): - mbid: Recording mbids of the entities - id: Original redirected ids of the entities after mbid redirect """ - mbids = db.data.get_mbids_from_gid_redirect_tables() + if database == 'MB': + mbids = db.data.get_mbids_from_gid_redirect_tables_from_MB_db() + else: + mbids = db.data.get_mbids_from_gid_redirect_tables() with mb_session() as mb_db: query = mb_db.query(Recording) @@ -33,3 +38,19 @@ def get_original_entity(): gids_with_redirected_ids = dict(zip(recording_gids, recording_ids)) return gids_with_redirected_ids + + +def main(): + start_time = time.time() + + gids_with_redirected_ids = get_original_entity('AB') + + first_time_taken = time.time() - start_time + logging.info('Data imported from AcousticBrainz database in %.2f seconds.' % first_time_taken) + + start_time = time.time() + + gids_with_redirected_ids = get_original_entity('AB') + + second_time_taken = time.time() - start_time + logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' % second_time_taken) From 6c42c4a468797244e60e22577b19fd2db919c346 Mon Sep 17 00:00:00 2001 From: rsh7 Date: Sun, 12 Aug 2018 19:27:27 +0530 Subject: [PATCH 114/125] Add comments stating the explainations of some lines of code --- webserver/external/evaluate_mbdatabase_access.py | 2 ++ webserver/external/get_entities.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py index 9027fd536..ed1ba8358 100644 --- a/webserver/external/evaluate_mbdatabase_access.py +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -7,6 +7,7 @@ def get_AB_and_MB_tables_data(): + # Testing with the AcousticBrainz database tables (import MB db method). 
logging.info("Querying directly from AcousticBrainz database for import MB database method...") start_time = time.time() @@ -15,6 +16,7 @@ def get_AB_and_MB_tables_data(): first_time_taken = time.time() - start_time logging.info('Data imported from AcousticBrainz database in %.2f seconds.' % first_time_taken) + # Testing with both AcousticBrainz & MusicBrainz database tables (the direct connection method). logging.info("Separate queries from AcousticBrainz and MusicBrainz databases over the direct connection...") start_time = time.time() diff --git a/webserver/external/get_entities.py b/webserver/external/get_entities.py index 722c6c940..0cf8b0847 100644 --- a/webserver/external/get_entities.py +++ b/webserver/external/get_entities.py @@ -41,6 +41,7 @@ def get_original_entity(database): def main(): + # Testing with the MusicBrainz schema in AB start_time = time.time() gids_with_redirected_ids = get_original_entity('AB') @@ -48,6 +49,7 @@ def main(): first_time_taken = time.time() - start_time logging.info('Data imported from AcousticBrainz database in %.2f seconds.' 
% first_time_taken) + # Testing with the original MusicBrainz database over the direct connection start_time = time.time() gids_with_redirected_ids = get_original_entity('AB') From f64afa39e2f3db8bb411576976ba68d4435aa937 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 9 Jun 2021 14:09:42 +0530 Subject: [PATCH 115/125] Fix requirements.txt after merge --- requirements.txt | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5babfdd1a..724b08214 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,43 +1,27 @@ -git+https://github.com/metabrainz/brainzutils-python.git@v1.6.0 +git+https://github.com/metabrainz/brainzutils-python.git@v1.18.1 click == 6.7 coverage == 4.5.1 Fabric == 1.14.0 -Flask-Admin == 1.5.1 -Flask-Login == 0.4.1 -Flask-SQLAlchemy == 2.3.2 -Flask-Testing == 0.7.1 -Flask-UUID == 0.2 -Flask-WTF == 0.14.2 -Jinja2 == 2.10 -mock == 2.0.0 -musicbrainzngs == 0.6 -ndg-httpsclient==0.4.4 -psycopg2 == 2.7.4 -pytz==2018.3 -pytest==3.4.2 -pytest-cov==2.5.1 -pyyaml == 3.11 -git+https://github.com/metabrainz/brainzutils-python.git@v1.18.1 Flask-Admin==1.5.6 Flask-Login==0.5.0 Flask-SQLAlchemy==2.4.1 Flask-Testing==0.8.0 +Flask-UUID==0.2 Flask-WTF == 0.14.3 -futures==3.3.0 +Jinja2==2.11.2 mock==3.0.5 musicbrainzngs==0.7.1 ndg-httpsclient==0.5.1 psycopg2-binary==2.8.5 pytz==2019.3 pyyaml==5.3.1 +futures==3.3.0 rauth == 0.7.3 setproctitle == 1.1.10 six==1.14.0 Flask==1.1.2 -Jinja2==2.11.2 werkzeug==1.0.1 Flask-DebugToolbar==0.11.0 -Flask-UUID==0.2 sentry-sdk[flask]==0.20.3 certifi redis==3.4.1 From 8f78f6065acbb56b1a0fc5276f2d4ae9f1ded8df Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 9 Jun 2021 14:54:43 +0530 Subject: [PATCH 116/125] Fix command name --- db/data.py | 2 +- manage.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/db/data.py b/db/data.py index 65bce89d0..c93348e3e 100644 --- a/db/data.py +++ b/db/data.py @@ -1057,7 +1057,7 @@ def 
load_recording_data_from_MB_db(lowlevel_data): Args: lowlevel_data: list of gids of the data present in lowlevel table. - Returns: + Returns:0 rec_data (of type - sqlalchemy.resultproxy): data retrieved from recording table of MusicBrainz database. """ diff --git a/manage.py b/manage.py index 636d17554..bdbca0066 100644 --- a/manage.py +++ b/manage.py @@ -94,7 +94,7 @@ def init_db(archive, force, skip_create_db=False): current_app.logger.info("Done!") -@cli.command(name='import_data') +@cli.command(name='init_mb_db') @click.option("--drop-constraints", "-d", is_flag=True, help="Drop primary and foreign keys before importing.") @click.option("--force", "-f", is_flag=True, help="Drop existing MusicBrainz schema and tables.") def init_mb_db(force): From a3ab871913dd097e50f7f0e76ed9390e26062f44 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 9 Jun 2021 14:58:26 +0530 Subject: [PATCH 117/125] Add missing keyword argument to command --- manage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manage.py b/manage.py index bdbca0066..ddb7f5585 100644 --- a/manage.py +++ b/manage.py @@ -97,7 +97,7 @@ def init_db(archive, force, skip_create_db=False): @cli.command(name='init_mb_db') @click.option("--drop-constraints", "-d", is_flag=True, help="Drop primary and foreign keys before importing.") @click.option("--force", "-f", is_flag=True, help="Drop existing MusicBrainz schema and tables.") -def init_mb_db(force): +def init_mb_db(drop_constraints, force): """Initialize the MusicBrainz database. 
This process involves several steps: From dd3c834090de37ec32abd19fe2ad96420b7139b0 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 9 Jun 2021 18:04:50 +0530 Subject: [PATCH 118/125] Fix write_language query --- db/import_mb_data.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index 11c6bbacd..bf29b0027 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1420,12 +1420,13 @@ def write_language(connection, MB_language_data): ON CONFLICT (iso_code_2b) DO NOTHING """) values = [{ - "iso_code_2t": value[0], - "iso_code_2b": value[1], - "iso_code_1": value[2], - "name": value[3], - "frequency": value[4], - "iso_code_3": value[5]} for value in MB_language_data + "id": value[0], + "iso_code_2t": value[1], + "iso_code_2b": value[2], + "iso_code_1": value[3], + "name": value[4], + "frequency": value[5], + "iso_code_3": value[6]} for value in MB_language_data ] connection.execute(language_query, values) logging.info('Inserted %d rows in language table!' 
% len(MB_language_data)) From 92f3ab5ebe0785dc8098b7f1ac4cdeb55ab697f7 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 9 Jun 2021 18:22:15 +0530 Subject: [PATCH 119/125] Add :id to language insert query --- db/import_mb_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/import_mb_data.py b/db/import_mb_data.py index bf29b0027..352b46a62 100644 --- a/db/import_mb_data.py +++ b/db/import_mb_data.py @@ -1416,7 +1416,7 @@ def write_language(connection, MB_language_data): """ language_query = text(""" INSERT INTO musicbrainz.language - VALUES (:iso_code_2t, :iso_code_2b, :iso_code_1, :name, :frequency, :iso_code_3) + VALUES (:id, :iso_code_2t, :iso_code_2b, :iso_code_1, :name, :frequency, :iso_code_3) ON CONFLICT (iso_code_2b) DO NOTHING """) values = [{ From 97f24c82955717e7cb1957f78a946e614f728ede Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 9 Jun 2021 20:08:40 +0530 Subject: [PATCH 120/125] Drop constraints before importing data --- manage.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/manage.py b/manage.py index ddb7f5585..5286fd8ab 100644 --- a/manage.py +++ b/manage.py @@ -121,12 +121,13 @@ def init_mb_db(drop_constraints, force): print('Creating MusicBrainz tables...') db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_tables.sql')) - print('Creating MusicBrainz primary and foreign keys...') - db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_primary_keys.sql')) - db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_foreign_keys.sql')) + if not drop_constraints: + print('Creating MusicBrainz primary and foreign keys...') + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_primary_keys.sql')) + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_foreign_keys.sql')) - print('Creating MusicBrainz indexes...') - db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_indexes.sql')) + print('Creating MusicBrainz 
indexes...') + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_indexes.sql')) print("Done!") From f9c80c958292f646af8a83a80ae59f3f194f254a Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 9 Jun 2021 20:34:44 +0530 Subject: [PATCH 121/125] Create PKs always --- manage.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/manage.py b/manage.py index 5286fd8ab..0bebfc730 100644 --- a/manage.py +++ b/manage.py @@ -121,9 +121,11 @@ def init_mb_db(drop_constraints, force): print('Creating MusicBrainz tables...') db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_tables.sql')) + print('Creating MusicBrainz primary keys...') + db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_primary_keys.sql')) + if not drop_constraints: - print('Creating MusicBrainz primary and foreign keys...') - db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_primary_keys.sql')) + print('Creating MusicBrainz foreign keys...') db.run_sql_script(os.path.join(ADMIN_SQL_DIR, 'create_musicbrainz_foreign_keys.sql')) print('Creating MusicBrainz indexes...') From 8624df1ba3352113d7c786567fb43d68b0095765 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Thu, 10 Jun 2021 17:08:32 +0530 Subject: [PATCH 122/125] Separate access methods to evaluate separately --- manage.py | 13 ++++++++++++- webserver/external/evaluate_mbdatabase_access.py | 8 +++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/manage.py b/manage.py index 0bebfc730..d8f1cb2dd 100644 --- a/manage.py +++ b/manage.py @@ -321,7 +321,18 @@ def apply_replication_changes(): @cli.command() def evaluate_access_methods(): print('Evaluating both MusicBrainz database access methods...') - webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_tables_data() + webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_imported() + webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_direct() + + +@cli.command() +def evaluate_import(): + 
webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_imported() + + +@cli.command() +def evaluate_direct(): + webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_direct() # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py index ed1ba8358..82a673891 100644 --- a/webserver/external/evaluate_mbdatabase_access.py +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -6,7 +6,7 @@ from brainzutils import musicbrainz_db -def get_AB_and_MB_tables_data(): +def get_AB_and_MB_imported(): # Testing with the AcousticBrainz database tables (import MB db method). logging.info("Querying directly from AcousticBrainz database for import MB database method...") start_time = time.time() @@ -14,8 +14,10 @@ def get_AB_and_MB_tables_data(): data = db.data.load_lowlevel_and_recording_data() first_time_taken = time.time() - start_time - logging.info('Data imported from AcousticBrainz database in %.2f seconds.' % first_time_taken) + logging.info('Data imported from AcousticBrainz database in %.2f seconds.' % first_time_taken) + +def get_AB_and_MB_direct(): # Testing with both AcousticBrainz & MusicBrainz database tables (the direct connection method). logging.info("Separate queries from AcousticBrainz and MusicBrainz databases over the direct connection...") start_time = time.time() @@ -26,4 +28,4 @@ def get_AB_and_MB_tables_data(): recording_data = db.data.load_recording_data_from_MB_db(lowlevel_data) second_time_taken = time.time() - start_time - logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' % second_time_taken) + logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' 
% second_time_taken) From afcf8268d77451e06fc62e28cbecca19fcd01a5f Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Thu, 10 Jun 2021 18:08:29 +0530 Subject: [PATCH 123/125] Add method to fetch data directly from AB as well --- db/data.py | 23 +++++++++++++++++++ manage.py | 12 +++++++--- .../external/evaluate_mbdatabase_access.py | 14 +++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/db/data.py b/db/data.py index c93348e3e..6c9b7ccc2 100644 --- a/db/data.py +++ b/db/data.py @@ -1070,3 +1070,26 @@ def load_recording_data_from_MB_db(lowlevel_data): result = connection.execute(query, {"gids": tuple(lowlevel_data)}) rec_data = result.fetchall() return rec_data + + +def load_recording_data_from_AB_db(lowlevel_data): + """Fetch recording data from MusicBrainz database over the + direct connection whose gid matches with those in lowlevel + table in AB database. + + Args: + lowlevel_data: list of gids of the data present in lowlevel table. + + Returns:0 + rec_data (of type - sqlalchemy.resultproxy): data retrieved + from recording table of MusicBrainz database. 
+ """ + with db.engine.begin() as connection: + query = text(""" + SELECT * + FROM musicbrainz.recording + WHERE musicbrainz.recording.gid in :gids + """) + result = connection.execute(query, {"gids": tuple(lowlevel_data)}) + rec_data = result.fetchall() + return rec_data diff --git a/manage.py b/manage.py index d8f1cb2dd..5c43a1986 100644 --- a/manage.py +++ b/manage.py @@ -318,22 +318,28 @@ def apply_replication_changes(): musicbrainz_importer.apply_replication_changes.main() -@cli.command() +@cli.command(help="Time imported data from AB first, then time data by directly accessing AB and MB") def evaluate_access_methods(): print('Evaluating both MusicBrainz database access methods...') webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_imported() webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_direct() -@cli.command() +@cli.command(help="Time imported data from AB") def evaluate_import(): webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_imported() -@cli.command() +@cli.command(help="Time data by directly accessing AB and MB") def evaluate_direct(): webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_direct() + +@cli.command(help="Time data by directly accessing but only AB") +def evaluate_direct_AB_only(): + webserver.external.evaluate_mbdatabase_access.get_AB_only_direct() + + # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py index 82a673891..65f79d707 100644 --- a/webserver/external/evaluate_mbdatabase_access.py +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -29,3 +29,17 @@ def get_AB_and_MB_direct(): second_time_taken = time.time() - start_time logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' 
% second_time_taken) + + +def get_AB_only_direct(): + # Testing with both AcousticBrainz & MusicBrainz database tables (the direct connection method). + logging.info("Separate queries from AcousticBrainz and MusicBrainz databases over the direct connection...") + start_time = time.time() + + lowlevel_data = db.data.load_lowlevel_data() + lowlevel_data = list({value['gid'] for value in lowlevel_data}) + + recording_data = db.data.load_recording_data_from_AB_db(lowlevel_data) + + second_time_taken = time.time() - start_time + logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' % second_time_taken) From de998f5d472d4558f3408e28179317931441b506 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Thu, 10 Jun 2021 18:40:09 +0530 Subject: [PATCH 124/125] Add method to evaluate fetch using exists --- db/data.py | 24 +++++++++++++++++++ manage.py | 5 ++++ .../external/evaluate_mbdatabase_access.py | 11 +++++++++ 3 files changed, 40 insertions(+) diff --git a/db/data.py b/db/data.py index 6c9b7ccc2..7d3f2e952 100644 --- a/db/data.py +++ b/db/data.py @@ -1093,3 +1093,27 @@ def load_recording_data_from_AB_db(lowlevel_data): result = connection.execute(query, {"gids": tuple(lowlevel_data)}) rec_data = result.fetchall() return rec_data + + +def load_lowlevel_and_recording_data_using_exists(): + """Fetch data in which gid column value is present in both lowlevel + and musicbrainz.recording table from AcousticBrainz database. + + Returns: + data (of type - sqlalchemy.resultproxy): data retrieved + from the lowlevel and recording table. 
+ """ + with db.engine.begin() as connection: + query = text(""" + SELECT * + FROM lowlevel + WHERE EXISTS ( + SELECT musicbrainz.recording.gid + FROM musicbrainz.recording + WHERE musicbrainz.recording.gid = lowlevel.gid + ) + LIMIT 10000 + """) + result = connection.execute(query) + data = result.fetchall() + return data diff --git a/manage.py b/manage.py index 5c43a1986..dae4e8d4f 100644 --- a/manage.py +++ b/manage.py @@ -340,6 +340,11 @@ def evaluate_direct_AB_only(): webserver.external.evaluate_mbdatabase_access.get_AB_only_direct() +@cli.command(help="Time data by importing but using exists clause") +def evaluate_import_exists(): + webserver.external.evaluate_mbdatabase_access.get_AB_only_direct() + + # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py index 65f79d707..a12c5fb30 100644 --- a/webserver/external/evaluate_mbdatabase_access.py +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -43,3 +43,14 @@ def get_AB_only_direct(): second_time_taken = time.time() - start_time logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' % second_time_taken) + + +def get_AB_and_MB_imported_with_exists(): + # Testing with both AcousticBrainz & MusicBrainz database tables (the direct connection method). + logging.info("Querying directly from AcousticBrainz database for import MB database method using EXISTS clause...") + start_time = time.time() + + data = db.data.load_lowlevel_and_recording_data_using_exists() + + second_time_taken = time.time() - start_time + logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' 
% second_time_taken) From d5f3250cfc7aa7ca25435d2bfbf8684b2eb520de Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 16 Jun 2021 12:21:00 +0530 Subject: [PATCH 125/125] Add commands to test with a dataset --- db/data.py | 36 +++++++++++++++++ manage.py | 17 ++++++++ .../external/evaluate_mbdatabase_access.py | 39 +++++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/db/data.py b/db/data.py index 7d3f2e952..f20a7f2bc 100644 --- a/db/data.py +++ b/db/data.py @@ -1117,3 +1117,39 @@ def load_lowlevel_and_recording_data_using_exists(): result = connection.execute(query) data = result.fetchall() return data + +def load_lowlevel_data_from_dataset(dataset_id): + with db.engine.begin() as connection: + query = text(""" + SELECT lowlevel.*, musicbrainz.recording.* + FROM lowlevel + INNER JOIN musicbrainz.recording + ON musicbrainz.recording.gid = lowlevel.gid + INNER JOIN dataset_class_member + ON dataset_class_member.mbid = lowlevel.gid + INNER JOIN dataset_class + ON dataset_class.id = dataset_class_member.class + INNER JOIN dataset + ON dataset.id = dataset_class.dataset + WHERE dataset.id = :id + LIMIT 10000 + """) + result = connection.execute(query, {"id": dataset_id}) + data = result.fetchall() + return data + +def get_all_recordings_in_dataset(dataset_id): + with db.engine.begin() as connection: + query = text(""" + SELECT * + FROM dataset_class_member + INNER JOIN dataset_class + ON dataset_class.id = dataset_class_member.class + INNER JOIN dataset + ON dataset.id = dataset_class.dataset + WHERE dataset.id = :id + LIMIT 10000 + """) + result = connection.execute(query, {"id": dataset_id}) + data = result.fetchall() + return data diff --git a/manage.py b/manage.py index dae4e8d4f..ca7513332 100644 --- a/manage.py +++ b/manage.py @@ -344,6 +344,23 @@ def evaluate_direct_AB_only(): def evaluate_import_exists(): webserver.external.evaluate_mbdatabase_access.get_AB_only_direct() +@cli.command(help="Time imported data from AB using given dataset") 
+@click.argument("dataset", required=True) +def evaluate_import_dataset(dataset): + webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_imported_from_dataset(dataset) + + +@cli.command(help="Time data by directly accessing AB and MB using given dataset") +@click.argument("dataset", required=True) +def evaluate_direct_dataset(dataset): + webserver.external.evaluate_mbdatabase_access.get_AB_and_MB_direct_from_dataset(dataset) + + +@cli.command(help="Time data by directly accessing but only AB using given dataset") +@click.argument("dataset", required=True) +def evaluate_direct_AB_only_dataset(dataset): + webserver.external.evaluate_mbdatabase_access.get_AB_only_direct_from_dataset(dataset) + # Please keep additional sets of commands down there cli.add_command(db.dump_manage.cli, name="dump") diff --git a/webserver/external/evaluate_mbdatabase_access.py b/webserver/external/evaluate_mbdatabase_access.py index a12c5fb30..3306bdd33 100644 --- a/webserver/external/evaluate_mbdatabase_access.py +++ b/webserver/external/evaluate_mbdatabase_access.py @@ -54,3 +54,42 @@ def get_AB_and_MB_imported_with_exists(): second_time_taken = time.time() - start_time logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' % second_time_taken) + + +def get_AB_and_MB_imported_from_dataset(dataset): + # Testing with the AcousticBrainz database tables (import MB db method). + logging.info("Querying directly from AcousticBrainz database for import MB database method...") + start_time = time.time() + + data = db.data.load_lowlevel_data_from_dataset(dataset) + + first_time_taken = time.time() - start_time + logging.info('Data imported from AcousticBrainz database in %.2f seconds.' % first_time_taken) + + +def get_AB_and_MB_direct_from_dataset(dataset): + # Testing with both AcousticBrainz & MusicBrainz database tables (the direct connection method). 
+ logging.info("Separate queries from AcousticBrainz and MusicBrainz databases over the direct connection...") + start_time = time.time() + + lowlevel_data = db.data.get_all_recordings_in_dataset(dataset) + lowlevel_data = list({value['mbid'] for value in lowlevel_data}) + + recording_data = db.data.load_recording_data_from_MB_db(lowlevel_data) + + second_time_taken = time.time() - start_time + logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' % second_time_taken) + + +def get_AB_only_direct_from_dataset(dataset): + # Testing with both AcousticBrainz & MusicBrainz database tables (the direct connection method). + logging.info("Separate queries from AcousticBrainz and MusicBrainz databases over the direct connection...") + start_time = time.time() + + lowlevel_data = db.data.get_all_recordings_in_dataset(dataset) + lowlevel_data = list({value['mbid'] for value in lowlevel_data}) + + recording_data = db.data.load_recording_data_from_AB_db(lowlevel_data) + + second_time_taken = time.time() - start_time + logging.info('Data imported from direct connection to MusicBrainz database in %.2f seconds.' % second_time_taken)