diff --git a/openapi.yaml b/openapi.yaml index 4ba06ad..d50f003 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -373,8 +373,10 @@ components: description: Information for a given tag example: tag: - - date_time: '2022-04-01T04:58:21.319061+00:00' + - dataset_id: 1 + date_time: '2022-04-01T04:58:21.319061+00:00' filename: 'eTUFF-sailfish-117259_2.txt' + hash_sha256: 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b977' metadata: person_owner: John Do owner_contect: john@do.net @@ -385,8 +387,10 @@ components: submission_id: 5 tag_id: 3 version: '1' - - date_time: '2022-06-01T05:39:46.896088+00:00' + - dataset_id: 1 + date_time: '2022-06-01T05:39:46.896088+00:00' filename: 'eTUFF-sailfish-117259_2.txt' + hash_sha256: 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' metadata: person_owner: Jane Do owner_contect: jane@do.net @@ -437,6 +441,10 @@ components: TagSubmission: type: object properties: + dataset_id: + description: The primary key from the Dataset relation + example: 1 + type: integer date_time: description: Local datetime stamp at the time of eTUFF tag data file ingestion example: '2022-04-01T04:58:21.319061+00:00' @@ -445,6 +453,10 @@ components: type: string filename: $ref: '#/components/schemas/filename' + hash_sha256: + description: SHA256 hash representing the contents of the submission eTUFF file + example: 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' + type: string metadata: $ref: '#/components/schemas/Metadata' notes: diff --git a/services/postgis/tagbase_schema.sql b/services/postgis/tagbase_schema.sql index ce2d45c..78f5689 100644 --- a/services/postgis/tagbase_schema.sql +++ b/services/postgis/tagbase_schema.sql @@ -175,8 +175,7 @@ CREATE TABLE data_position ( submission_id bigint NOT NULL, tag_id bigint NOT NULL, argos_location_class character varying(1), - solution_id integer NOT NULL DEFAULT 1, - flag_as_reference integer NOT NULL DEFAULT 0 + flag_as_reference boolean NOT NULL DEFAULT False 
); @@ -228,7 +227,7 @@ COMMENT ON COLUMN data_position.lon_err IS 'Error associated with the tag record -- Name: COLUMN data_position.submission_id; Type: COMMENT; Schema: public; Owner: postgres -- -COMMENT ON COLUMN data_position.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; +COMMENT ON COLUMN data_position.submission_id IS 'PROXY FOR SUBMISSION - Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; -- @@ -247,18 +246,11 @@ COMMENT ON COLUMN data_position.argos_location_class IS 'ARGOS Location Class co https://www.argos-system.org/wp-content/uploads/2016/08/r363_9_argos_users_manual-v1.6.6.pdf , page 13.'; --- --- Name: COLUMN data_position.solution_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.solution_id IS 'Unique numeric identifier for a given tag geolocation dataset solution. solution_id=1 is assigned to the primary or approved solution. Incremented solution_id''s assigned to other positional dataset solutions for a given tag_id and submission_id'; - - -- -- Name: COLUMN data_position.flag_as_reference; Type: COMMENT; Schema: public; Owner: postgres -- -COMMENT ON COLUMN data_position.flag_as_reference IS 'Integer (representing psudo boolean value) flag field which identifies whether positional data associated with a given Tag and Track solution are considered to be coordinates of the "Reference" track (ie. best solution currently). Coordinate record takes 1 if it is part of the Reference track or 0 if it is not.'; +COMMENT ON COLUMN data_position.flag_as_reference IS 'Identifies whether positional data associated with a given Tag and Track solution are considered to be coordinates of the "Reference" track (ie. best solution currently). 
The coordinate record makes used of pseudo-boolean values of 1 if it is part of the Reference track or 0 if it is not.'; -- -- Name: data_profile; Type: TABLE; Schema: public; Owner: postgres @@ -447,63 +439,6 @@ COMMENT ON COLUMN metadata.attribute_value IS 'Value associated with the given e COMMENT ON COLUMN metadata.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; --- --- Name: metadata_position; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE metadata_position ( - submission_id bigint NOT NULL, - attribute_id bigint NOT NULL, - attribute_value text NOT NULL, - tag_id bigint NOT NULL, - solution_id integer NOT NULL DEFAULT 1 -); - - -ALTER TABLE metadata_position OWNER TO postgres; - --- --- Name: TABLE metadata_position; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE metadata_position IS 'Contains the ingested tag metadata consistent with the eTUFF specification'; - - --- --- Name: COLUMN metadata_position.submission_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; - - --- --- Name: COLUMN metadata_position.attribute_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.attribute_id IS 'Unique numeric metadata attribute ID based on the eTUFF metadata specification'; - - --- --- Name: COLUMN metadata_position.attribute_value; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.attribute_value IS 'Value associated with the given eTUFF metadata attribute'; - - --- --- Name: COLUMN metadata_position.tag_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; - - --- --- Name: COLUMN metadata_position.solution_id; Type: COMMENT; Schema: 
public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.solution_id IS 'Unique numeric identifier for a given tag geolocation dataset solution. solution_id=1 is assigned to the primary or approved solution. Incremented solution_id''s assigned to other positional dataset solutions for a given tag_id and submission_id'; - - -- -- Name: metadata_types; Type: TABLE; Schema: public; Owner: postgres -- @@ -727,6 +662,50 @@ COMMENT ON COLUMN proc_observations.submission_id IS 'Unique numeric ID assigned COMMENT ON COLUMN proc_observations.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; +CREATE TABLE dataset ( + dataset_id bigint NOT NULL, + instrument_name character varying(50), + serial_number character varying(50), + ptt character varying(50), + platform character varying(50) +); + +ALTER TABLE dataset OWNER TO postgres; + + +COMMENT ON COLUMN dataset.dataset_id IS 'Unique numeric ID assigned upon insertion of the other unique attributes belonging to this entity'; +COMMENT ON COLUMN dataset.instrument_name IS 'A unique instrument name, made clear to the end user that it is the primary identifier, e.g., iccat_gbyp0008'; +COMMENT ON COLUMN dataset.serial_number IS 'A the device internal ID, e.g., 18P0201'; +COMMENT ON COLUMN dataset.ptt IS 'A satellite platform ID, e.g., 62342'; +COMMENT ON COLUMN dataset.platform IS 'The species code/common name on which the device was deployed, e.g., Thunnus thynnus'; + +-- +-- Name: TABLE dataset; Type: COMMENT; Schema: public; Owner: postgres +-- + +COMMENT ON TABLE dataset IS 'Contains the attributes for defining a dataset'; + +-- +-- Name: dataset_dataset_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres +-- + +CREATE SEQUENCE dataset_dataset_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER TABLE dataset_dataset_id_seq OWNER TO postgres; + +-- +-- Name: dataset_dataset_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres +-- + +ALTER 
SEQUENCE dataset_dataset_id_seq OWNED BY dataset.dataset_id; + +ALTER TABLE ONLY dataset ALTER COLUMN dataset_id SET DEFAULT nextval('dataset_dataset_id_seq'::regclass); -- -- Name: submission; Type: TABLE; Schema: public; Owner: postgres @@ -739,7 +718,8 @@ CREATE TABLE submission ( filename text NOT NULL, version character varying(50), notes text, - hash_sha256 character varying(64) NOT NULL + hash_sha256 character varying(64) NOT NULL, + dataset_id bigint NOT NULL ); @@ -801,6 +781,13 @@ COMMENT ON COLUMN submission.notes IS 'Free-form text field where details of sub COMMENT ON COLUMN submission.hash_sha256 IS 'SHA256 hash representing the contents of the submission eTUFF file.'; +-- +-- Name: COLUMN submission.dataset_id; Type: COMMENT; Schema: public; Owner: postgres +-- + +COMMENT ON COLUMN submission.dataset_id IS 'The primary key from the Dataset relation'; + + -- -- Name: submission_submission_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres -- @@ -852,78 +839,6 @@ ALTER TABLE ONLY observation_types ALTER COLUMN variable_id SET DEFAULT nextval( ALTER TABLE ONLY submission ALTER COLUMN submission_id SET DEFAULT nextval('submission_submission_id_seq'::regclass); --- --- Data for Name: data_histogram_bin_data; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_histogram_bin_data (submission_id, tag_id, bin_id, bin_class, date_time, variable_value, position_date_time, variable_id) FROM stdin; -\. - - --- --- Data for Name: data_histogram_bin_info; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_histogram_bin_info (bin_id, bin_class, min_value, max_value, variable_id) FROM stdin; -\. - - --- --- Data for Name: data_position; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_position (date_time, lat, lon, lat_err, lon_err, submission_id, tag_id, argos_location_class, solution_id) FROM stdin; -\. 
- - --- --- Data for Name: data_profile; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_profile (submission_id, tag_id, variable_id, date_time, depth, variable_value, position_date_time) FROM stdin; -\. - - --- --- Data for Name: data_time_series; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_time_series (date_time, variable_id, variable_value, submission_id, tag_id, position_date_time) FROM stdin; -\. - - --- --- Data for Name: metadata; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY metadata (submission_id, attribute_id, attribute_value, tag_id) FROM stdin; -\. - - --- --- Data for Name: metadata_position; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY metadata_position (submission_id, attribute_id, attribute_value, tag_id, solution_id) FROM stdin; -\. - - --- --- Data for Name: proc_observations; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY proc_observations (date_time, variable_id, variable_value, submission_id, tag_id) FROM stdin; -\. - - --- --- Data for Name: submission; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY submission (submission_id, tag_id, date_time, filename, version, notes) FROM stdin; -\. 
- - -- -- Name: observation_types_variable_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres -- @@ -974,7 +889,7 @@ ALTER TABLE ONLY data_histogram_bin_info -- ALTER TABLE ONLY data_position - ADD CONSTRAINT data_position_pkey PRIMARY KEY (submission_id, tag_id, solution_id, date_time) WITH (fillfactor='100'); + ADD CONSTRAINT data_position_pkey PRIMARY KEY (submission_id, tag_id, date_time) WITH (fillfactor='100'); -- @@ -1001,14 +916,6 @@ ALTER TABLE ONLY metadata ADD CONSTRAINT metadata_pkey PRIMARY KEY (submission_id, attribute_id); --- --- Name: metadata_position metadata_pkey01; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata_position - ADD CONSTRAINT metadata_pkey01 PRIMARY KEY (submission_id, attribute_id, tag_id, solution_id) WITH (fillfactor='100'); - - -- -- Name: metadata_types metadata_types_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres -- @@ -1049,6 +956,20 @@ ALTER TABLE ONLY submission ADD CONSTRAINT submission_pkey PRIMARY KEY (submission_id); +-- +-- Name: submission dataset_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY dataset + ADD CONSTRAINT dataset_pkey PRIMARY KEY (dataset_id); + +-- +-- Name: data_histogram_bin_data data_histogram_bin_data_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY submission + ADD CONSTRAINT submission_dataset_id_fkey FOREIGN KEY (dataset_id) REFERENCES dataset(dataset_id) ON DELETE CASCADE; + -- -- Name: data_histogram_bin_data_date_time_index; Type: INDEX; Schema: public; Owner: postgres -- @@ -1074,7 +995,7 @@ CREATE INDEX data_position_date_time ON data_position USING btree (date_time); -- Name: data_position_latlontime_index; Type: INDEX; Schema: public; Owner: postgres -- -CREATE INDEX data_position_latlontime_index ON data_position USING btree (submission_id, tag_id, solution_id, date_time, lat, lon, argos_location_class) WITH (fillfactor='100'); +CREATE INDEX 
data_position_latlontime_index ON data_position USING btree (submission_id, tag_id, date_time, lat, lon, argos_location_class) WITH (fillfactor='100'); -- @@ -1185,14 +1106,6 @@ ALTER TABLE ONLY metadata ADD CONSTRAINT metadata_attribute_id_fkey FOREIGN KEY (attribute_id) REFERENCES metadata_types(attribute_id); --- --- Name: metadata_position metadata_attribute_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata_position - ADD CONSTRAINT metadata_attribute_id_fkey FOREIGN KEY (attribute_id) REFERENCES metadata_types(attribute_id); - - -- -- Name: metadata metadata_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres -- @@ -1201,14 +1114,6 @@ ALTER TABLE ONLY metadata ADD CONSTRAINT metadata_submission_id_fkey FOREIGN KEY (submission_id) REFERENCES submission(submission_id) ON DELETE CASCADE; --- --- Name: metadata_position metadata_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata_position - ADD CONSTRAINT metadata_submission_id_fkey FOREIGN KEY (submission_id) REFERENCES submission(submission_id) ON DELETE CASCADE; - - -- -- Name: proc_observations proc_observations_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres -- @@ -1244,233 +1149,242 @@ TRUNCATE submission CASCADE $$; -- --- The following TRIGGER ensures that upon ingestion of an eTUFF file into tagbase-server, --- the data migration procedure is executed. The only remaining manual database administration --- involves the creation of the materialized views. This can simply be done by executing --- 'tagbase-materialized-view.sql' in the pgAdmin4 Web application Query Tool. 
- CREATE OR REPLACE FUNCTION execute_data_migration() RETURNS trigger AS $BODY$ - BEGIN - --\connect tagbase - -- data_time_series - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING submission b, - observation_types c - WHERE c.variable_name IN ('datetime', - 'depth', - 'temperature', - 'light', - 'internal temperature') - AND a.submission_id = b.submission_id - AND a.variable_id = c.variable_id RETURNING a.date_time, +-- The following PROCEDURE ensures that data migration (from the proc_observation's +-- table to other target tables) is executed. f +CREATE PROCEDURE sp_execute_data_migration(submission_id_param integer, is_reference_track_param boolean) +LANGUAGE SQL +AS $BODY$ + --\connect tagbase + -- data_time_series + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING submission b, + observation_types c + WHERE c.variable_name IN ('datetime', + 'depth', + 'temperature', + 'light', + 'internal temperature') + AND a.submission_id = b.submission_id + AND b.submission_id = submission_id_param + AND a.variable_id = c.variable_id RETURNING a.date_time, + a.variable_id, + a.variable_value, + a.submission_id, + b.tag_id) + INSERT INTO data_time_series + SELECT * + FROM moved_rows; + -- -- data_position + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING submission b, + observation_types c + WHERE c.variable_name = 'longitude' + AND a.submission_id = b.submission_id + AND b.submission_id = submission_id_param + AND a.variable_id = c.variable_id RETURNING a.date_time, + a.variable_id, + a.variable_value, + a.submission_id, + b.tag_id, + cast(('0.0') AS double precision) AS initial_lat, + is_reference_track_param) + INSERT INTO data_position (date_time, lat, lon, submission_id, tag_id, flag_as_reference) + SELECT date_time, + initial_lat, + variable_value, + submission_id, + tag_id, + is_reference_track_param + FROM moved_rows; + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING data_position b, + observation_types c + 
WHERE a.submission_id = b.submission_id + AND b.submission_id = submission_id_param + AND a.date_time = b.date_time + AND a.variable_id = c.variable_id + AND c.variable_name = 'latitude' RETURNING a.date_time, + a.variable_id, + a.variable_value, + a.submission_id) + UPDATE data_position + SET lat = moved_rows.variable_value + FROM moved_rows + WHERE data_position.date_time = moved_rows.date_time + AND data_position.submission_id = moved_rows.submission_id; + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING data_position b, + observation_types c + WHERE a.submission_id = b.submission_id + AND b.submission_id = submission_id_param + AND a.date_time = b.date_time + AND a.variable_id = c.variable_id + AND c.variable_name = 'longitudeError' RETURNING a.date_time, + a.variable_id, + a.variable_value, + a.submission_id) + UPDATE data_position + SET lon_err = moved_rows.variable_value + FROM moved_rows + WHERE data_position.date_time = moved_rows.date_time + AND data_position.submission_id = moved_rows.submission_id; + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING data_position b, + observation_types c + WHERE a.submission_id = b.submission_id + AND b.submission_id = submission_id_param + AND a.date_time = b.date_time + AND a.variable_id = c.variable_id + AND c.variable_name = 'latitudeError' RETURNING a.date_time, + a.variable_id, + a.variable_value, + a.submission_id) + UPDATE data_position + SET lat_err = moved_rows.variable_value + FROM moved_rows + WHERE data_position.date_time = moved_rows.date_time + AND data_position.submission_id = moved_rows.submission_id; + -- -- data_histogram_bin_info + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING observation_types b, + submission c + WHERE a.variable_id = b.variable_id + AND b.variable_name LIKE 'HistDepthBinMin%' + AND c.submission_id = submission_id_param + AND a.submission_id = c.submission_id RETURNING a.submission_id AS bin_id, + cast(substring(variable_name, '(\d+)') AS 
int) AS bin_class, + a.variable_value AS min_value, + '', + a.variable_id AS variable_value) + INSERT INTO data_histogram_bin_info + SELECT * + FROM moved_rows ON CONFLICT DO NOTHING; + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING observation_types b, + submission c + WHERE a.variable_id = b.variable_id + AND b.variable_name LIKE 'HistDepthBinMax%' + AND c.submission_id = submission_id_param + AND a.submission_id = c.submission_id RETURNING a.submission_id AS bin_id, + cast(substring(variable_name, '(\d+)') AS int) AS bin_class, + a.variable_value AS max_value) + UPDATE data_histogram_bin_info + SET max_value = moved_rows.max_value + FROM moved_rows + WHERE data_histogram_bin_info.bin_id = moved_rows.bin_id + AND data_histogram_bin_info.bin_class = moved_rows.bin_class; + -- data_histogram_bin_data + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING observation_types b, + submission c, + data_time_series d + WHERE a.variable_id = b.variable_id + AND b.variable_name LIKE 'TimeAt%' + AND c.submission_id = submission_id_param + AND a.submission_id = c.submission_id RETURNING a.submission_id, + c.tag_id, + a.submission_id AS bin_id, + cast(substring(variable_name, '(\d+)') AS int) AS bin_class, + a.date_time, + a.variable_value, + d.position_date_time, + a.variable_id) + INSERT INTO data_histogram_bin_data + SELECT * + FROM moved_rows; + -- data_profile + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING observation_types b, + submission c + WHERE a.variable_id = b.variable_id + AND b.variable_name LIKE 'PdtDepth%' + AND c.submission_id = submission_id_param + AND a.submission_id = c.submission_id RETURNING a.submission_id, + c.tag_id, a.variable_id, - a.variable_value, - a.submission_id, - b.tag_id) - INSERT INTO data_time_series - SELECT * - FROM moved_rows; - -- -- data_position - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING submission b, - observation_types c - WHERE c.variable_name = 'longitude' - AND 
a.submission_id = b.submission_id - AND a.variable_id = c.variable_id RETURNING a.date_time, + a.date_time, + a.variable_value) + INSERT INTO data_profile (submission_id, tag_id, variable_id, date_time, depth) + SELECT submission_id, + tag_id, + variable_id, + date_time, + variable_value + FROM moved_rows; + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING observation_types b, + data_profile c, + submission e + WHERE a.variable_id = b.variable_id + AND b.variable_name LIKE 'PdtTempMin%' + AND a.submission_id = c.submission_id + AND a.date_time = c.date_time + AND e.submission_id = submission_id_param + AND e.submission_id = a.submission_id RETURNING a.date_time, + a.variable_id, + a.variable_value AS variable_value, + a.submission_id) + UPDATE data_profile + SET variable_value = moved_rows.variable_value + FROM moved_rows + WHERE data_profile.date_time = moved_rows.date_time + AND data_profile.submission_id = moved_rows.submission_id; + WITH moved_rows AS + ( DELETE + FROM proc_observations a USING observation_types b, + data_profile c, + submission e + WHERE a.variable_id = b.variable_id + AND b.variable_name LIKE 'PdtTempMax%' + AND a.submission_id = c.submission_id + AND a.date_time = c.date_time + AND e.submission_id = submission_id_param + AND e.submission_id = a.submission_id RETURNING a.date_time, a.variable_id, a.variable_value, - a.submission_id, - b.tag_id, - cast(('0.0') AS double precision) AS initial_lat) - INSERT INTO data_position (date_time, lat, lon, submission_id, tag_id) - SELECT date_time, - initial_lat, - variable_value, - submission_id, - tag_id - FROM moved_rows; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING data_position b, - observation_types c - WHERE a.submission_id = b.submission_id - AND a.date_time = b.date_time - AND a.variable_id = c.variable_id - AND c.variable_name = 'latitude' RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id) - UPDATE data_position - SET lat = 
moved_rows.variable_value - FROM moved_rows - WHERE data_position.date_time = moved_rows.date_time - AND data_position.submission_id = moved_rows.submission_id; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING data_position b, - observation_types c - WHERE a.submission_id = b.submission_id - AND a.date_time = b.date_time - AND a.variable_id = c.variable_id - AND c.variable_name = 'longitudeError' RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id) - UPDATE data_position - SET lon_err = moved_rows.variable_value - FROM moved_rows - WHERE data_position.date_time = moved_rows.date_time - AND data_position.submission_id = moved_rows.submission_id; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING data_position b, - observation_types c - WHERE a.submission_id = b.submission_id - AND a.date_time = b.date_time - AND a.variable_id = c.variable_id - AND c.variable_name = 'latitudeError' RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id) - UPDATE data_position - SET lat_err = moved_rows.variable_value - FROM moved_rows - WHERE data_position.date_time = moved_rows.date_time - AND data_position.submission_id = moved_rows.submission_id; - -- -- data_histogram_bin_info - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - submission c - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'HistDepthBinMin%' - AND a.submission_id = c.submission_id RETURNING a.submission_id AS bin_id, - cast(substring(variable_name, '(\d+)') AS int) AS bin_class, - a.variable_value AS min_value, - '', - a.variable_id AS variable_value) - INSERT INTO data_histogram_bin_info - SELECT * - FROM moved_rows ON CONFLICT DO NOTHING; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - submission c - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'HistDepthBinMax%' - AND a.submission_id = c.submission_id RETURNING 
a.submission_id AS bin_id, - cast(substring(variable_name, '(\d+)') AS int) AS bin_class, - a.variable_value AS max_value) - UPDATE data_histogram_bin_info - SET max_value = moved_rows.max_value - FROM moved_rows - WHERE data_histogram_bin_info.bin_id = moved_rows.bin_id - AND data_histogram_bin_info.bin_class = moved_rows.bin_class; - -- data_histogram_bin_data - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - submission c, - data_time_series d - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'TimeAt%' - AND a.submission_id = c.submission_id RETURNING a.submission_id, - c.tag_id, - a.submission_id AS bin_id, - cast(substring(variable_name, '(\d+)') AS int) AS bin_class, - a.date_time, - a.variable_value, - d.position_date_time, - a.variable_id) - INSERT INTO data_histogram_bin_data - SELECT * - FROM moved_rows; - -- data_profile - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - submission c - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'PdtDepth%' - AND a.submission_id = c.submission_id RETURNING a.submission_id, - c.tag_id, - a.variable_id, - a.date_time, - a.variable_value) - INSERT INTO data_profile (submission_id, tag_id, variable_id, date_time, depth) - SELECT submission_id, - tag_id, - variable_id, - date_time, - variable_value - FROM moved_rows; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - data_profile c, - submission e - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'PdtTempMin%' - AND a.submission_id = c.submission_id - AND a.date_time = c.date_time - AND e.submission_id = a.submission_id RETURNING a.date_time, - a.variable_id, - a.variable_value AS variable_value, - a.submission_id) - UPDATE data_profile - SET variable_value = moved_rows.variable_value - FROM moved_rows - WHERE data_profile.date_time = moved_rows.date_time - AND data_profile.submission_id = moved_rows.submission_id; - 
WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - data_profile c, - submission e - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'PdtTempMax%' - AND a.submission_id = c.submission_id - AND a.date_time = c.date_time - AND e.submission_id = a.submission_id RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id) - UPDATE data_profile - SET variable_value = moved_rows.variable_value - FROM moved_rows - WHERE data_profile.date_time = moved_rows.date_time - AND data_profile.submission_id = moved_rows.submission_id; - -- SQL update statements to link measurement date time with position date time - UPDATE data_time_series - SET position_date_time = - (SELECT date_time - FROM data_position - WHERE data_time_series.submission_id = data_position.submission_id - AND data_time_series.date_time >= data_position.date_time - ORDER BY data_position.date_time DESC - LIMIT 1) - WHERE position_date_time IS NULL; - UPDATE data_histogram_bin_data - SET position_date_time = - (SELECT date_time - FROM data_position - WHERE data_histogram_bin_data.submission_id = data_position.submission_id - AND data_histogram_bin_data.date_time >= data_position.date_time - ORDER BY data_position.date_time DESC - LIMIT 1) - WHERE position_date_time IS NULL; - UPDATE data_profile - SET position_date_time = - (SELECT date_time - FROM data_position - WHERE data_profile.submission_id = data_position.submission_id - AND data_profile.date_time >= data_position.date_time - ORDER BY data_position.date_time DESC - LIMIT 1) - WHERE position_date_time IS NULL; - RETURN NULL; - END; - $BODY$ LANGUAGE plpgsql; - CREATE TRIGGER data_migration AFTER INSERT OR UPDATE ON proc_observations FOR EACH STATEMENT - EXECUTE PROCEDURE execute_data_migration(); + a.submission_id) + UPDATE data_profile + SET variable_value = moved_rows.variable_value + FROM moved_rows + WHERE data_profile.date_time = moved_rows.date_time + AND data_profile.submission_id = 
moved_rows.submission_id; + -- SQL update statements to link measurement date time with position date time + UPDATE data_time_series + SET position_date_time = + (SELECT date_time + FROM data_position + WHERE data_time_series.submission_id = data_position.submission_id + AND data_time_series.date_time >= data_position.date_time + ORDER BY data_position.date_time DESC + LIMIT 1) + WHERE position_date_time IS NULL; + UPDATE data_histogram_bin_data + SET position_date_time = + (SELECT date_time + FROM data_position + WHERE data_histogram_bin_data.submission_id = data_position.submission_id + AND data_histogram_bin_data.date_time >= data_position.date_time + ORDER BY data_position.date_time DESC + LIMIT 1) + WHERE position_date_time IS NULL; + UPDATE data_profile + SET position_date_time = + (SELECT date_time + FROM data_position + WHERE data_profile.submission_id = data_position.submission_id + AND data_profile.date_time >= data_position.date_time + ORDER BY data_position.date_time DESC + LIMIT 1) + WHERE position_date_time IS NULL; +$BODY$; + diff --git a/services/postgres/Dockerfile b/services/postgres/Dockerfile deleted file mode 100644 index 3d1a2db..0000000 --- a/services/postgres/Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -FROM postgres:buster - -COPY ./tagbase_schema.sql /docker-entrypoint-initdb.d/ diff --git a/services/postgres/tagbase_materialized_views.sql b/services/postgres/tagbase_materialized_views.sql deleted file mode 100644 index 7ec701a..0000000 --- a/services/postgres/tagbase_materialized_views.sql +++ /dev/null @@ -1,138 +0,0 @@ --- Uncomment the line below if you run this from the terminal. 
---\connect tagbase - --- MATERIALIZED VIEW - -CREATE MATERIALIZED VIEW mview_vis_data -AS - SELECT - variable.submission_id AS source_id, - variable.variable_value AS measurement_value, - variable.variable_name AS measurement_name, - variable.variable_units AS measurement_units, - depth.depth, - variable.date_time AS measurement_date_time, - data_position.date_time AS position_date_time, - data_position.lat, - CASE WHEN data_position.lon > 180 THEN data_position.lon - 360 ELSE data_position.lon END, - data_position.lat_err, - data_position.lon_err - FROM ( SELECT x.variable_value, - y.variable_name, - x.date_time, - x.submission_id, - y.variable_units, - x.position_date_time - FROM data_time_series x, - observation_types y - WHERE x.variable_id = y.variable_id AND y.variable_name <> 'depth' AND y.variable_name <> 'datetime') variable, - data_position, - ( SELECT x.variable_value AS depth, - x.date_time, - x.submission_id - FROM data_time_series x, - observation_types y - WHERE x.variable_id = y.variable_id AND y.variable_name = 'depth') depth - WHERE variable.submission_id = data_position.submission_id AND variable.submission_id = depth.submission_id AND variable.position_date_time = data_position.date_time AND depth.date_time = variable.date_time -WITH DATA; - - -CREATE MATERIALIZED VIEW mview_vis_data_histogram -AS - SELECT - data.submission_id AS source_id, - data.min_value AS bin_class, - data.variable_value AS measurement_value, - data.date_time AS measurement_date_time, - data_position.date_time AS position_date_time, - data_position.lat, - CASE WHEN data_position.lon > 180 THEN data_position.lon - 360 ELSE data_position.lon END, - data_position.lat_err, - data_position.lon_err - FROM ( SELECT data_histogram_bin_info.min_value, - data_histogram_bin_data.submission_id, - data_histogram_bin_data.date_time, - data_histogram_bin_data.variable_value, - data_histogram_bin_data.position_date_time - FROM data_histogram_bin_info, - data_histogram_bin_data - WHERE 
data_histogram_bin_info.bin_id = data_histogram_bin_data.bin_id AND data_histogram_bin_info.bin_class = data_histogram_bin_data.bin_class) data, - data_position - WHERE data.submission_id = data_position.submission_id AND data.position_date_time = data_position.date_time -WITH DATA; - - - -CREATE MATERIALIZED VIEW mview_vis_data_profile -AS - SELECT - data.submission_id AS source_id, - data.depth, - data.variable_value AS measurement_value, - data.date_time AS measurement_date_time, - data_position.date_time AS position_date_time, - data_position.lat, - CASE WHEN data_position.lon > 180 THEN data_position.lon - 360 ELSE data_position.lon END, - data_position.lat_err, - data_position.lon_err - FROM ( SELECT data_profile.submission_id, - data_profile.date_time, - data_profile.depth, - data_profile.variable_value, - data_profile.position_date_time - FROM data_profile) data, data_position - WHERE data.submission_id = data_position.submission_id AND data.position_date_time = data_position.date_time -WITH DATA; - - -CREATE MATERIALIZED VIEW mview_vis_metadata -AS - SELECT metadata.submission_id AS source_id, - 'Global Attributes'::text AS attribute_type, - NULL::character varying AS variable, - metadata_types.category, - metadata_types.attribute_name, - "left"("right"(metadata.attribute_value, length(metadata.attribute_value) - 1), '-1'::integer) AS attribute_value - FROM metadata_types, - metadata - WHERE metadata_types.attribute_id = metadata.attribute_id AND (metadata_types.category::text = 'instrument'::text AND (metadata_types.attribute_name::text = ANY (ARRAY['instrument_name'::character varying, 'instrument_type'::character varying, 'firmware'::character varying, 'manufacturer'::character varying, 'model'::character varying, 'owner_contact'::character varying, 'person_owner'::character varying, 'serial_number'::character varying]::text[])) OR metadata_types.category::text = 'programming'::text AND (metadata_types.attribute_name::text = ANY 
(ARRAY['programming_report'::character varying, 'programming_software'::character varying]::text[])) OR metadata_types.category::text = 'attachment'::text AND metadata_types.attribute_name::text = 'attachment_method'::text OR metadata_types.category::text = 'deployment'::text AND (metadata_types.attribute_name::text = ANY (ARRAY['geospatial_lat_start'::character varying, 'geospatial_lon_start'::character varying, 'person_tagger_capture'::character varying, 'time_coverage_start'::character varying]::text[])) OR metadata_types.category::text = 'animal'::text AND (metadata_types.attribute_name::text = ANY (ARRAY['condition_capture'::character varying, 'length_capture'::character varying, 'length_method_capture'::character varying, 'length_type_capture'::character varying, 'length_unit_capture'::character varying, 'platform'::character varying, 'taxonomic_serial_number'::character varying]::text[])) OR metadata_types.category::text = 'end_of_mission'::text AND (metadata_types.attribute_name::text = ANY (ARRAY['time_coverage_end'::character varying, 'end_details'::character varying, 'end_type'::character varying, 'geospatial_lat_end'::character varying, 'geospatial_lon_end'::character varying]::text[])) OR metadata_types.category::text = 'waypoints'::text AND metadata_types.attribute_name::text = 'waypoints_source'::text OR metadata_types.category::text = 'quality'::text AND (metadata_types.attribute_name::text = ANY (ARRAY['found_problem'::character varying, 'person_qc'::character varying]::text[]))) -UNION - SELECT data_time_series.submission_id AS source_id, - 'Variable Attributes'::text AS attribute_type, - observation_types.standard_name AS variable, - NULL::character varying AS category, - 'units'::character varying AS attribute_name, - observation_types.variable_units AS attribute_value - FROM observation_types, - ( SELECT data_time_series_1.variable_id, - data_time_series_1.submission_id - FROM data_time_series data_time_series_1 - GROUP BY 
data_time_series_1.variable_id, data_time_series_1.submission_id) data_time_series - WHERE observation_types.standard_name IS NOT NULL AND observation_types.variable_id = data_time_series.variable_id -UNION - SELECT data_time_series.submission_id AS source_id, - 'Variable Attributes'::text AS attribute_type, - observation_types.standard_name AS variable, - NULL::character varying AS category, - 'standard_name'::character varying AS attribute_name, - observation_types.standard_name AS attribute_value - FROM observation_types, - ( SELECT data_time_series_1.variable_id, - data_time_series_1.submission_id - FROM data_time_series data_time_series_1 - GROUP BY data_time_series_1.variable_id, data_time_series_1.submission_id) data_time_series - WHERE observation_types.standard_name IS NOT NULL AND observation_types.variable_id = data_time_series.variable_id -UNION - SELECT data_time_series.submission_id AS source_id, - 'Variable Attributes'::text AS attribute_type, - observation_types.standard_name AS variable, - NULL::character varying AS category, - 'long_name'::character varying AS attribute_name, - observation_types.variable_name AS attribute_value - FROM observation_types, - ( SELECT data_time_series_1.variable_id, - data_time_series_1.submission_id - FROM data_time_series data_time_series_1 - GROUP BY data_time_series_1.variable_id, data_time_series_1.submission_id) data_time_series - WHERE observation_types.standard_name IS NOT NULL AND observation_types.variable_id = data_time_series.variable_id -WITH DATA; diff --git a/services/postgres/tagbase_schema.sql b/services/postgres/tagbase_schema.sql deleted file mode 100644 index 8c32ced..0000000 --- a/services/postgres/tagbase_schema.sql +++ /dev/null @@ -1,1472 +0,0 @@ -CREATE ROLE tagbase WITH SUPERUSER LOGIN; - -CREATE DATABASE tagbase WITH ENCODING = 'UTF8' OWNER = 'tagbase'; - -ALTER USER tagbase PASSWORD tagbase; - -\connect tagbase - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET 
idle_in_transaction_session_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; --- SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; -SET search_path = public, pg_catalog; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - -SET default_with_oids = false; - --- --- Name: data_histogram_bin_data; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE data_histogram_bin_data ( - submission_id bigint NOT NULL, - tag_id bigint NOT NULL, - bin_id bigint NOT NULL, - bin_class integer NOT NULL, - date_time timestamp(6) with time zone NOT NULL, - variable_value character varying(30) NOT NULL, - position_date_time timestamp(6) with time zone, - variable_id bigint NOT NULL -); - - -ALTER TABLE data_histogram_bin_data OWNER TO postgres; - --- --- Name: TABLE data_histogram_bin_data; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE data_histogram_bin_data IS 'Contains the frequency for corresponding summary data binning schemes (migrated from proc_observations)'; - - --- --- Name: COLUMN data_histogram_bin_data.submission_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_data.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; - - --- --- Name: COLUMN data_histogram_bin_data.tag_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_data.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; - - --- --- Name: COLUMN data_histogram_bin_data.bin_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_data.bin_id IS 'Unique bin ID for the summary bin-frequency class'; - - --- --- Name: COLUMN data_histogram_bin_data.bin_class; Type: COMMENT; Schema: 
public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_data.bin_class IS 'Sequential numeric bin class identifier related to either Depth or Temperature. Usually there are 12 (1-12) bin ranges (Min and Max Depth or Temperature respectively), however there are times the bin ranges will not be 12, but instead 14 or 16. The larger the number, the more recent the tag models are from tag manufacturers, as they make more bytes available for storage.'; - - --- --- Name: COLUMN data_histogram_bin_data.date_time; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_data.date_time IS 'Date/time stamp of the tag summarized bin-frequency data record'; - - --- --- Name: COLUMN data_histogram_bin_data.variable_value; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_data.variable_value IS 'Aggregate measure for the given bin-interval of the geophysical value of the observed tag variable record'; - - --- --- Name: COLUMN data_histogram_bin_data.position_date_time; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_data.position_date_time IS 'Date/time stamp of nearest matched associated positional record'; - - --- --- Name: COLUMN data_histogram_bin_data.variable_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_data.variable_id IS 'Unique variable identifier for the data record from the source eTUFF file ingested. The variable_id is based on observation or measurement variables listed in the observation_types table. 
Note that records in this table are NOT expected to be equivalent to those in the variable_id column of the data_histogram_bin_info table'; - - --- --- Name: data_histogram_bin_info; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE data_histogram_bin_info ( - bin_id bigint NOT NULL, - bin_class integer NOT NULL, - min_value character varying(30) NOT NULL, - max_value character varying(30) NOT NULL, - variable_id bigint NOT NULL -); - - -ALTER TABLE data_histogram_bin_info OWNER TO postgres; - --- --- Name: TABLE data_histogram_bin_info; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE data_histogram_bin_info IS 'Contains definitions of binning schemes for summary tag data (migrated from proc_observations)'; - - --- --- Name: COLUMN data_histogram_bin_info.bin_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_info.bin_id IS 'Unique bin ID for the summary bin-frequency class'; - - --- --- Name: COLUMN data_histogram_bin_info.bin_class; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_info.bin_class IS 'Sequential numeric bin class identifier related to either Depth or Temperature. Usually there are 12 (1-12) bin ranges (Min and Max Depth or Temperature respectively), however there are times the bin ranges will not be 12, but instead 14 or 16. 
The larger the number, the more recent the tag models are from tag manufacturers, as they make more bytes available for storage.'; - - --- --- Name: COLUMN data_histogram_bin_info.min_value; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_info.min_value IS 'Value of minimum/lower bound of bin interval'; - - --- --- Name: COLUMN data_histogram_bin_info.max_value; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_info.max_value IS 'Value of maximum/upper bound of bin interval'; - - --- --- Name: COLUMN data_histogram_bin_info.variable_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_histogram_bin_info.variable_id IS 'Unique variable identifier for the data record from the source eTUFF file ingested. The variable_id is based on observation or measurement variables listed in the observation_types table. Note that records in this table are NOT expected to be equivalent to those in the variable_id column of the data_histogram_bin_data table'; - - --- --- Name: data_position; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE data_position ( - date_time timestamp(6) with time zone NOT NULL, - lat character varying(30) NOT NULL, - lon character varying(30) NOT NULL, - lat_err character varying(30), - lon_err character varying(30), - submission_id bigint NOT NULL, - tag_id bigint NOT NULL, - argos_location_class character varying(1), - solution_id integer NOT NULL DEFAULT 1, - flag_as_reference integer NOT NULL DEFAULT 0 -); - - -ALTER TABLE data_position OWNER TO postgres; - --- --- Name: TABLE data_position; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE data_position IS 'Contains the tag positional data series with associated Lat/Lon error estimates where available (migrated from proc_observations)'; - - --- --- Name: COLUMN data_position.date_time; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN 
data_position.date_time IS 'Date/time stamp of the tag positional data record'; - - --- --- Name: COLUMN data_position.lat; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.lat IS 'Latitude in decimal degrees of the positional data tag record'; - - --- --- Name: COLUMN data_position.lon; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.lon IS 'Longitude in decimal degrees of the positional data tag record'; - - --- --- Name: COLUMN data_position.lat_err; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.lat_err IS 'Error associated with the tag record Latitudinal positional estimate'; - - --- --- Name: COLUMN data_position.lon_err; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.lon_err IS 'Error associated with the tag record Longitudinal positional estimate'; - - --- --- Name: COLUMN data_position.submission_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; - - --- --- Name: COLUMN data_position.tag_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; - - --- --- Name: COLUMN data_position.argos_location_class; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.argos_location_class IS 'ARGOS Location Class code (G,3,2,1,0,A,B,Z) - -https://www.argos-system.org/wp-content/uploads/2016/08/r363_9_argos_users_manual-v1.6.6.pdf , page 13.'; - - --- --- Name: COLUMN data_position.solution_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.solution_id IS 'Unique numeric identifier for a given tag geolocation dataset solution. 
solution_id=1 is assigned to the primary or approved solution. Incremented solution_id''s assigned to other positional dataset solutions for a given tag_id and submission_id'; - - --- --- Name: COLUMN data_position.flag_as_reference; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_position.flag_as_reference IS 'Integer (representing psudo boolean value) flag field which identifies whether positional data associated with a given Tag and Track solution are considered to be coordinates of the "Reference" track (ie. best solution currently). Coordinate record takes 1 if it is part of the Reference track or 0 if it is not.'; - --- --- Name: data_profile; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE data_profile ( - submission_id bigint NOT NULL, - tag_id bigint NOT NULL, - variable_id bigint NOT NULL, - date_time timestamp(6) with time zone NOT NULL, - depth character varying(30) NOT NULL, - variable_value character varying(30) DEFAULT '', - position_date_time timestamp(6) with time zone -); - - -ALTER TABLE data_profile OWNER TO postgres; - --- --- Name: TABLE data_profile; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE data_profile IS 'Contains the summarized bin profile tag observations (migrated from proc_observations)'; - - --- --- Name: COLUMN data_profile.submission_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_profile.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; - - --- --- Name: COLUMN data_profile.tag_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_profile.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; - - --- --- Name: COLUMN data_profile.variable_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_profile.variable_id IS 'Unique variable identifier for the data record from the source 
eTUFF file ingested. The variable_id is based on observation or measurement variables listed in the observation_types table'; - - --- --- Name: COLUMN data_profile.date_time; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_profile.date_time IS 'Date/time stamp of the tag data record'; - - --- --- Name: COLUMN data_profile.depth; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_profile.depth IS 'Depth of the tag data record'; - - --- --- Name: COLUMN data_profile.variable_value; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_profile.variable_value IS 'Geophysical value of the observed tag variable record'; - - --- --- Name: COLUMN data_profile.position_date_time; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_profile.position_date_time IS 'Date/time stamp of nearest matched associated positional record'; - - --- --- Name: data_time_series; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE data_time_series ( - date_time timestamp(6) with time zone NOT NULL, - variable_id bigint NOT NULL, - variable_value character varying(30) NOT NULL, - submission_id bigint NOT NULL, - tag_id bigint NOT NULL, - position_date_time timestamp(6) with time zone -); - - -ALTER TABLE data_time_series OWNER TO postgres; - --- --- Name: TABLE data_time_series; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE data_time_series IS 'Contains the continuous measurement archival time series of tag geophysical measurements (migrated from proc_observations)'; - - --- --- Name: COLUMN data_time_series.date_time; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_time_series.date_time IS 'Date/time stamp of the tag data record'; - - --- --- Name: COLUMN data_time_series.variable_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_time_series.variable_id IS 'Unique variable identifier for the 
data record from the source eTUFF file ingested. The variable_id is based on observation or measurement variables listed in the observation_types table'; - - --- --- Name: COLUMN data_time_series.variable_value; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_time_series.variable_value IS 'Geophysical value of the observed tag variable record'; - - --- --- Name: COLUMN data_time_series.submission_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_time_series.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; - - --- --- Name: COLUMN data_time_series.tag_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_time_series.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; - - --- --- Name: COLUMN data_time_series.position_date_time; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN data_time_series.position_date_time IS 'Date/time stamp of nearest matched associated positional record'; - - --- --- Name: metadata; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE metadata ( - submission_id bigint NOT NULL, - attribute_id bigint NOT NULL, - attribute_value text NOT NULL, - tag_id bigint NOT NULL -); - - -ALTER TABLE metadata OWNER TO postgres; - --- --- Name: TABLE metadata; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE metadata IS 'Contains the ingested tag metadata consistent with the eTUFF specification'; - - --- --- Name: COLUMN metadata.submission_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; - - --- --- Name: COLUMN metadata.attribute_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata.attribute_id IS 'Unique numeric metadata 
attribute ID based on the eTUFF metadata specification'; - - --- --- Name: COLUMN metadata.attribute_value; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata.attribute_value IS 'Value associated with the given eTUFF metadata attribute'; - - --- --- Name: COLUMN metadata.tag_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; - - --- --- Name: metadata_position; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE metadata_position ( - submission_id bigint NOT NULL, - attribute_id bigint NOT NULL, - attribute_value text NOT NULL, - tag_id bigint NOT NULL, - solution_id integer NOT NULL DEFAULT 1 -); - - -ALTER TABLE metadata_position OWNER TO postgres; - --- --- Name: TABLE metadata_position; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE metadata_position IS 'Contains the ingested tag metadata consistent with the eTUFF specification'; - - --- --- Name: COLUMN metadata_position.submission_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; - - --- --- Name: COLUMN metadata_position.attribute_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.attribute_id IS 'Unique numeric metadata attribute ID based on the eTUFF metadata specification'; - - --- --- Name: COLUMN metadata_position.attribute_value; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.attribute_value IS 'Value associated with the given eTUFF metadata attribute'; - - --- --- Name: COLUMN metadata_position.tag_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; - - --- 
--- Name: COLUMN metadata_position.solution_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_position.solution_id IS 'Unique numeric identifier for a given tag geolocation dataset solution. solution_id=1 is assigned to the primary or approved solution. Incremented solution_id''s assigned to other positional dataset solutions for a given tag_id and submission_id'; - - --- --- Name: metadata_types; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE metadata_types ( - attribute_id bigint NOT NULL, - category character varying(1024) NOT NULL, - attribute_name character varying(1024) NOT NULL, - description text NOT NULL, - example text, - comments text, - necessity character varying(1024) NOT NULL -); - - -ALTER TABLE metadata_types OWNER TO postgres; - --- --- Name: TABLE metadata_types; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE metadata_types IS 'Contains descriptive information on tag metadata based on the eTUFF specification'; - - --- --- Name: COLUMN metadata_types.attribute_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_types.attribute_id IS 'Unique numeric metadata attribute ID based on the eTUFF metadata specification'; - - --- --- Name: COLUMN metadata_types.category; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_types.category IS 'Metadata attribute category or group based on the eTUFF metadata specification'; - - --- --- Name: COLUMN metadata_types.attribute_name; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_types.attribute_name IS 'Name of metadata attribute based on the eTUFF metadata specification'; - - --- --- Name: COLUMN metadata_types.description; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_types.description IS 'Description of metadata attribute based on the eTUFF metadata specification'; - - --- --- Name: COLUMN 
metadata_types.example; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_types.example IS 'Example value of metadata attribute on the eTUFF metadata specification'; - - --- --- Name: COLUMN metadata_types.comments; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_types.comments IS 'Comments or notes relating to the metadata attribute based on the eTUFF metadata specification'; - - --- --- Name: COLUMN metadata_types.necessity; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN metadata_types.necessity IS 'Designation of the metadata attribute as Required, Recommended, or Optional based on the eTUFF metadata specification'; - - --- --- Name: observation_types; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE observation_types ( - variable_id bigint NOT NULL, - variable_name character varying(255) NOT NULL, - standard_name character varying(255), - variable_source character varying(255), - variable_units character varying(255), - notes text, - standard_unit character varying(255) -); - - -ALTER TABLE observation_types OWNER TO postgres; - --- --- Name: TABLE observation_types; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE observation_types IS 'Contains listings and descriptions of observation variable types based on the eTUFF specification'; - - --- --- Name: COLUMN observation_types.variable_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN observation_types.variable_id IS 'Unique variable identifier based on the eTUFF tag data file specification'; - - --- --- Name: COLUMN observation_types.variable_name; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN observation_types.variable_name IS 'Variable name based on the eTUFF tag data file specification'; - - --- --- Name: COLUMN observation_types.standard_name; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN 
observation_types.standard_name IS 'CF Standard name for observation variable, if available'; - - --- --- Name: COLUMN observation_types.variable_source; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN observation_types.variable_source IS 'Source authority for the given variables'; - - --- --- Name: COLUMN observation_types.variable_units; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN observation_types.variable_units IS 'Units of the variable based on the eTUFF tag data file specification'; - - --- --- Name: COLUMN observation_types.notes; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN observation_types.notes IS 'Notes or comments relating to the variable'; - - --- --- Name: COLUMN observation_types.standard_unit; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN observation_types.standard_unit IS 'CF canonical standard unit for observation variable, if available'; - - --- --- Name: observation_types_variable_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE observation_types_variable_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE observation_types_variable_id_seq OWNER TO postgres; - --- --- Name: observation_types_variable_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE observation_types_variable_id_seq OWNED BY observation_types.variable_id; - - --- --- Name: proc_observations; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE UNLOGGED TABLE proc_observations ( - date_time timestamp(6) with time zone NOT NULL, - variable_id bigint NOT NULL, - variable_value character varying(30) NOT NULL, - submission_id bigint NOT NULL, - tag_id bigint NOT NULL -); - - -ALTER TABLE proc_observations OWNER TO postgres; - --- --- Name: TABLE proc_observations; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE proc_observations IS 'Contains 
staged source tag eTUFF data imported into Tagbase'; - - --- --- Name: COLUMN proc_observations.date_time; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN proc_observations.date_time IS 'Date/time stamp of data record from source eTUFF file ingested'; - - --- --- Name: COLUMN proc_observations.variable_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN proc_observations.variable_id IS 'Unique variable identifier for the data record from the source eTUFF file ingested. The variable_id is based on observation or measurement variables listed in the observation_types table'; - - --- --- Name: COLUMN proc_observations.variable_value; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN proc_observations.variable_value IS 'Value of the given observation_type variable for the eTUFF data record'; - - --- --- Name: COLUMN proc_observations.submission_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN proc_observations.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; - - --- --- Name: COLUMN proc_observations.tag_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN proc_observations.tag_id IS 'Unique numeric Tag ID associated with the ingested tag data file'; - - --- --- Name: submission; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE submission ( - submission_id bigint NOT NULL, - tag_id bigint NOT NULL, - date_time timestamp(6) with time zone DEFAULT now() NOT NULL, - filename text NOT NULL, - version character varying(50), - notes text -); - - -ALTER TABLE submission OWNER TO postgres; - --- --- Name: TABLE submission; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON TABLE submission IS 'Contains information on source tag eTUFF files submitted for ingest into Tagbase'; - - --- --- Name: COLUMN submission.submission_id; Type: COMMENT; Schema: public; 
Owner: postgres --- - -COMMENT ON COLUMN submission.submission_id IS 'Unique numeric ID assigned upon submission of a tag eTUFF data file for ingest/importation into Tagbase'; - - --- --- Name: COLUMN submission.tag_id; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN submission.tag_id IS 'Unique numeric Tag ID associated with the ingested tag eTUFF data file'; - - --- --- Name: COLUMN submission.date_time; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN submission.date_time IS 'Local datetime stamp at the time of eTUFF tag data file ingestion'; - - --- --- Name: COLUMN submission.filename; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN submission.filename IS 'Full path, name and extension of the ingested eTUFF tag data file'; - - --- --- Name: COLUMN submission.version; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN submission.version IS 'Version identifier for the eTUFF tag data file ingested'; - - --- --- Name: COLUMN submission.notes; Type: COMMENT; Schema: public; Owner: postgres --- - -COMMENT ON COLUMN submission.notes IS 'Free-form text field where details of submitted eTUFF file for ingest can be provided e.g. 
submitter name, etuff data contents (tag metadata and measurements + primary position data, or just secondary solutionpositional meta/data)'; - - --- --- Name: submission_submission_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE submission_submission_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE submission_submission_id_seq OWNER TO postgres; - --- --- Name: submission_submission_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE submission_submission_id_seq OWNED BY submission.submission_id; - - --- --- Name: submission_tag_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE submission_tag_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE submission_tag_id_seq OWNER TO postgres; - -ALTER SEQUENCE submission_tag_id_seq OWNED BY submission.tag_id; - --- --- Name: observation_types variable_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY observation_types ALTER COLUMN variable_id SET DEFAULT nextval('observation_types_variable_id_seq'::regclass); - - --- --- Name: submission submission_id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY submission ALTER COLUMN submission_id SET DEFAULT nextval('submission_submission_id_seq'::regclass); - - --- --- Data for Name: data_histogram_bin_data; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_histogram_bin_data (submission_id, tag_id, bin_id, bin_class, date_time, variable_value, position_date_time, variable_id) FROM stdin; -\. - - --- --- Data for Name: data_histogram_bin_info; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_histogram_bin_info (bin_id, bin_class, min_value, max_value, variable_id) FROM stdin; -\. 
- - --- --- Data for Name: data_position; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_position (date_time, lat, lon, lat_err, lon_err, submission_id, tag_id, argos_location_class, solution_id) FROM stdin; -\. - - --- --- Data for Name: data_profile; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_profile (submission_id, tag_id, variable_id, date_time, depth, variable_value, position_date_time) FROM stdin; -\. - - --- --- Data for Name: data_time_series; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY data_time_series (date_time, variable_id, variable_value, submission_id, tag_id, position_date_time) FROM stdin; -\. - - --- --- Data for Name: metadata; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY metadata (submission_id, attribute_id, attribute_value, tag_id) FROM stdin; -\. - - --- --- Data for Name: metadata_position; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY metadata_position (submission_id, attribute_id, attribute_value, tag_id, solution_id) FROM stdin; -\. - - --- --- Data for Name: proc_observations; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY proc_observations (date_time, variable_id, variable_value, submission_id, tag_id) FROM stdin; -\. - - --- --- Data for Name: submission; Type: TABLE DATA; Schema: public; Owner: postgres --- - -COPY submission (submission_id, tag_id, date_time, filename, version, notes) FROM stdin; -\. 
- - --- --- Name: observation_types_variable_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('observation_types_variable_id_seq', 1, false); - - --- --- Name: submission_submission_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('submission_submission_id_seq', 1, false); - - --- --- Name: submission_tag_id_seq; Type: SEQUENCE SET; Schema: public; Owner: postgres --- - -SELECT pg_catalog.setval('submission_tag_id_seq', 1, false); - - --- --- Name: data_histogram_bin_data data_histogram_bin_data_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_histogram_bin_data - ADD CONSTRAINT data_histogram_bin_data_pkey PRIMARY KEY (submission_id, tag_id, variable_id, bin_id, bin_class, date_time) WITH (fillfactor='100'); - - --- --- Name: data_histogram_bin_info data_histogram_bin_info_bin_id_bin_class_key; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_histogram_bin_info - ADD CONSTRAINT data_histogram_bin_info_bin_id_bin_class_key UNIQUE (bin_id, bin_class); - - --- --- Name: data_histogram_bin_info data_histogram_bin_info_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_histogram_bin_info - ADD CONSTRAINT data_histogram_bin_info_pkey PRIMARY KEY (variable_id, bin_id, bin_class) WITH (fillfactor='100'); - - --- --- Name: data_position data_position_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_position - ADD CONSTRAINT data_position_pkey PRIMARY KEY (submission_id, tag_id, solution_id, date_time) WITH (fillfactor='100'); - - --- --- Name: data_profile data_profile_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_profile - ADD CONSTRAINT data_profile_pkey PRIMARY KEY (submission_id, tag_id, date_time, depth, variable_id) WITH (fillfactor='100'); - - --- --- Name: data_time_series data_time_series_pkey; Type: 
CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_time_series - ADD CONSTRAINT data_time_series_pkey PRIMARY KEY (submission_id, tag_id, variable_id, date_time) WITH (fillfactor='100'); - - --- --- Name: metadata metadata_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata - ADD CONSTRAINT metadata_pkey PRIMARY KEY (submission_id, attribute_id); - - --- --- Name: metadata_position metadata_pkey01; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata_position - ADD CONSTRAINT metadata_pkey01 PRIMARY KEY (submission_id, attribute_id, tag_id, solution_id) WITH (fillfactor='100'); - - --- --- Name: metadata_types metadata_types_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata_types - ADD CONSTRAINT metadata_types_pkey PRIMARY KEY (attribute_id); - - --- --- Name: observation_types observation_types_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY observation_types - ADD CONSTRAINT observation_types_pkey PRIMARY KEY (variable_id); - - --- --- Name: observation_types observation_types_variable_name_key; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY observation_types - ADD CONSTRAINT observation_types_variable_name_key UNIQUE (variable_name); - - --- --- Name: proc_observations proc_observations_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY proc_observations - ADD CONSTRAINT proc_observations_pkey PRIMARY KEY (date_time, variable_id, submission_id); - - --- --- Name: submission submission_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY submission - ADD CONSTRAINT submission_pkey PRIMARY KEY (submission_id); - - --- --- Name: data_histogram_bin_data_date_time_index; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX data_histogram_bin_data_date_time_index ON data_histogram_bin_data USING btree (date_time); - 
- --- --- Name: data_histogram_bin_data_pos_date_time_index; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX data_histogram_bin_data_pos_date_time_index ON data_histogram_bin_data USING btree (position_date_time); - - --- --- Name: data_position_date_time; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX data_position_date_time ON data_position USING btree (date_time); - - --- --- Name: data_position_latlontime_index; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX data_position_latlontime_index ON data_position USING btree (submission_id, tag_id, solution_id, date_time, lat, lon, argos_location_class) WITH (fillfactor='100'); - - --- --- Name: data_profile_date_time_index; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX data_profile_date_time_index ON data_profile USING btree (date_time); - - --- --- Name: data_profile_pos_date_time_index; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX data_profile_pos_date_time_index ON data_profile USING btree (position_date_time); - - --- --- Name: data_time_series_date_time_index; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX data_time_series_date_time_index ON data_time_series USING btree (date_time); - - --- --- Name: data_time_series_pos_date_time_index; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX data_time_series_pos_date_time_index ON data_time_series USING btree (position_date_time); - - --- --- Name: data_histogram_bin_data data_histogram_bin_data_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_histogram_bin_data - ADD CONSTRAINT data_histogram_bin_data_submission_id_fkey FOREIGN KEY (submission_id) REFERENCES submission(submission_id) ON DELETE CASCADE; - - --- --- Name: data_histogram_bin_info data_histogram_bin_info; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_histogram_bin_info - ADD CONSTRAINT 
data_histogram_bin_info FOREIGN KEY (variable_id) REFERENCES observation_types(variable_id); - - --- --- Name: data_position data_position_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_position - ADD CONSTRAINT data_position_submission_id_fkey FOREIGN KEY (submission_id) REFERENCES submission(submission_id) ON DELETE CASCADE; - - --- --- Name: data_time_series data_time_series_data_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_time_series - ADD CONSTRAINT data_time_series_data_submission_id_fkey FOREIGN KEY (submission_id) REFERENCES submission(submission_id); - - --- --- Name: data_time_series data_time_series_variable_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_time_series - ADD CONSTRAINT data_time_series_variable_id_fkey FOREIGN KEY (variable_id) REFERENCES observation_types(variable_id); - - --- --- Name: data_histogram_bin_data datahistogrambindata_observationtypes_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_histogram_bin_data - ADD CONSTRAINT datahistogrambindata_observationtypes_fkey FOREIGN KEY (variable_id) REFERENCES observation_types(variable_id); - - --- --- Name: data_profile dataprofile_observationtypes_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_profile - ADD CONSTRAINT dataprofile_observationtypes_fkey FOREIGN KEY (variable_id) REFERENCES observation_types(variable_id); - - --- --- Name: data_profile dataprofile_submission_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_profile - ADD CONSTRAINT dataprofile_submission_fkey FOREIGN KEY (submission_id) REFERENCES submission(submission_id); - - --- --- Name: data_histogram_bin_data histogrambindata_histogrambininfo_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY data_histogram_bin_data 
- ADD CONSTRAINT histogrambindata_histogrambininfo_fkey FOREIGN KEY (bin_id, bin_class) REFERENCES data_histogram_bin_info(bin_id, bin_class); - - --- --- Name: metadata metadata_attribute_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata - ADD CONSTRAINT metadata_attribute_id_fkey FOREIGN KEY (attribute_id) REFERENCES metadata_types(attribute_id); - - --- --- Name: metadata_position metadata_attribute_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata_position - ADD CONSTRAINT metadata_attribute_id_fkey FOREIGN KEY (attribute_id) REFERENCES metadata_types(attribute_id); - - --- --- Name: metadata metadata_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata - ADD CONSTRAINT metadata_submission_id_fkey FOREIGN KEY (submission_id) REFERENCES submission(submission_id) ON DELETE CASCADE; - - --- --- Name: metadata_position metadata_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY metadata_position - ADD CONSTRAINT metadata_submission_id_fkey FOREIGN KEY (submission_id) REFERENCES submission(submission_id) ON DELETE CASCADE; - - --- --- Name: proc_observations proc_observations_submission_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY proc_observations - ADD CONSTRAINT proc_observations_submission_id_fkey FOREIGN KEY (submission_id) REFERENCES submission(submission_id) ON DELETE CASCADE; - - --- --- Name: proc_observations proc_observations_variable_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY proc_observations - ADD CONSTRAINT proc_observations_variable_id_fkey FOREIGN KEY (variable_id) REFERENCES observation_types(variable_id); - - --- --- PostgreSQL database dump complete --- - -CREATE PROCEDURE sp_delete_submission(tag_id_param integer, submission_id_param integer) -LANGUAGE SQL -AS $$ -DELETE FROM 
submission WHERE tag_id = tag_id_param AND submission_id = submission_id_param -$$; - -CREATE PROCEDURE sp_delete_tag(tag_id_param integer) -LANGUAGE SQL -AS $$ -DELETE FROM submission WHERE tag_id = tag_id_param -$$; - -CREATE PROCEDURE sp_delete_all_tags() -LANGUAGE SQL -AS $$ -TRUNCATE submission CASCADE -$$; - --- --- The following TRIGGER ensures that upon ingestion of an eTUFF file into tagbase-server, --- the data migration procedure is executed. The only remaining manual database administration --- involves the creation of the materialized views. This can simply be done by executing --- 'tagbase-materialized-view.sql' in the pgAdmin4 Web application Query Tool. - CREATE OR REPLACE FUNCTION execute_data_migration() RETURNS trigger AS $BODY$ - BEGIN - --\connect tagbase - -- data_time_series - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING submission b, - observation_types c - WHERE c.variable_name IN ('datetime', - 'depth', - 'temperature', - 'light', - 'internal temperature') - AND a.submission_id = b.submission_id - AND a.variable_id = c.variable_id RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id, - b.tag_id) - INSERT INTO data_time_series - SELECT * - FROM moved_rows; - -- -- data_position - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING submission b, - observation_types c - WHERE c.variable_name = 'longitude' - AND a.submission_id = b.submission_id - AND a.variable_id = c.variable_id RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id, - b.tag_id, - cast(('0.0') AS double precision) AS initial_lat) - INSERT INTO data_position (date_time, lat, lon, submission_id, tag_id) - SELECT date_time, - initial_lat, - variable_value, - submission_id, - tag_id - FROM moved_rows; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING data_position b, - observation_types c - WHERE a.submission_id = b.submission_id - AND a.date_time = b.date_time - AND a.variable_id = 
c.variable_id - AND c.variable_name = 'latitude' RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id) - UPDATE data_position - SET lat = moved_rows.variable_value - FROM moved_rows - WHERE data_position.date_time = moved_rows.date_time - AND data_position.submission_id = moved_rows.submission_id; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING data_position b, - observation_types c - WHERE a.submission_id = b.submission_id - AND a.date_time = b.date_time - AND a.variable_id = c.variable_id - AND c.variable_name = 'longitudeError' RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id) - UPDATE data_position - SET lon_err = moved_rows.variable_value - FROM moved_rows - WHERE data_position.date_time = moved_rows.date_time - AND data_position.submission_id = moved_rows.submission_id; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING data_position b, - observation_types c - WHERE a.submission_id = b.submission_id - AND a.date_time = b.date_time - AND a.variable_id = c.variable_id - AND c.variable_name = 'latitudeError' RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id) - UPDATE data_position - SET lat_err = moved_rows.variable_value - FROM moved_rows - WHERE data_position.date_time = moved_rows.date_time - AND data_position.submission_id = moved_rows.submission_id; - -- -- data_histogram_bin_info - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - submission c - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'HistDepthBinMin%' - AND a.submission_id = c.submission_id RETURNING a.submission_id AS bin_id, - cast(substring(variable_name, '(\d+)') AS int) AS bin_class, - a.variable_value AS min_value, - '', - a.variable_id AS variable_value) - INSERT INTO data_histogram_bin_info - SELECT * - FROM moved_rows ON CONFLICT DO NOTHING; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, 
- submission c - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'HistDepthBinMax%' - AND a.submission_id = c.submission_id RETURNING a.submission_id AS bin_id, - cast(substring(variable_name, '(\d+)') AS int) AS bin_class, - a.variable_value AS max_value) - UPDATE data_histogram_bin_info - SET max_value = moved_rows.max_value - FROM moved_rows - WHERE data_histogram_bin_info.bin_id = moved_rows.bin_id - AND data_histogram_bin_info.bin_class = moved_rows.bin_class; - -- data_histogram_bin_data - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - submission c, - data_time_series d - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'TimeAt%' - AND a.submission_id = c.submission_id RETURNING a.submission_id, - c.tag_id, - a.submission_id AS bin_id, - cast(substring(variable_name, '(\d+)') AS int) AS bin_class, - a.date_time, - a.variable_value, - d.position_date_time, - a.variable_id) - INSERT INTO data_histogram_bin_data - SELECT * - FROM moved_rows; - -- data_profile - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - submission c - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'PdtDepth%' - AND a.submission_id = c.submission_id RETURNING a.submission_id, - c.tag_id, - a.variable_id, - a.date_time, - a.variable_value) - INSERT INTO data_profile (submission_id, tag_id, variable_id, date_time, depth) - SELECT submission_id, - tag_id, - variable_id, - date_time, - variable_value - FROM moved_rows; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - data_profile c, - submission e - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'PdtTempMin%' - AND a.submission_id = c.submission_id - AND a.date_time = c.date_time - AND e.submission_id = a.submission_id RETURNING a.date_time, - a.variable_id, - a.variable_value AS variable_value, - a.submission_id) - UPDATE data_profile - SET variable_value = 
moved_rows.variable_value - FROM moved_rows - WHERE data_profile.date_time = moved_rows.date_time - AND data_profile.submission_id = moved_rows.submission_id; - WITH moved_rows AS - ( DELETE - FROM proc_observations a USING observation_types b, - data_profile c, - submission e - WHERE a.variable_id = b.variable_id - AND b.variable_name LIKE 'PdtTempMax%' - AND a.submission_id = c.submission_id - AND a.date_time = c.date_time - AND e.submission_id = a.submission_id RETURNING a.date_time, - a.variable_id, - a.variable_value, - a.submission_id) - UPDATE data_profile - SET variable_value = moved_rows.variable_value - FROM moved_rows - WHERE data_profile.date_time = moved_rows.date_time - AND data_profile.submission_id = moved_rows.submission_id; - -- SQL update statements to link measurement date time with position date time - UPDATE data_time_series - SET position_date_time = - (SELECT date_time - FROM data_position - WHERE data_time_series.submission_id = data_position.submission_id - AND data_time_series.date_time >= data_position.date_time - ORDER BY data_position.date_time DESC - LIMIT 1) - WHERE position_date_time IS NULL; - UPDATE data_histogram_bin_data - SET position_date_time = - (SELECT date_time - FROM data_position - WHERE data_histogram_bin_data.submission_id = data_position.submission_id - AND data_histogram_bin_data.date_time >= data_position.date_time - ORDER BY data_position.date_time DESC - LIMIT 1) - WHERE position_date_time IS NULL; - UPDATE data_profile - SET position_date_time = - (SELECT date_time - FROM data_position - WHERE data_profile.submission_id = data_position.submission_id - AND data_profile.date_time >= data_position.date_time - ORDER BY data_position.date_time DESC - LIMIT 1) - WHERE position_date_time IS NULL; - RETURN NULL; - END; - $BODY$ LANGUAGE plpgsql; - CREATE TRIGGER data_migration AFTER INSERT OR UPDATE ON proc_observations FOR EACH STATEMENT - EXECUTE PROCEDURE execute_data_migration(); diff --git 
a/tagbase_server/tagbase_server/controllers/tags_controller.py b/tagbase_server/tagbase_server/controllers/tags_controller.py index 9e362d8..267af1b 100644 --- a/tagbase_server/tagbase_server/controllers/tags_controller.py +++ b/tagbase_server/tagbase_server/controllers/tags_controller.py @@ -41,7 +41,7 @@ def delete_tag(tag_id): # noqa: E501 conn = connect() with conn: with conn.cursor() as cur: - cur.execute("CALL sp_delete_tag(%s);", (int(tag_id))) + cur.execute("CALL sp_delete_tag(%s);", (tag_id,)) def delete_tags(): # noqa: E501 @@ -75,17 +75,19 @@ def get_tag(tag_id): # noqa: E501 "SELECT * FROM submission WHERE tag_id = %s ORDER BY submission_id", (tag_id,), ) - results = cur.fetchall() - tags = [] - for row in results: + subs_results = cur.fetchall() + subs = [] + for row in subs_results: cur.execute( "SELECT mt.attribute_name, md.attribute_value FROM metadata_types mt, metadata md " - "WHERE md.attribute_id = mt.attribute_id;" + "WHERE md.attribute_id = mt.attribute_id AND md.submission_id = %s;", + (row[0],), ) meta_dict = {} - for md_row in cur.fetchall(): - meta_dict[md_row[0]] = md_row[1].strip('"') - tags.append( + md_results = cur.fetchall() + for md_row in md_results: + meta_dict[md_row[0]] = md_row[1] + subs.append( { "submission_id": row[0], "tag_id": row[1], @@ -93,10 +95,12 @@ def get_tag(tag_id): # noqa: E501 "filename": row[3], "version": row[4], "notes": row[5], + "hash_sha256": row[6], + "dataset_id": row[7], "metadata": meta_dict, } ) - return Tag200.from_dict({"tag": tags}) + return Tag200.from_dict({"tag": subs}) def list_tags(): # noqa: E501 diff --git a/tagbase_server/tagbase_server/models/tag_submission.py b/tagbase_server/tagbase_server/models/tag_submission.py index 7eac196..44aa07c 100644 --- a/tagbase_server/tagbase_server/models/tag_submission.py +++ b/tagbase_server/tagbase_server/models/tag_submission.py @@ -17,8 +17,10 @@ class TagSubmission(Model): def __init__( self, + dataset_id=None, date_time=None, filename=None, + 
hash_sha256=None, metadata=None, notes=None, submission_id=None, @@ -27,10 +29,14 @@ def __init__( ): # noqa: E501 """TagSubmission - a model defined in OpenAPI + :param dataset_id: The dataset_id of this TagSubmission. # noqa: E501 + :type dataset_id: int :param date_time: The date_time of this TagSubmission. # noqa: E501 :type date_time: str :param filename: The filename of this TagSubmission. # noqa: E501 :type filename: str + :param hash_sha256: The hash_sha256 of this TagSubmission. # noqa: E501 + :type hash_sha256: str :param metadata: The metadata of this TagSubmission. # noqa: E501 :type metadata: Dict[str, str] :param notes: The notes of this TagSubmission. # noqa: E501 @@ -43,8 +49,10 @@ def __init__( :type version: str """ self.openapi_types = { + "dataset_id": int, "date_time": str, "filename": str, + "hash_sha256": str, "metadata": Dict[str, str], "notes": str, "submission_id": int, @@ -53,8 +61,10 @@ def __init__( } self.attribute_map = { + "dataset_id": "dataset_id", "date_time": "date_time", "filename": "filename", + "hash_sha256": "hash_sha256", "metadata": "metadata", "notes": "notes", "submission_id": "submission_id", @@ -62,8 +72,10 @@ def __init__( "version": "version", } + self._dataset_id = dataset_id self._date_time = date_time self._filename = filename + self._hash_sha256 = hash_sha256 self._metadata = metadata self._notes = notes self._submission_id = submission_id @@ -81,6 +93,29 @@ def from_dict(cls, dikt) -> "TagSubmission": """ return util.deserialize_model(dikt, cls) + @property + def dataset_id(self): + """Gets the dataset_id of this TagSubmission. + + The primary key from the Dataset relation # noqa: E501 + + :return: The dataset_id of this TagSubmission. + :rtype: int + """ + return self._dataset_id + + @dataset_id.setter + def dataset_id(self, dataset_id): + """Sets the dataset_id of this TagSubmission. + + The primary key from the Dataset relation # noqa: E501 + + :param dataset_id: The dataset_id of this TagSubmission. 
+ :type dataset_id: int + """ + + self._dataset_id = dataset_id + @property def date_time(self): """Gets the date_time of this TagSubmission. @@ -127,6 +162,29 @@ def filename(self, filename): self._filename = filename + @property + def hash_sha256(self): + """Gets the hash_sha256 of this TagSubmission. + + SHA256 hash representing the contents of the submission eTUFF file # noqa: E501 + + :return: The hash_sha256 of this TagSubmission. + :rtype: str + """ + return self._hash_sha256 + + @hash_sha256.setter + def hash_sha256(self, hash_sha256): + """Sets the hash_sha256 of this TagSubmission. + + SHA256 hash representing the contents of the submission eTUFF file # noqa: E501 + + :param hash_sha256: The hash_sha256 of this TagSubmission. + :type hash_sha256: str + """ + + self._hash_sha256 = hash_sha256 + @property def metadata(self): """Gets the metadata of this TagSubmission. diff --git a/tagbase_server/tagbase_server/openapi/openapi.yaml b/tagbase_server/tagbase_server/openapi/openapi.yaml index a9ea7be..258114b 100644 --- a/tagbase_server/tagbase_server/openapi/openapi.yaml +++ b/tagbase_server/tagbase_server/openapi/openapi.yaml @@ -538,8 +538,10 @@ components: description: Information for a given tag example: tag: - - date_time: 2022-04-01T04:58:21.319061+00:00 + - dataset_id: 1 + date_time: 2022-04-01T04:58:21.319061+00:00 filename: eTUFF-sailfish-117259_2.txt + hash_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b977 metadata: person_owner: John Do owner_contect: john@do.net @@ -550,8 +552,10 @@ components: submission_id: 5 tag_id: 3 version: "1" - - date_time: 2022-06-01T05:39:46.896088+00:00 + - dataset_id: 1 + date_time: 2022-06-01T05:39:46.896088+00:00 filename: eTUFF-sailfish-117259_2.txt + hash_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 metadata: person_owner: Jane Do owner_contect: jane@do.net @@ -611,6 +615,11 @@ components: type: object TagSubmission: properties: + dataset_id: + description: The 
primary key from the Dataset relation + example: 1 + title: dataset_id + type: integer date_time: description: Local datetime stamp at the time of eTUFF tag data file ingestion example: 2022-04-01T04:58:21.319061+00:00 @@ -621,6 +630,12 @@ components: example: eTUFF-sailfish-117259_2.txt title: filename type: string + hash_sha256: + description: SHA256 hash representing the contents of the submission eTUFF + file + example: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + title: hash_sha256 + type: string metadata: additionalProperties: type: string diff --git a/tagbase_server/tagbase_server/utils/processing_utils.py b/tagbase_server/tagbase_server/utils/processing_utils.py index 86a97f3..e75d78c 100644 --- a/tagbase_server/tagbase_server/utils/processing_utils.py +++ b/tagbase_server/tagbase_server/utils/processing_utils.py @@ -85,9 +85,20 @@ def process_global_attributes(lines, cur, submission_id, metadata, submission_fi return processed_lines - 1 if processed_lines > 0 else 0 -def get_tag_id(cur, submission_filename): - sql_query = "SELECT COALESCE(MAX(tag_id), NEXTVAL('submission_tag_id_seq')) FROM submission WHERE filename = '{}'".format( - submission_filename +def get_tag_id(cur, dataset_id): + """ + Retrieve a 'tag_id' for a submission by performing a lookup on the 'dataset_id'. + If an entry exists for the dataset then grab the existing associated tag_id. If not, + create a new tag_id. + + :param cur: A database cursor + :type cur: cursor connection + + :param dataset_id: Dataset ID as described above. 
+ :type dataset_id: int + """ + sql_query = "SELECT COALESCE(MAX(tag_id), NEXTVAL('submission_tag_id_seq')) FROM submission WHERE dataset_id = '{}'".format( + dataset_id ) logger.debug("Executing: %s", sql_query) cur.execute(sql_query) @@ -96,11 +107,50 @@ return result +def get_dataset_id(cur, instrument_name, serial_number, ptt, platform): + """ + Retrieve or create a dataset entry for a submission. If a dataset entry exists then grab the existing + id, if not, create a new one. + + :param cur: A database cursor + :type cur: cursor connection + + :param instrument_name: A unique instrument name, made clear to the end user that it is the primary identifier, e.g., iccat_gbyp0008 + :type instrument_name: str + + :param serial_number: The device internal ID, e.g., 18P0201 + :type serial_number: str + + :param ptt: A satellite platform ID, e.g., 62342 + :type ptt: str + + :param platform: The species code/common name on which the device was deployed, e.g., Thunnus thynnus + :type platform: str + """ + cur.execute( + "SELECT COALESCE(MAX(dataset_id), NEXTVAL('dataset_dataset_id_seq')) FROM dataset WHERE instrument_name = '{}' AND serial_number = '{}' AND ptt = '{}' AND platform = '{}'".format( + instrument_name, serial_number, ptt, platform + ) + ) + dataset_id = cur.fetchone()[0] + logger.debug("Result: %s", dataset_id) + cur.execute( + "INSERT INTO dataset (dataset_id, instrument_name, serial_number, ptt, platform) VALUES ('{}', '{}', '{}', '{}', '{}') ON CONFLICT DO NOTHING".format( + dataset_id, instrument_name, serial_number, ptt, platform + ) + ) + logger.debug( + "Successful INSERT of '%s' into 'dataset' table.", + dataset_id, + ) + return dataset_id + + + def insert_new_submission( - cur, tag_id, submission_filename, notes, version, hash_sha256 + cur, tag_id, submission_filename, notes, version, hash_sha256, dataset_id ): cur.execute( - "INSERT INTO submission (tag_id, filename, date_time, notes, version, hash_sha256) 
 VALUES (%s, %s, %s, %s, %s, %s)", + "INSERT INTO submission (tag_id, filename, date_time, notes, version, hash_sha256, dataset_id) VALUES (%s, %s, %s, %s, %s, %s, %s)", ( tag_id, submission_filename, @@ -108,12 +158,9 @@ notes, version, hash_sha256, + dataset_id, ), ) - logger.info( - "Successful INSERT of '%s' into 'submission' table.", - submission_filename, - ) cur.execute("SELECT currval('submission_submission_id_seq')") submission_id = cur.fetchone()[0] @@ -154,6 +201,50 @@ return False + + +def get_dataset_elements(submission_filename): + """ + Extract 'instrument_name', 'serial_number', 'ptt', 'platform', + 'referencetrack_included' and values from + global attributes. + + :param submission_filename: The file from which we wish to extract certain global attributes + :type submission_filename: str + """ + raw_global_attributes = [] + with open(submission_filename, "rb") as data: + for line in iter( + lambda: data.readline().decode("utf-8", "ignore").rstrip(), "// data:" + ): + raw_global_attributes.append(line) + instrument_name = "unknown" + serial_number = "unknown" + ptt = "unknown" + platform = "unknown" + referencetrack_included = 0 + for line in raw_global_attributes: + strp_line = line.strip() + if strp_line.startswith("//"): + continue + value = strp_line[1:].split(" = ")[1].replace('"', "") + if strp_line.startswith(":instrument_name"): + instrument_name = value + elif strp_line.startswith(":serial_number"): + serial_number = value + elif strp_line.startswith(":ptt"): + ptt = value + elif strp_line.startswith(":platform"): + platform = value + elif strp_line.startswith(":referencetrack_included"): + referencetrack_included = int(value) + return ( + instrument_name, + serial_number, + ptt, + platform, + referencetrack_included, + ) + + + def process_etuff_file(file, version=None, notes=None): + start = time.perf_counter() + submission_filename = file # full path name is now preferred rather 
than - file[file.rindex("/") + 1 :] @@ -176,10 +267,33 @@ def process_etuff_file(file, version=None, notes=None): ) return 1 - tag_id = get_tag_id(cur, submission_filename) + ( + instrument_name, + serial_number, + ptt, + platform, + referencetrack_included, + ) = get_dataset_elements(submission_filename) + + dataset_id = get_dataset_id( + cur, instrument_name, serial_number, ptt, platform + ) + logger.info( + "Successfully reserved dataset_id: '%s' for '%s'.", + dataset_id, + submission_filename, + ) + + tag_id = get_tag_id(cur, dataset_id) submission_id = insert_new_submission( - cur, tag_id, submission_filename, notes, version, hash_sha256 + cur, + tag_id, + submission_filename, + notes, + version, + hash_sha256, + dataset_id, ) logger.info( "Successful INSERT of '%s' into 'submission' table.", @@ -291,7 +405,7 @@ def process_etuff_file(file, version=None, notes=None): a = x[0] b = x[1] c = x[2] - mog = cur.mogrify("(%s, %s, %s, %s)", (a, b, str(c), tag_id)) + mog = cur.mogrify("(%s, %s, %s, %s)", (a, b, str(c).strip('"'), tag_id)) cur.execute( "INSERT INTO metadata (submission_id, attribute_id, attribute_value, tag_id) VALUES " + mog.decode("utf-8") @@ -335,10 +449,19 @@ def process_etuff_file(file, version=None, notes=None): # copy buffer to db s_time = time.perf_counter() logger.info( - "Copying memory buffer to 'proc_observations' and executing 'data_migration' TRIGGER." + "Copying memory buffer to 'proc_observations' and executing data migration." ) try: cur.copy_from(buffer, "proc_observations", sep=",") + ref = bool(referencetrack_included) + logger.debug( + "Executing sp_execute_data_migration(%s, %s);", + int(submission_id), + ref, + ) + cur.execute( + "CALL sp_execute_data_migration(%s, %s);", (int(submission_id), ref) + ) except (Exception, psycopg2.DatabaseError) as error: logger.error("Error: %s", error) conn.rollback()