Skip to content

Commit 39b49d0

Browse files
authored
Merge pull request #76 from subugoe/ta_match
Refactor TA matching to avoid confusion about the main institution
2 parents 3903f4e + c01f907 commit 39b49d0

File tree

9 files changed

+108
-122
lines changed

9 files changed

+108
-122
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
Package: hoaddata
22
Type: Package
33
Title: Data about hybrid open access journal publishing
4-
Version: 0.2.98
4+
Version: 0.2.99
55
Authors@R:
66
person("Najko", "Jahn", , "najko.jahn@sub.uni-goettingen.de",
77
role = c("aut", "cre"),

data/cc_articles.rda

210 KB
Binary file not shown.

data/cr_md.rda

2.77 KB
Binary file not shown.

data/cr_upw.rda

2.48 KB
Binary file not shown.

data/jct_oalex_venues.rda

34 Bytes
Binary file not shown.

data/jn_aff.rda

53.2 KB
Binary file not shown.

data/jn_ind.rda

3.65 KB
Binary file not shown.

inst/sql/jct_inst_enriched.sql

Lines changed: 45 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -1,49 +1,46 @@
1-
-- This query is designed to identify institutions participating in transformative agreements.
2-
-- JCT does not cover all associated institutions, e.g., university hospitals and MPG institutes.
3-
-- Here, we include associated institutions to ensure comprehensive coverage
4-
5-
-- Common Table Expression (CTE) - matching:
6-
WITH matching AS (
7-
-- Part 1: Retrieve data from 'oalex_inst' to include associated institutions
8-
(
9-
SELECT
10-
esac_id, -- ESAC TA ID
11-
oalex_inst.ror AS ror_main, -- ROR identifier for the main institution
12-
inst.ror AS ror -- ROR identifier for associated institutions
13-
FROM
14-
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst
15-
INNER JOIN
16-
`subugoe-collaborative.openalex.institutions` as oalex_inst
17-
ON
18-
jct_inst.ror_id = oalex_inst.ror
19-
INNER JOIN
20-
UNNEST(oalex_inst.associated_institutions) as inst
21-
ORDER BY
22-
esac_id
23-
)
1+
WITH
2+
obtain_associated_ror_ids AS (
3+
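-- Look up each JCT institution in the OpenAlex institutions table and list its associated institutions; the LEFT JOINs keep JCT institutions that have no OpenAlex match or no associated institutions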
4+
SELECT
5+
esac_id, -- ESAC TA ID
6+
jct_inst.ror_id AS ror_jct, -- ROR identifier from JCT
7+
inst.ror AS ror_associated -- ROR identifier for associated institutions from OpenAlex
8+
FROM
9+
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst
10+
LEFT JOIN
11+
`subugoe-collaborative.openalex.institutions` AS oalex_inst
12+
ON
13+
jct_inst.ror_id = oalex_inst.ror
14+
LEFT JOIN
15+
UNNEST(oalex_inst.associated_institutions) AS inst
16+
ORDER BY
17+
esac_id
18+
),
19+
create_matching_table AS (
20+
SELECT
21+
esac_id,
22+
'ror_jct' AS ror_type,
23+
ror_jct AS ror
24+
FROM
25+
obtain_associated_ror_ids
2426
UNION ALL
25-
-- Part 2: Retrieve data from 'jct_inst' for the main institutions
26-
SELECT
27-
esac_id, -- ESAC TA ID
28-
ror_id AS ror_main, -- ROR identifier for the main institution
29-
ror_id AS ror -- ROR identifier for the main institution (no associated institutions)
30-
FROM
31-
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst
32-
ORDER BY
33-
esac_id, ror_main
34-
)
35-
36-
-- Main Query:
37-
-- Select data from the 'matching' CTE and join it with 'jct_inst' to retrieve additional details.
38-
SELECT
39-
DISTINCT matching.*, -- Data from the 'matching' CTE
40-
start_date, -- Start date of participation in transformative agreement
41-
end_date -- End date of participation in transformative agreement
42-
FROM
43-
matching
44-
INNER JOIN
45-
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst
46-
ON
47-
matching.esac_id = jct_inst.esac_id
48-
ORDER BY
49-
esac_id, ror_main
27+
SELECT
28+
esac_id,
29+
'ror_associated' AS ror_type,
30+
ror_associated AS ror
31+
FROM
32+
obtain_associated_ror_ids
33+
)
34+
SELECT
35+
DISTINCT create_matching_table.*,
36+
start_date,
37+
end_date
38+
FROM
39+
create_matching_table
40+
INNER JOIN
41+
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst
42+
ON
43+
create_matching_table.esac_id = jct_inst.esac_id
44+
ORDER BY
45+
esac_id,
46+
ror

inst/sql/ta_oa_inst.sql

Lines changed: 62 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -1,84 +1,73 @@
1-
-- Publication statistics for institutions participating in transformative agreements (TA)
2-
-- The resulting table allows us to determine the impact TAs have on the open access publication activity of participating institutions.
1+
-- Combine ESAC TA data with institution information
2+
WITH esac_journals AS (
3+
SELECT DISTINCT
4+
hybrid_jns.issn_l AS matching_issn,
5+
hybrid_jns.esac_id,
6+
esac_publisher,
7+
start_date,
8+
EXTRACT(YEAR FROM start_date) AS start_year,
9+
end_date,
10+
EXTRACT(YEAR FROM end_date) AS end_year,
11+
issn_l,
12+
jct_inst.ror,
13+
jct_inst.ror_type,
14+
oalex.id AS oalex_inst_id
15+
FROM `subugoe-collaborative.hoaddata.jct_hybrid_jns` AS hybrid_jns
16+
-- Join with participating institutions
17+
INNER JOIN `subugoe-collaborative.hoaddata.jct_inst_enriched` AS jct_inst
18+
ON jct_inst.esac_id = hybrid_jns.esac_id
19+
-- Match with OpenAlex institutions
20+
INNER JOIN `subugoe-collaborative.openalex.institutions` AS oalex
21+
ON jct_inst.ror = oalex.ror
22+
),
323

4-
WITH
5-
-- ESAC TA / institutions matching table
6-
esac_journals AS (
7-
SELECT
8-
DISTINCT hybrid_jns.issn_l AS matching_issn, -- Extracting ISSN
9-
hybrid_jns.esac_id, -- ESAC ID of the TA
10-
esac_publisher, -- Publisher information
11-
start_date, -- Agreement start date
12-
EXTRACT(YEAR FROM start_date) AS start_year, -- Extracting the year from start_date
13-
end_date, -- Agreement end date
14-
EXTRACT(YEAR FROM end_date) AS end_year, -- Extracting the year from end_date
15-
issn_l, -- ISSN of the journal
16-
jct_inst.ror, -- ROR ID of the institution
17-
jct_inst.ror_main, -- Main ROR ID of the institution
18-
oalex.id AS oalex_inst_id -- OpenAlex institution ID
19-
FROM
20-
`subugoe-collaborative.hoaddata.jct_hybrid_jns` AS hybrid_jns
21-
-- Join with participating institutions
22-
INNER JOIN
23-
`subugoe-collaborative.hoaddata.jct_inst_enriched` AS jct_inst
24-
ON
25-
jct_inst.esac_id = hybrid_jns.esac_id
26-
-- OpenAlex / ROR Matching
27-
INNER JOIN
28-
`subugoe-collaborative.openalex.institutions` AS oalex
29-
ON
30-
jct_inst.ror = oalex.ror
31-
),
24+
-- Gather publication data for institutions per year and link to TA
25+
inst_per_year AS (
26+
SELECT DISTINCT
27+
esac_journals.*,
28+
oalex_inst.doi,
29+
oalex_inst.cr_year,
30+
oa.cc,
31+
cr.issued,
32+
esac_journals.ror as ror_matched,
33+
esac_journals.ror_type as ror_type_,
34+
-- Flag whether the publication's Crossref issued date falls within the TA start and end dates (both inclusive)
35+
CASE
36+
WHEN (DATE(cr.issued) BETWEEN DATE(start_date) AND DATE(end_date)) THEN TRUE
37+
ELSE FALSE
38+
END AS ta,
39+
-- Check if publication has a CC license
40+
CASE
41+
WHEN oa.cc IS NOT NULL THEN TRUE
42+
ELSE FALSE
43+
END AS has_cc
44+
FROM esac_journals
45+
-- Join with OpenAlex institution data
46+
INNER JOIN `subugoe-collaborative.hoaddata.cr_openalex_inst_full` AS oalex_inst
47+
ON esac_journals.oalex_inst_id = oalex_inst.id
48+
AND oalex_inst.issn_l = esac_journals.matching_issn
49+
-- Left join so that publications without a CC license are kept (oa.cc is NULL for them)
50+
LEFT JOIN `subugoe-collaborative.hoaddata.cc_openalex_inst` AS oa
51+
ON oalex_inst.doi = oa.doi
52+
-- Join with Crossref data for publication dates
53+
INNER JOIN `subugoe-collaborative.cr_instant.snapshot` AS cr
54+
ON oalex_inst.doi = cr.doi
55+
)
3256

33-
-- Publications per year, institution, and agreement
34-
inst_per_year AS (
35-
SELECT
36-
DISTINCT esac_journals.*, -- Include columns from the previous CTE
37-
oalex_inst.doi, -- DOI of the publication
38-
oalex_inst.cr_year, -- Publication year
39-
cc, -- CC license information
40-
cr.issued, -- Date of publication
41-
esac_journals.ror AS ror_rel, -- ROR ID related to the journal
42-
esac_journals.ror_main AS ror_id, -- Main ROR ID related to the journal
43-
CASE
44-
WHEN (DATE(cr.issued) BETWEEN DATE(start_date) AND DATE(end_date)) THEN TRUE -- Check if publication is within the agreement's date range
45-
ELSE FALSE
46-
END AS ta, -- Flag indicating if the publication is within the agreement
47-
CASE
48-
WHEN cc IS NOT NULL THEN TRUE -- Check if the publication has a CC license
49-
ELSE FALSE
50-
END AS has_cc -- Flag indicating if the publication has a CC license
51-
FROM
52-
esac_journals
53-
INNER JOIN
54-
`subugoe-collaborative.hoaddata.cr_openalex_inst_full` AS oalex_inst
55-
ON
56-
esac_journals.oalex_inst_id = oalex_inst.id
57-
AND oalex_inst.issn_l = esac_journals.matching_issn
58-
LEFT JOIN
59-
`subugoe-collaborative.hoaddata.cc_openalex_inst` AS oa
60-
ON
61-
oalex_inst.doi = oa.doi
62-
INNER JOIN
63-
`subugoe-collaborative.cr_instant.snapshot` AS cr
64-
ON
65-
oalex_inst.doi = cr.doi
66-
)
67-
68-
-- Selecting relevant columns for the final result
69-
SELECT
57+
-- Final query to select relevant columns for analysis
58+
SELECT DISTINCT
7059
doi,
7160
cr_year,
7261
matching_issn AS issn_l,
7362
esac_id AS ta_journal_portfolio,
7463
esac_publisher,
7564
ta AS ta_active,
7665
cc,
77-
ror_rel AS ror_matching,
78-
ror_id AS ror_main
79-
FROM
80-
inst_per_year
81-
-- Ordering the result set
66+
ror_matched as ror,
67+
ror_type_ as ror_type
68+
FROM inst_per_year
69+
-- Order results by TA journal portfolio, publication year (descending) and DOI
8270
ORDER BY
8371
ta_journal_portfolio,
84-
cr_year DESC;
72+
cr_year DESC,
73+
doi

0 commit comments

Comments
 (0)