Skip to content

Commit 7d041b5

Browse files
dragonstyle authored and jjallaire committed
Improve the creation of citations
Bug #1609 has a number of great suggestions for improving citation metadata for documents. This commit addresses most of those suggestions, generating additional `citation_` meta tags based upon document citation data as well as making our handling of citation data more consistent.

We will newly output:
- `citation_abstract`
- `citation_doi`
- `citation_pmid`
- `citation_language`
- `citation_keywords`
- `citation_pdf_url`
- `citation_public_url`
- `citation_abstract_url`
- `citation_fulltext_url`
- `citation_journal_abbrev`
- `citation_publisher`

Based upon the `type` specified, we will also output:
- `citation_conference_title` / `citation_conference`
- `citation_dissertation_institution`
- `citation_book_title`

The general approach is to allow citation data to be specified with names and a structure that correspond to the CSL schema, and to map those values onto the appropriate meta names (rather than matching the meta names directly).
1 parent 2f45cc6 commit 7d041b5

File tree

10 files changed

+270
-22
lines changed

10 files changed

+270
-22
lines changed

src/core/csl.ts

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,18 @@
55
*
66
*/
77

8-
import { parsePandocDate } from "./date.ts";
8+
import { formatDate, parsePandocDate } from "./date.ts";
9+
10+
export const kPdfUrl = "pdf-url";
11+
export const kAbstractUrl = "abstract-url";
12+
export const kEIssn = "eissn";
13+
14+
export interface CSLExtras {
15+
[kPdfUrl]?: string;
16+
[kAbstractUrl]?: string;
17+
[kEIssn]?: string;
18+
keywords?: string[];
19+
}
920

1021
export interface CSL extends Record<string, unknown> {
1122
// The id. This is technically required, but some providers (like crossref) don't provide
@@ -32,14 +43,16 @@ export interface CSL extends Record<string, unknown> {
3243
// Array of Contributors
3344
author?: CSLName[];
3445

46+
editor?: CSLName[];
47+
3548
// Earliest of published-print and published-online
3649
issued?: CSLDate;
3750

3851
// Full titles of the containing work (usually a book or journal)
3952
"container-title"?: string;
4053

4154
// Short titles of the containing work (usually a book or journal)
42-
"short-container-title"?: string;
55+
"container-title-short"?: string;
4356

4457
// Issue number of an article's journal
4558
issue?: string;
@@ -66,7 +79,9 @@ export interface CSL extends Record<string, unknown> {
6679
// primarily because they may need to be sanitized
6780
ISSN?: string;
6881
ISBN?: string;
82+
PMID?: string;
6983
"original-title"?: string;
84+
"collection-title"?: string;
7085
"short-title"?: string;
7186
subtitle?: string;
7287
subject?: string;
@@ -247,7 +262,8 @@ export function cslDate(dateRaw: unknown): CSLDate | undefined {
247262
date.getMonth() + 1,
248263
date.getDate(),
249264
]],
250-
raw: dateRaw,
265+
literal: formatDate(date, "YYYY-MM-DD"),
266+
raw: formatDate(date, "YYYY-MM-DD"),
251267
};
252268
}
253269
return undefined;

src/format/html/format-html-appendix.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -326,12 +326,12 @@ function creativeCommonsUrl(license: string, lang?: string) {
326326
}
327327

328328
async function generateCite(input: string, format: Format, offset?: string) {
329-
const entry = documentCSL(input, format, "webpage", offset);
330-
if (entry) {
329+
const { csl } = documentCSL(input, format, "webpage", offset);
330+
if (csl) {
331331
// Render the HTML and BibTeX form of this document
332332
const cslPath = getCSLPath(input, format);
333-
const html = await renderHtml(entry, cslPath);
334-
const bibtex = await renderBibTex(entry);
333+
const html = await renderHtml(csl, cslPath);
334+
const bibtex = await renderBibTex(csl);
335335
return {
336336
html,
337337
bibtex,

src/format/html/format-html-meta.ts

Lines changed: 93 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,14 @@
88
import { kHtmlEmptyPostProcessResult } from "../../command/render/constants.ts";
99
import { Format, Metadata } from "../../config/types.ts";
1010
import { bibliographyCslJson } from "../../core/bibliography.ts";
11-
import { CSL, cslDateToEDTFDate } from "../../core/csl.ts";
11+
import {
12+
CSL,
13+
cslDateToEDTFDate,
14+
CSLExtras,
15+
kAbstractUrl,
16+
kEIssn,
17+
kPdfUrl,
18+
} from "../../core/csl.ts";
1219
import { Document } from "../../core/deno-dom.ts";
1320
import { encodeAttributeValue } from "../../core/html.ts";
1421
import { kWebsite } from "../../project/types/website/website-constants.ts";
@@ -24,8 +31,8 @@ export function metadataPostProcessor(
2431
) {
2532
return async (doc: Document) => {
2633
if (googleScholarEnabled(format)) {
27-
const csl = documentCSL(input, format, "webpage", offset);
28-
const documentMetadata = googleScholarMeta(csl);
34+
const { csl, extras } = documentCSL(input, format, "webpage", offset);
35+
const documentMetadata = googleScholarMeta(csl, extras);
2936
const referenceMetadata = await googleScholarReferences(input, format);
3037
[...documentMetadata, ...referenceMetadata].forEach((meta) => {
3138
writeMetaTag(meta.name, meta.content, doc);
@@ -56,6 +63,7 @@ interface MetaTagData {
5663

5764
function googleScholarMeta(
5865
csl: CSL,
66+
extras: CSLExtras,
5967
): MetaTagData[] {
6068
// The scholar metadata that we'll generate into
6169
const scholarMeta: MetaTagData[] = [];
@@ -66,6 +74,14 @@ function googleScholarMeta(
6674
write("citation_title", csl.title);
6775
}
6876

77+
if (csl.abstract) {
78+
write("citation_abstract", csl.abstract);
79+
}
80+
81+
if (extras.keywords) {
82+
write("citation_keywords", extras.keywords);
83+
}
84+
6985
// Authors
7086
if (csl.author) {
7187
csl.author.forEach((author) => {
@@ -76,6 +92,26 @@ function googleScholarMeta(
7692
});
7793
}
7894

95+
// Editors
96+
if (csl.editor) {
97+
csl.editor.forEach((editor) => {
98+
write(
99+
"citation_editor",
100+
editor.literal || `${editor.given} ${editor.family}`,
101+
);
102+
});
103+
}
104+
105+
if (csl.issued) {
106+
const edtfIssued = cslDateToEDTFDate(csl.issued);
107+
write("citation_publication_date", edtfIssued);
108+
write("citation_cover_date", edtfIssued);
109+
const parts = csl.issued["date-parts"];
110+
if (parts) {
111+
write("citation_year", parts[0][0]);
112+
}
113+
}
114+
79115
if (csl["available-date"]) {
80116
write("citation_online_date", cslDateToEDTFDate(csl["available-date"]));
81117
}
@@ -87,14 +123,22 @@ function googleScholarMeta(
87123
);
88124
}
89125

90-
if (csl.issued) {
91-
write("citation_publication_date", cslDateToEDTFDate(csl.issued));
126+
if (extras[kPdfUrl]) {
127+
write("citation_pdf_url", extras[kPdfUrl]);
128+
}
129+
130+
if (extras[kAbstractUrl]) {
131+
write("citation_abstract_html_url", extras[kAbstractUrl]);
92132
}
93133

94134
if (csl.issue) {
95135
write("citation_issue", csl.issue);
96136
}
97137

138+
if (csl.DOI) {
139+
write("citation_doi", csl.DOI);
140+
}
141+
98142
if (csl.ISBN) {
99143
write("citation_isbn", csl.ISBN);
100144
}
@@ -103,10 +147,22 @@ function googleScholarMeta(
103147
write("citation_issn", csl.ISSN);
104148
}
105149

150+
if (extras[kEIssn]) {
151+
write("citation_eissn", extras[kEIssn]);
152+
}
153+
154+
if (csl.PMID) {
155+
write("citation_pmid", csl.PMID);
156+
}
157+
106158
if (csl.volume) {
107159
write("citation_volume", csl.volume);
108160
}
109161

162+
if (csl.language) {
163+
write("citation_language", csl.language);
164+
}
165+
110166
if (csl["page-first"]) {
111167
write("citation_firstpage", csl["page-first"]);
112168
}
@@ -117,9 +173,17 @@ function googleScholarMeta(
117173

118174
const type = csl.type;
119175
if (type === "paper-conference") {
120-
write("citation_conference_title", csl["container-title"]);
176+
if (csl["container-title"]) {
177+
write("citation_conference_title", csl["container-title"]);
178+
}
179+
180+
if (csl.publisher) {
181+
write("citation_conference", csl.publisher);
182+
}
121183
} else if (type === "thesis") {
122-
write("citation_dissertation_institution", csl.publisher);
184+
if (csl.publisher) {
185+
write("citation_dissertation_institution", csl.publisher);
186+
}
123187
} else if (type === "report") {
124188
if (csl.publisher) {
125189
write(
@@ -133,8 +197,28 @@ function googleScholarMeta(
133197
csl.number,
134198
);
135199
}
200+
} else if (type === "book") {
201+
if (csl["container-title"]) {
202+
write("citation_book_title", csl["container-title"]);
203+
}
204+
} else if (type === "chapter") {
205+
write("citation_inbook_title", csl["container-title"]);
136206
} else {
137-
write("citation_journal_title", csl["container-title"]);
207+
if (csl["container-title"]) {
208+
write("citation_journal_title", csl["container-title"]);
209+
}
210+
211+
if (csl["container-title-short"]) {
212+
write("citation_journal_abbrev", csl["container-title-short"]);
213+
}
214+
215+
if (csl.publisher) {
216+
write("citation_publisher", csl.publisher);
217+
}
218+
}
219+
220+
if (csl["collection-title"]) {
221+
write("citation_series_title", csl["collection-title"]);
138222
}
139223

140224
return scholarMeta;
@@ -149,7 +233,7 @@ async function googleScholarReferences(input: string, format: Format) {
149233

150234
if (references) {
151235
references.forEach((reference) => {
152-
const refMetas = googleScholarMeta(reference);
236+
const refMetas = googleScholarMeta(reference, {});
153237
const metaStrs = refMetas.map((refMeta) => {
154238
return `${refMeta.name}=${refMeta.content};`;
155239
});

src/quarto-core/attribution/document.ts

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,13 @@ import { pathWithForwardSlashes } from "../../core/path.ts";
2020
import {
2121
CSL,
2222
cslDate,
23+
CSLExtras,
2324
cslNames,
2425
CSLType,
2526
cslType,
27+
kAbstractUrl,
28+
kEIssn,
29+
kPdfUrl,
2630
suggestId,
2731
} from "../../core/csl.ts";
2832
import {
@@ -35,6 +39,7 @@ const kCitation = "citation";
3539
const kURL = "URL";
3640
const kId = "id";
3741
const kCitationKey = "citation-key";
42+
const kEditor = "editor";
3843

3944
const kType = "type";
4045
const kCategories = "categories";
@@ -81,7 +86,7 @@ export function documentCSL(
8186
format: Format,
8287
defaultType: CSLType,
8388
offset?: string,
84-
) {
89+
): { csl: CSL; extras: CSLExtras } {
8590
const citationMetadata = citationMeta(format);
8691

8792
// The type
@@ -104,12 +109,20 @@ export function documentCSL(
104109

105110
// Author
106111
const authors = parseAuthor(
107-
format.metadata[kAuthor] || citationMetadata[kAuthor],
112+
citationMetadata[kAuthor] || format.metadata[kAuthor],
108113
);
109114
csl.author = cslNames(
110115
authors?.filter((auth) => auth !== undefined).map((auth) => auth?.name),
111116
);
112117

118+
// Editors
119+
const editors = parseAuthor(citationMetadata[kEditor]);
120+
csl.editor = cslNames(
121+
editors?.filter((editor) => editor !== undefined).map((editor) =>
122+
editor?.name
123+
),
124+
);
125+
113126
// Categories
114127
const categories =
115128
(citationMetadata[kCategories] || format.metadata[kCategories]);
@@ -371,7 +384,29 @@ export function documentCSL(
371384
csl[kCustom] = custom;
372385
}
373386

374-
return csl;
387+
// Process anything extra
388+
const extras: CSLExtras = {};
389+
if (format.metadata.keywords) {
390+
const kw = format.metadata.keywords;
391+
extras.keywords = Array.isArray(kw) ? kw : [kw];
392+
}
393+
394+
// Process extra URLS
395+
if (citationMetadata[kPdfUrl]) {
396+
extras[kPdfUrl] = citationMetadata[kPdfUrl] as string;
397+
}
398+
if (citationMetadata[kAbstractUrl]) {
399+
extras[kAbstractUrl] = citationMetadata[kAbstractUrl] as string;
400+
}
401+
402+
if (citationMetadata[kEIssn]) {
403+
extras[kEIssn] = citationMetadata[kEIssn] as string;
404+
}
405+
406+
return {
407+
csl,
408+
extras,
409+
};
375410
}
376411

377412
interface PageRange {
@@ -400,10 +435,14 @@ function synthesizeCitationUrl(
400435
if (baseUrl && outputFile && offset) {
401436
const rootDir = Deno.realPathSync(join(dirname(input), offset));
402437
if (outputFile === "index.html") {
403-
return `${baseUrl}/${pathWithForwardSlashes(relative(rootDir, dirname(input)))}`;
438+
return `${baseUrl}/${
439+
pathWithForwardSlashes(relative(rootDir, dirname(input)))
440+
}`;
404441
} else {
405-
return `${baseUrl}/${pathWithForwardSlashes(
406-
relative(rootDir, join(dirname(input), outputFile)))
442+
return `${baseUrl}/${
443+
pathWithForwardSlashes(
444+
relative(rootDir, join(dirname(input), outputFile)),
445+
)
407446
}`;
408447
}
409448
} else {

src/resources/editor/tools/vs-code.mjs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10015,6 +10015,11 @@ var require_yaml_intelligence_resources = __commonJS({
1001510015
description: "Abstract of the item (e.g. the abstract of a journal article)"
1001610016
}
1001710017
},
10018+
"abstract-url": {
10019+
string: {
10020+
description: "A url to the abstract for this item."
10021+
}
10022+
},
1001810023
accessed: {
1001910024
ref: "csl-date",
1002010025
description: "Date the item has been accessed."
@@ -10228,6 +10233,11 @@ var require_yaml_intelligence_resources = __commonJS({
1022810233
},
1022910234
hidden: true
1023010235
},
10236+
"fulltext-url": {
10237+
string: {
10238+
description: "A url to the full text for this item."
10239+
}
10240+
},
1023110241
genre: {
1023210242
string: {
1023310243
description: {
@@ -10424,6 +10434,11 @@ var require_yaml_intelligence_resources = __commonJS({
1042410434
ref: "csl-person",
1042510435
description: "Producer (e.g. of a television or radio broadcast)."
1042610436
},
10437+
"public-url": {
10438+
string: {
10439+
description: "A public url for this item."
10440+
}
10441+
},
1042710442
publisher: {
1042810443
string: {
1042910444
description: "The publisher of the item."

0 commit comments

Comments
 (0)