Skip to content

Commit 03b5973

Browse files
committed
Merge hyperref and hyperxmp metadata improvements (PR brucemiller#2365)
2 parents db9007b + 4fa0606 commit 03b5973

File tree

9 files changed

+194
-25
lines changed

9 files changed

+194
-25
lines changed

MANIFEST

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,9 @@ t/complex/hyperchars.xml
10091009
t/complex/hypertest.pdf
10101010
t/complex/hypertest.tex
10111011
t/complex/hypertest.xml
1012+
t/complex/hyperxmp.pdf
1013+
t/complex/hyperxmp.tex
1014+
t/complex/hyperxmp.xml
10121015
t/complex/labelled.pdf
10131016
t/complex/labelled.tex
10141017
t/complex/labelled.xml

lib/LaTeXML/Engine/Base_Schema.pool.ltxml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ RequireResource('LaTeXML.css');
8383
"ma" => "http://www.w3.org/ns/ma-ont#",
8484
"og" => "http://ogp.me/ns#",
8585
"owl" => "http://www.w3.org/2002/07/owl#",
86+
"prism" => "http://prismstandard.org/namespaces/basic/3.0/",
8687
"rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
8788
"rdfa" => "http://www.w3.org/ns/rdfa#",
8889
"rdfs" => "http://www.w3.org/2000/01/rdf-schema#",

lib/LaTeXML/Package/hyperref.sty.ltxml

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -86,21 +86,19 @@ foreach my $option ( # 3.1 General Options
8686
DeclareOption($option, undef); }
8787

8888
# \hypersetup{keyvals} configures various parameters,
89-
# for each pdf keyword, provide [property,(content|resource),datatype]
90-
our %pdfkey_property = (
91-
baseurl => '', # xmp:BaseURL ??
92-
pdfauthor => ['dcterms:creator', 'content'],
93-
pdfkeywords => ['dcterms:subject', 'content'], # & pdf:Keywords
94-
pdflang => ['dcterms:language', 'content'],
95-
pdfproducer => '', # pdf:Producer & xmp:CreatorTool
96-
pdfsubject => ['dcterms:subject', 'content'],
97-
pdftitle => ['dcterms:title', 'content'],
98-
# Include hyperxmp's keywords, as well.
99-
pdfauthortitle => '', # photoshop:AuthorsPosition
100-
pdfcaptionwriter => '', # photoshop:CaptionWriter !?!?!?
101-
pdfcopyright => ['dcterms:rights', 'content'], # & xmpRights:Marked
102-
pdflicenseurl => ['cc:licence', 'resource'], # xmpRights:WebStatement
103-
pdfmetalang => '', # dcterms:language ??
89+
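# for each pdf keyword, provide [property,(content|resource),datatype,langsupport]; '' = recognized but no RDFa emitted; langsupport true = value may begin with "[lang]" and gets xml:lang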
90+
# %pdfkey_property may have already been populated by hyperxmp
91+
our %pdfkey_property = (%pdfkey_property,
92+
baseurl => '', # xmp:BaseURL ??
93+
pdfauthor => ['dcterms:creator', 'content'],
94+
pdfcreationdate => '', # xmp:CreateDate
95+
pdfkeywords => ['dcterms:subject', 'content'], # & pdf:Keywords
96+
pdflang => ['dcterms:language', 'content'],
97+
pdfmoddate => '', # xmp:ModifyDate
98+
pdfproducer => '', # pdf:Producer & xmp:CreatorTool
99+
pdfsubject => ['dcterms:description', 'content'],
100+
pdftitle => ['dcterms:title', 'content'],
101+
pdftrapped => '', # pdf:Trapped
104102
);
105103
# date=>dcterms:date xmp:CreateDate xmp:ModifyDate xmp:MetadataDate ?
106104
# document identifier => xmlMM:DocumentID
@@ -126,21 +124,24 @@ DefPrimitive('\hypersetup RequiredKeyVals:Hyp', sub {
126124
my @pairs = $kv->getPairs;
127125
while (@pairs) {
128126
my ($key, $value) = (shift(@pairs), shift(@pairs));
129-
hyperref_setoption($key, Digest($value)); }
127+
hyperref_setoption($key, Expand($value)); }
130128
return; });
131129

132130
PushValue('@at@end@document', T_CS('\@add@PDF@RDFa@triples'));
133131

134132
DefConstructor('\@add@PDF@RDFa@triples', sub {
135133
my ($document, $xproperty, $content) = @_;
136134
if (my $root = $document->getDocument->documentElement) {
135+
my $metalang = LookupMapping('Hyperref_options', 'pdfmetalang') // LookupMapping('Hyperref_options', 'pdflang');
137136
foreach my $key (LookupMappingKeys('Hyperref_options')) {
138137
if (my $entry = ($pdfkey_property{$key})) {
139-
my ($property, $object, $datatype) = @$entry;
140-
my $value = LookupMapping('Hyperref_options', $key);
141-
my $node = $document->openElementAt($root, 'ltx:rdf',
142-
property => $property, $object => $value,
143-
($datatype ? (datatype => $datatype) : ()));
138+
my ($property, $object, $datatype, $langsupport) = @$entry;
139+
my $value = ToString(LookupMapping('Hyperref_options', $key));
140+
my ($lang, $localizedValue) = $value =~ m/^\[([^]]*)\](.*)/;
141+
my $node = $document->openElementAt($root, 'ltx:rdf',
142+
property => $property, $object => $langsupport ? $localizedValue // $value : $value,
143+
($langsupport && ($lang || $metalang) ? ('xml:lang' => $lang // $metalang) : ()),
144+
($datatype ? (datatype => $datatype) : ()));
144145
# Must do directly; $document->setAttribute omits empty attributes
145146
$node->setAttribute(about => '');
146147
$document->closeElementAt($node); } } } });

lib/LaTeXML/Package/hyperxmp.sty.ltxml

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,63 @@ use LaTeXML::Package;
1717

1818
RequirePackage('ifthen');
1919

20-
# Basically, the effects of hyperxmp are already built
21-
# into the LaTeXML binding for hyperref.
20+
# \XMPLangAlt{lang}{keyvals}: macro to pass alternate language entries; currently ignored (both arguments are gobbled)
21+
# TODO support entries in multiple languages
22+
# TODO pdfmetalang should affect *following* entries, not all of them
23+
Let('\XMPLangAlt', '\@gobbletwo');
24+
25+
# macros for including commas in comma-separated lists
26+
# TODO implement comma-separated lists splitting
27+
Let('\xmpquote', '\relax');
28+
DefMacroI('\xmpcomma', undef, ',');
29+
30+
# %pdfkey_property may have already been populated by hyperref
31+
our %pdfkey_property = (%pdfkey_property,
32+
# modify hyperref keywords to support specifying the language
33+
pdfsubject => ['dcterms:description', 'content', undef, 1],
34+
pdftitle => ['dcterms:title', 'content', undef, 1],
35+
pdfaconformance => '', # pdfaid:conformance
36+
pdfapart => '', # pdfaid:part
37+
pdfauthortitle => '', # photoshop:AuthorsPosition
38+
pdfbookedition => ['prism:bookEdition', 'content', undef, 1],
39+
pdfbytes => ['prism:byteCount', 'content'],
40+
pdfcaptionwriter => '', # photoshop:CaptionWriter
41+
pdfcontactaddress => '', # Iptc4xmpCore:CiAdrExtadr
42+
pdfcontactcity => '', # Iptc4xmpCore:CiAdrCity
43+
pdfcontactcountry => '', # Iptc4xmpCore:CiAdrCtry
44+
pdfcontactemail => '', # Iptc4xmpCore:CiEmailWork
45+
pdfcontactphone => '', # Iptc4xmpCore:CiTelWork
46+
pdfcontactpostcode => '', # Iptc4xmpCore:CiAdrPcode
47+
pdfcontactregion => '', # Iptc4xmpCore:CiAdrRegion
48+
pdfcontacturl => '', # Iptc4xmpCore:CiUrlWork
49+
pdfcopyright => ['dcterms:rights', 'content', undef, 1], # & xmpRights:Marked, xmpRights:WebStatement
50+
pdfdate => ['dcterms:date', 'content'], # (if missing, hyperxmp uses \date -- we delegate the default choice to postprocessing)
51+
pdfdocumentid => '', # xmpMM:DocumentID
52+
pdfdoi => ['prism:doi', 'content'],
53+
pdfeissn => ['prism:eIssn', 'content'],
54+
pdfidentifier => ['dcterms:identifier', 'content'], # (if missing, hyperxmp uses the first non-empty doi, eissn, issn, isbn with prefix info:doi/ or urn:ISS(B|N): -- we delegate the default choice to postprocessing)
55+
pdfinstanceid => '', # xmpMM:InstanceID
56+
pdfisbn => ['prism:isbn', 'content'],
57+
pdfissn => ['prism:issn', 'content'],
58+
pdfissuenum => ['prism:number', 'content'],
59+
pdflicenseurl => ['cc:licence', 'resource'], # xmpRights:WebStatement
60+
pdfmetadate => '', # xmp:MetadataDate
61+
pdfmetalang => '', # the default language of the metadata entries themselves
62+
pdfnumpages => ['prism:pageCount', 'content'], # xmpTPg:NPages
63+
pdfpagerange => ['prism:pageRange', 'content'],
64+
pdfpublication => ['prism:publicationName', 'content', undef, 1],
65+
pdfpublisher => ['dcterms:publisher', 'content'],
66+
pdfpubstatus => '', # jav:journal_article_version
67+
pdfpubtype => ['prism:aggregationType', 'content'],
68+
pdfrendition => '', # xmpMM:RenditionClass
69+
pdfsource => ['dcterms:source', 'content'],
70+
pdfsubtitle => ['prism:subtitle', 'content', undef, 1],
71+
pdftype => ['dcterms:type', 'content'],
72+
pdfuapart => '', # pdfuaid:part
73+
pdfurl => ['prism:url', 'content'],
74+
pdfversionid => '', # xmpMM:VersionID
75+
pdfvolumenum => ['prism:volume', 'content'],
76+
pdfxstandard => '', # {pdfx,pdfxid}:GTS_PDFXVersion
77+
);
2278

2379
1;

t/80_complex.t

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ latexml_tests("t/complex",
99
cleveref_minimal => 'cleveref.sty',
1010
figure_dual_caption => {packages => 'graphicx.sty', texlive_min => 2021},
1111
figure_mixed_content => {
12-
packages => ['algorithm.sty','algorithmic.sty','graphicx.sty','ifthen.sty','keyval.sty'],
12+
packages => ['algorithm.sty','algorithmic.sty','graphicx.sty','ifthen.sty','keyval.sty'],
1313
texlive_min => 2021},
14+
hyperxmp => 'babel.sty',
1415
si => {
1516
env=>'CI', # only runs in continuous integration
1617
packages => 'siunitx.sty', texlive_min => 2015 } });

t/complex/hypertest.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,6 @@
9090
<rdf about="" content="Not subject to copyright" property="dcterms:rights"/>
9191
<rdf about="" content="test,hyperref" property="dcterms:subject"/>
9292
<rdf about="" property="cc:licence" resource="http://creativecommons.org/licenses/by/3.0/"/>
93-
<rdf about="" content="Testing LaTeXMLs processing of hyperref metadata" property="dcterms:subject"/>
93+
<rdf about="" content="Testing LaTeXML's processing of hyperref metadata" property="dcterms:description"/>
9494
<rdf about="" content="Test Hyperref Metadata" property="dcterms:title"/>
9595
</document>

t/complex/hyperxmp.pdf

64.8 KB
Binary file not shown.

t/complex/hyperxmp.tex

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
% example copied from hyperxmp with minor modifications
2+
\pdfobjcompresslevel=0 % added to easily inspect the XMP packet
3+
\documentclass[british]{article} % added british to test pdflang and pdfmetalang
4+
\usepackage[utf8]{inputenc}
5+
\usepackage{babel}
6+
\usepackage[unicode]{hyperref}
7+
\usepackage{hyperxmp}
8+
%
9+
\title{%
10+
On a heuristic viewpoint concerning the production and
11+
transformation of light}
12+
\author{Albert Einstein}
13+
\date{March 17, 1905}
14+
%
15+
\hypersetup{%
16+
pdfmetalang={en-AU}, % added to test pdfmetalang
17+
pdftitle={%
18+
On a heuristic viewpoint concerning the production and
19+
transformation of light},
20+
pdfsubtitle={[en-US]Putting that bum Maxwell in his place},
21+
pdfauthor={Albert Einstein},
22+
pdfauthortitle={\xmpquote{Technical Assistant\xmpcomma\ Level III}},
23+
pdfdate={1905-03-17},
24+
pdfcopyright={Copyright (C) 1905, Albert Einstein},
25+
pdfsubject={photoelectric effect},
26+
pdfkeywords={energy quanta, Hertz effect, quantum physics},
27+
pdflicenseurl={http://creativecommons.org/licenses/by-nc-nd/3.0/},
28+
pdfcaptionwriter={Scott Pakin},
29+
pdfcontactaddress={Kramgasse 49},
30+
pdfcontactcity={Bern},
31+
pdfcontactpostcode={3011},
32+
pdfcontactcountry={Switzerland},
33+
pdfcontactphone={031 312 00 91},
34+
pdfcontactemail={aeinstein@ipi.ch},
35+
pdfcontacturl={%
36+
http://einstein.biz/,
37+
https://www.facebook.com/AlbertEinstein
38+
},
39+
pdfdocumentid={uuid:6d1ac9ec-4ff2-515a-954b-648eeb4853b0},
40+
pdfversionid={2.998e8},
41+
pdfpublication={[de]Annalen der Physik},
42+
pdfpublisher={Wiley-VCH},
43+
pdfpubtype={journal},
44+
pdfvolumenum={322},
45+
pdfissuenum={6},
46+
pdfpagerange={132-148},
47+
pdfissn={0003-3804},
48+
pdfeissn={1521-3889},
49+
pdfpubstatus={VoR},
50+
pdflang={en-GB},
51+
pdfurl={http://www.physik.uni-augsburg.de/annalen/history/einstein-papers/1905_17_132-148.pdf},
52+
pdfdoi={10.1002/andp.19053220607},
53+
pdfidentifier={info:lccn/50013519}
54+
}
55+
\XMPLangAlt{de}{pdftitle={Über einen die Erzeugung und Verwandlung des
56+
Lichtes betreffenden heuristischen Gesichtspunkt}}
57+
%
58+
\begin{document}
59+
\maketitle
60+
A profound formal difference exists between the theoretical
61+
concepts that physicists have formed about gases and other
62+
ponderable bodies, and Maxwell's theory of electromagnetic
63+
processes in so-called empty space\dots
64+
\end{document}

t/complex/hyperxmp.xml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<?latexml class="article" options="british"?>
3+
<?latexml package="inputenc" options="utf8"?>
4+
<?latexml package="babel"?>
5+
<?latexml package="hyperref" options="unicode"?>
6+
<?latexml package="hyperxmp"?>
7+
<?latexml RelaxNGSchema="LaTeXML"?>
8+
<document xmlns="http://dlmf.nist.gov/LaTeXML" class="ltx_authors_1line" prefix="cc: http://creativecommons.org/ns# dcterms: http://purl.org/dc/terms/ prism: http://prismstandard.org/namespaces/basic/3.0/" xml:lang="en-GB">
9+
<resource src="LaTeXML.css" type="text/css"/>
10+
<resource src="ltx-article.css" type="text/css"/>
11+
<title>On a heuristic viewpoint concerning the production and
12+
transformation of light</title>
13+
<creator role="author">
14+
<personname>Albert Einstein</personname>
15+
</creator>
16+
<date role="creation">March 17, 1905</date>
17+
<para xml:id="p1">
18+
<p>A profound formal difference exists between the theoretical
19+
concepts that physicists have formed about gases and other
20+
ponderable bodies, and Maxwell’s theory of electromagnetic
21+
processes in so-called empty space…</p>
22+
</para>
23+
<rdf about="" content="Albert Einstein" property="dcterms:creator"/>
24+
<rdf about="" content="Copyright (C) 1905, Albert Einstein" property="dcterms:rights" xml:lang="en-AU"/>
25+
<rdf about="" content="1905-03-17" property="dcterms:date"/>
26+
<rdf about="" content="10.1002/andp.19053220607" property="prism:doi"/>
27+
<rdf about="" content="1521-3889" property="prism:eIssn"/>
28+
<rdf about="" content="info:lccn/50013519" property="dcterms:identifier"/>
29+
<rdf about="" content="0003-3804" property="prism:issn"/>
30+
<rdf about="" content="6" property="prism:number"/>
31+
<rdf about="" content="energy quanta, Hertz effect, quantum physics" property="dcterms:subject"/>
32+
<rdf about="" content="en-GB" property="dcterms:language"/>
33+
<rdf about="" property="cc:licence" resource="http://creativecommons.org/licenses/by-nc-nd/3.0/"/>
34+
<rdf about="" content="132-148" property="prism:pageRange"/>
35+
<rdf about="" content="Annalen der Physik" property="prism:publicationName" xml:lang="de"/>
36+
<rdf about="" content="Wiley-VCH" property="dcterms:publisher"/>
37+
<rdf about="" content="journal" property="prism:aggregationType"/>
38+
<rdf about="" content="photoelectric effect" property="dcterms:description" xml:lang="en-AU"/>
39+
<rdf about="" content="Putting that bum Maxwell in his place" property="prism:subtitle" xml:lang="en-US"/>
40+
<rdf about="" content="On a heuristic viewpoint concerning the production and&#10;transformation of light" property="dcterms:title" xml:lang="en-AU"/>
41+
<rdf about="" content="http://www.physik.uni-augsburg.de/annalen/history/einstein-papers/1905_17_132-148.pdf" property="prism:url"/>
42+
<rdf about="" content="322" property="prism:volume"/>
43+
</document>

0 commit comments

Comments
 (0)