Skip to content

Commit 612f076

Browse files
authored
Merge pull request #1160 from metanorma/fix/linebreak-empty-xref
fix behaviour of carriage returns before empty eref, xref, link in te…
2 parents: bc1b2d7 + e338e3e; commit: 612f076

File tree

2 files changed

+37
-17
lines changed

2 files changed

+37
-17
lines changed

lib/metanorma/cleanup/text.rb

Lines changed: 32 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ module Standoc
33
module Text
44
def ancestor_include?(elem, ancestors)
55
path = elem.path.gsub(/\[\d+\]/, "").split(%r{/})[1..-2]
6-
!path.intersection(ancestors).empty?
6+
path.intersect?(ancestors)
77
end
88

99
# process example/p, example/sourcecode, not example on its own:
@@ -30,29 +30,41 @@ def linebreak_cleanup_block(block)
3030
end
3131

3232
def lines_strip_textspan(span, nextspan)
33-
lines = span[:text].lines[0..-2].map(&:rstrip) <<
34-
span[:text].lines[-1]&.sub(/\n$/, "")
33+
#AAA
34+
lines = []
35+
span[:text] and
36+
lines = span[:text].lines[0..-2].map(&:rstrip) <<
37+
span[:text].lines[-1]&.sub(/\n$/, "")
38+
#lines = span[:text].lines[0..-2].map(&:rstrip) <<
39+
#span[:text].lines[-1]&.sub(/\n$/, "")
3540
# no final line rstrip: can be space linking to next line
3641
span[:last] or lines << nextspan[:text].lines.first # next token context
3742
lines
3843
end
3944

40-
# TODO: we are not counting empty xref, eref here
4145
def gather_text_for_linebreak_cleanup(block)
42-
x = block.xpath(".//text()").map do |e|
43-
{ elem: e, text: e.text, stem: ancestor_include?(e, %w(stem)),
44-
skip: ancestor_include?(e, PRESERVE_LINEBREAK_ELEMENTS) }
45-
end
46+
x = gather_text_for_linebreak_cleanup1(block)
4647
x.empty? and return x
47-
x.each { |e| e[:skip] ||= !e[:text].include?("\n") }
4848
x.each_with_index do |e, i|
49+
e[:skip] ||= !e[:text].include?("\n")
4950
# do not treat stem linebreaks as meaningful
5051
e[:skip] ||= x[i + 1]&.dig(:stem)
52+
e[:skip] ||= !e[:elem].text?
5153
end
5254
x[-1][:last] = true
5355
x
5456
end
5557

58+
def gather_text_for_linebreak_cleanup1(block)
59+
block.xpath(".//text() | .//eref[not(text())] | " \
60+
".//xref[not(text())] | .//termref[not(text())] | " \
61+
".//link[not(text())] ").map do |e|
62+
#x = block.xpath(".//text()").map do |e|
63+
{ elem: e, text: e.text, stem: ancestor_include?(e, %w(stem)),
64+
skip: ancestor_include?(e, PRESERVE_LINEBREAK_ELEMENTS) }
65+
end
66+
end
67+
5668
def smartquotes_cleanup(xmldoc)
5769
xmldoc.xpath("//date").each { |d| Metanorma::Utils::endash_date(d) }
5870
if @smartquotes then smartquotes_cleanup1(xmldoc)
@@ -79,14 +91,16 @@ def uninterrupt_quotes_around_xml(xmldoc)
7991
end
8092
end
8193

82-
# "abc<tag/>", def => "abc",<tag/> def
83-
# TODO?
84-
def uninterrupt_quotes_around_xml1(xmldoc)
85-
xmldoc.xpath("//text()[preceding-sibling::*[1]]").each do |n|
86-
uninterrupt_quotes_around_xml_skip(n) and next
87-
uninterrupt_quotes_around_xml1(n.previous)
88-
end
89-
end
94+
#AAA
95+
# "abc<tag/>", def => "abc",<tag/> def
96+
# TODO?
97+
#def uninterrupt_quotes_around_xml1(xmldoc)
98+
#xmldoc.xpath("//text()[preceding-sibling::*[1]]").each do |n|
99+
#uninterrupt_quotes_around_xml_skip(n) and next
100+
#uninterrupt_quotes_around_xml1(n.previous)
101+
#end
102+
#end
103+
90104

91105
IGNORE_QUOTES_ELEMENTS =
92106
%w(pre tt sourcecode stem asciimath figure bibdata passthrough
@@ -111,6 +125,7 @@ def uninterrupt_quotes_around_xml_skip(elem)
111125
ignoretext?(elem.previous)))
112126
end
113127

128+
# "abc<tag/>", def => "abc",<tag/> def
114129
def uninterrupt_quotes_around_xml1(elem)
115130
prev = elem.at(".//preceding::text()[1]") or return
116131
/\S\Z/.match?(prev.text) or return

spec/cleanup/blocks_spec.rb

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1438,6 +1438,9 @@
14381438
ABC
14391439
*JSA*)
14401440
1441+
ABC
1442+
link:example.com[]
1443+
14411444
INPUT
14421445
output = <<~OUTPUT
14431446
<sections><clause id="_" type="scope" inline-header="false" obligation="normative">
@@ -1461,6 +1464,8 @@
14611464
<p id="_">ABC (<strong>JSA</strong>)</p>
14621465
14631466
<p id="_">ABC <strong>JSA</strong>)</p>
1467+
1468+
<p id="_">ABC <link target="example.com"/></p>
14641469
</clause>
14651470
</sections>
14661471
OUTPUT

0 commit comments

Comments
 (0)