Skip to content

Commit c913e1a

Browse files
authored
Merge pull request #1 from kula-ai/patch_paragraph_initialize
Fix text run splitting
2 parents c5bcb57 + 32d0844 commit c913e1a

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ pkg/*
55
doc/
66
vendor/
77
coverage/
8+
.idea

lib/docx/containers/paragraph.rb

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,90 @@ class Paragraph
88
include Container
99
include Elements::Element
1010

11+
PLACEHOLDER_REGEX = /\{\{(.*?)\}\}/ # Matches a {{...}} placeholder non-greedily, capturing the text between the braces; used to find placeholders whose text is split across text runs
12+
1113
# Returns the tag name string 'p' for this container
# (presumably the XML paragraph element name -- confirm against the Element mixin).
def self.tag
1214
'p'
1315
end
1416

1517

1618
# Child elements: pPr, r, fldSimple, hlink, subDoc
1719
# http://msdn.microsoft.com/en-us/library/office/ee364458(v=office.11).aspx
20+
#
21+
# See: https://github.com/ruby-docx/docx/issues/147 for placeholder patching
1822
# Wraps +node+ as a paragraph and records the settings it was built with.
#
# @param node the paragraph node to wrap; stored in @node
# @param document_properties [Hash] document-level settings; only :font_size is read here
# @param doc the owning document, stored in @document (defaults to nil)
#
# The constructor finishes by calling validate_placeholder_content, so merely
# building a Paragraph may rewrite the text of its runs when a {{...}}
# placeholder is split across them.
def initialize(node, document_properties = {}, doc = nil)
1923
@node = node
2024
@properties_tag = 'pPr'
2125
@document_properties = document_properties
2226
@font_size = @document_properties[:font_size]
2327
@document = doc
28+
validate_placeholder_content
29+
end
30+
31+
# This method detects placeholders whose text is split across several text runs and repairs those runs, if any exist.
32+
# Repairs placeholders whose text is split across several text runs.
#
# For every "{{...}}" occurrence reported by detect_placeholder_positions this
# finds the run holding the placeholder's first character and the run holding
# its last character. When they differ, replace_incorrect_placeholder_content
# is asked to put the whole placeholder into the first run and strip its
# pieces from the runs that follow.
#
# @return [Hash] the placeholder => positions hash that was processed
def validate_placeholder_content
  detect_placeholder_positions.each do |placeholder, start_positions|
    start_positions.each do |p_start_index|
      p_end_index = p_start_index + placeholder.length - 1

      # Start offset of every run inside the paragraph text. Rebuilt for each
      # occurrence: an earlier merge moves characters between runs, so offsets
      # computed before it would point at the wrong run or character.
      offset = 0
      run_starts = text_runs.map do |run|
        start = offset
        offset += run.text.to_s.length
        start
      end

      # rindex picks the LAST run starting at or before the position, i.e. the
      # run that really contains that character; empty runs sharing the same
      # start offset are skipped instead of being selected by mistake.
      tn_start_index = run_starts.rindex { |start| start <= p_start_index }
      tn_end_index = run_starts.rindex { |start| start <= p_end_index }
      next if tn_start_index == tn_end_index # already inside a single run (or no runs)

      replace_incorrect_placeholder_content(
        placeholder,
        tn_start_index,
        tn_end_index,
        # Non-negative offsets inside the first / last run. A negative value
        # here would make String#[]= count from the end of the run instead.
        p_start_index - run_starts[tn_start_index],
        p_end_index - run_starts[tn_end_index]
      )
    end
  end
end
49+
50+
# This method detects the starting index of every placeholder occurrence and returns those indexes in an array per placeholder.
51+
# Ex: Assumptions : text = 'This is Placeholder Text with {{Placeholder}} {{Text}} {{Placeholder}}'
52+
# It will detect the placeholder's starting index from the given text.
53+
# Here, starting index of '{{Placeholder}}' => [30, 55], '{{Text}}' => [46]
54+
# @return [Hash]
55+
# Ex: {'{{Placeholder}}' => [30, 55], '{{Text}}' => [46]}
56+
# Finds where each "{{...}}" placeholder starts in the paragraph text.
#
# The text is scanned with PLACEHOLDER_REGEX; every distinct captured name is
# turned back into its "{{name}}" form and all of its start indexes are
# collected with String#index. Captures that themselves contain a brace
# (e.g. from "{{{a}}") are ignored.
#
# @return [Hash{String => Array<Integer>}] placeholder text => start indexes,
#   e.g. {'{{Placeholder}}' => [30, 55], '{{Text}}' => [46]}
def detect_placeholder_positions
  content = text # build the paragraph text once; every lookup below reuses it

  content.scan(PLACEHOLDER_REGEX).flatten.uniq.each_with_object({}) do |inner, positions|
    next if inner.include?('{') || inner.include?('}')

    token = "{{#{inner}}}"
    offsets = []
    offset = content.index(token)
    while offset
      offsets << offset
      # Resume one character later so later (even overlapping) hits are found.
      offset = content.index(token, offset + 1)
    end
    positions[token] = offsets
  end
end
69+
70+
# @param [String] :placeholder
71+
# @param [Integer] :start_index, end_index, p_start_index, p_end_index
72+
# This Method replaces below :
73+
# 1. Corrupted text nodes content with empty string
74+
# 2. Proper Placeholder content within the same text node
75+
# Ex: Assume we have an array of text nodes content as text_runs = ['This is ', 'Placeh', 'older Text', 'with ', '{{', 'Place', 'holder}}' , '{{Text}}', '{{Placeholder}}']
76+
# Here, the first '{{Placeholder}}' is not contained in a single text node. We need to merge the content of indexes text_runs[4], text_runs[5], text_runs[6].
77+
# So We will replace the content as below:
78+
# 1. text_runs[4] = '{{Placeholder}}'
79+
# 2. text_runs[5] = ''
80+
# 3. text_runs[6] = ''
81+
# Collapses a placeholder that is spread over runs start_index..end_index
# into the run at start_index.
#
# @param placeholder [String] full placeholder text, e.g. '{{Placeholder}}'
# @param start_index [Integer] index (in text_runs) of the run holding the placeholder's first character
# @param end_index [Integer] index of the run holding the placeholder's last character
# @param p_start_index [Integer] index in the first run's text from which everything up to
#   the end is replaced by +placeholder+ (passed to String#[]=, so a negative
#   value counts from the end of the string)
# @param p_end_index [Integer] index in the last run's text up to which (inclusive) the
#   leading characters are removed
# @return [Range] the start_index..end_index range that was processed
def replace_incorrect_placeholder_content(placeholder, start_index, end_index, p_start_index, p_end_index)
  runs = text_runs # resolve the runs once instead of on every access

  (start_index..end_index).each do |index|
    run = runs[index]
    if index == start_index
      # dup: never edit the run's own string in place (it may be frozen,
      # and nil.to_s is a frozen empty string).
      merged = run.text.to_s.dup
      merged[p_start_index..-1] = placeholder
      run.text = merged
    elsif index == end_index
      remainder = run.text.to_s.dup
      remainder[0..p_end_index] = ''
      run.text = remainder
    else
      # Runs strictly between first and last held only placeholder fragments.
      run.text = ''
    end
  end
end
2596

2697
# Set text of paragraph

0 commit comments

Comments
 (0)