Skip to content

Commit 054ae80

Browse files
authored
Fix text run placeholder detection (#2)
1 parent c913e1a commit 054ae80

File tree

1 file changed

+47
-57
lines changed

1 file changed

+47
-57
lines changed

lib/docx/containers/paragraph.rb

Lines changed: 47 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ class Paragraph
88
include Container
99
include Elements::Element
1010

11-
PLACEHOLDER_REGEX = /\{\{(.*?)\}\}/ # In order to combine text runs with {{}} pattern
11+
PLACEHOLDER_REGEX = /\{\{([^{}]*?)\}\}/
1212

1313
def self.tag
1414
'p'
@@ -28,70 +28,60 @@ def initialize(node, document_properties = {}, doc = nil)
2828
validate_placeholder_content
2929
end
3030

31-
# This method detects and replaces the corrupted nodes if any exists.
3231
# Repairs placeholders (text matching PLACEHOLDER_REGEX) whose characters are
# spread over several text runs, so that each placeholder ends up inside a
# single run.
#
# For every placeholder that spans more than one run:
#   * the first run keeps its leading text and receives the whole placeholder,
#   * runs strictly in between are emptied,
#   * the last run keeps only the text that follows the placeholder.
# The concatenated text of all runs is therefore unchanged.
#
# The content map is updated after every merge. Without that, a run that
# closes one placeholder and opens the next would be rewritten from its
# original (stale) text, re-introducing characters that were already moved
# into an earlier run.
#
# @return [Array<Array<Integer>>] the [start, end) character spans of all
#   placeholders found in the paragraph text
def validate_placeholder_content
  content_map = build_content_map
  full_text = content_map.map { |entry| entry[:text] }.join

  # Collect the [start, end) span of every placeholder in the joined text.
  placeholder_spans = full_text.to_enum(:scan, PLACEHOLDER_REGEX).map do
    match = Regexp.last_match
    [match.begin(0), match.end(0)]
  end

  placeholder_spans.each do |start_pos, end_pos|
    last_char_pos = end_pos - 1

    # Locate the runs holding the first and last character of the placeholder.
    first_index = content_map.index { |m| m[:start] <= start_pos && start_pos <= m[:end] }
    last_index = content_map.index { |m| m[:start] <= last_char_pos && last_char_pos <= m[:end] }

    next if first_index.nil? || last_index.nil?
    next if first_index == last_index # placeholder already lives in a single run

    placeholder_content = full_text[start_pos...end_pos]

    (first_index..last_index).each do |i|
      entry = content_map[i]

      if i == first_index
        # Keep the text before the placeholder and append the whole placeholder.
        entry[:text] = entry[:text][0, start_pos - entry[:start]] + placeholder_content
        entry[:end] = last_char_pos
      elsif i == last_index
        # Keep only what follows the placeholder; the run now starts right after it.
        entry[:text] = entry[:text][(end_pos - entry[:start])..-1]
        entry[:start] = end_pos
      else
        # Intermediate runs are emptied; start > end marks them as zero-length
        # so that later lookups never match them.
        entry[:text] = ''
        entry[:start] = end_pos
        entry[:end] = last_char_pos
      end

      entry[:run].text = entry[:text]
    end
  end
end
6969

70-
# @param [String] :placeholder
71-
# @param [Integer] :start_index, end_index, p_start_index, p_end_index
72-
# This Method replaces below :
73-
# 1. Corrupted text nodes content with empty string
74-
# 2. Proper Placeholder content within the same text node
75-
# Ex: Assume we have a array of text nodes content as text_runs = ['This is ', 'Placeh', 'older Text', 'with ', '{{', 'Place', 'holder}}' , '{{Text}}', '{{Placeholder}}']
76-
# Here if you see, the '{{placeholder}}' is not available in the same text node. We need to merge the content of indexes - text_runs[5], text_runs[6], text_runs[7].
77-
# So We will replace the content as below:
78-
# 1. text_runs[5] = '{{Placeholder}}'
79-
# 2. text_runs[6] = ''
80-
# 3. text_runs[7] = ''
81-
def replace_incorrect_placeholder_content(placeholder, start_index, end_index, p_start_index, p_end_index)
82-
(start_index..end_index).each do |index|
83-
if index == start_index
84-
current_text = text_runs[index].text.to_s
85-
current_text[p_start_index..-1] = placeholder
86-
text_runs[index].text = current_text
87-
elsif index == end_index
88-
current_text = text_runs[index].text.to_s
89-
current_text[0..p_end_index] = ""
90-
text_runs[index].text = current_text
91-
else
92-
text_runs[index].text = ""
93-
end
70+
# Builds a positional index of the paragraph's text runs, in document order.
#
# Each entry describes one run relative to the concatenation of all run texts:
#   :start - offset of the run's first character
#   :end   - offset of the run's last character (inclusive); for an empty run
#            this is start - 1, so the run covers no position
#   :text  - the run's text, with nil coerced to ''
#   :run   - the text run object itself
#
# @return [Array<Hash>] one entry per text run
def build_content_map
  current_position = 0

  text_runs.map do |text_run|
    run_text = text_run.text.to_s
    entry = {
      start: current_position,
      end: current_position + run_text.length - 1,
      text: run_text,
      run: text_run
    }
    current_position += run_text.length
    entry
  end
end
9686

9787
# Set text of paragraph

0 commit comments

Comments
 (0)