diff --git a/README.md b/README.md index e779291..21454b6 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ doc.tables.each do |table| puts cell.text end end - + table.columns.each do |column| # Column-based iteration column.cells.each do |cell| puts cell.text @@ -84,6 +84,10 @@ doc.bookmarks['example_bookmark'].insert_text_after("Hello world.") # Insert multiple lines of text at our bookmark doc.bookmarks['example_bookmark_2'].insert_multiple_lines_after(['Hello', 'World', 'foo']) +# The previous methods can also be passed a hash specifying formatting e.g. +doc.bookmarks['example_bookmark'].insert_text_after("Hello world.", + { bold: true, font: 'Times New Roman', font_size: 20, color: 'FF0000' }) + # Remove paragraphs doc.paragraphs.each do |p| p.remove! if p.to_s =~ /TODO/ @@ -117,6 +121,6 @@ p_child = p_element.at_xpath("//child::*") # selects first child * Calculate element formatting based on values present in element properties as well as properties inherited from parents * Default formatting of inserted elements to inherited values -* Implement formattable elements. +* Implement formattable tables. * Implement styles. * Easier multi-line text insertion at a single bookmark (inserting paragraph nodes after the one containing the bookmark) diff --git a/lib/docx/containers/paragraph.rb b/lib/docx/containers/paragraph.rb index 8652ead..9209012 100755 --- a/lib/docx/containers/paragraph.rb +++ b/lib/docx/containers/paragraph.rb @@ -1,5 +1,6 @@ require 'docx/containers/text_run' require 'docx/containers/container' +require 'docx/formatting/paragraph_formatting' module Docx module Elements @@ -7,11 +8,14 @@ module Containers class Paragraph include Container include Elements::Element + include ParagraphFormatting def self.tag 'p' end + attr_reader :properties_tag + alias_method :formatting, :parse_formatting # Child elements: pPr, r, fldSimple, hlink, subDoc # http://msdn.microsoft.com/en-us/library/office/ee364458(v=office.11).aspx @@ -36,6 +40,12 @@ def text=(content) end end + # Set text of paragraph with formatting + def set_text(content, formatting={}) + self.text = content + text_runs.each { |tr| tr.apply_formatting(formatting) } + end + # Return text of paragraph def to_s text_runs.map(&:text).join('') @@ -79,17 +89,15 @@ def font_size size_tag = @node.xpath('w:pPr//w:sz').first size_tag ? size_tag.attributes['val'].value.to_i / 2 : @font_size end - + alias_method :text, :to_s private # Returns the alignment if any, or nil if left def alignment - alignment_tag = @node.xpath('.//w:jc').first - alignment_tag ? alignment_tag.attributes['val'].value : nil + formatting[:alignment] end - end end end diff --git a/lib/docx/containers/text_run.rb b/lib/docx/containers/text_run.rb index a4e82de..6e283a8 100755 --- a/lib/docx/containers/text_run.rb +++ b/lib/docx/containers/text_run.rb @@ -1,4 +1,5 @@ require 'docx/containers/container' +require 'docx/formatting/text_run_formatting' module Docx module Elements @@ -6,27 +7,23 @@ module Containers class TextRun include Container include Elements::Element + include TextRunFormatting - DEFAULT_FORMATTING = { - italic: false, - bold: false, - underline: false - } - def self.tag 'r' end attr_reader :text - attr_reader :formatting - + attr_reader :document_properties + attr_reader :properties_tag + alias_method :formatting, :parse_formatting + def initialize(node, document_properties = {}) @node = node + @document_properties = document_properties @text_nodes = @node.xpath('w:t').map {|t_node| Elements::Text.new(t_node) } @properties_tag = 'rPr' @text = parse_text || '' - @formatting = parse_formatting || DEFAULT_FORMATTING - @document_properties = document_properties @font_size = @document_properties[:font_size] end @@ -40,19 +37,17 @@ def text=(content) end end + # Set the text of text run with formatting + def set_text(content, formatting={}) + self.text = content + apply_formatting(formatting) + end + # Returns text contained within text run def parse_text @text_nodes.map(&:content).join('') end - def parse_formatting - { - italic: !@node.xpath('.//w:i').empty?, - bold: !@node.xpath('.//w:b').empty?, - underline: !@node.xpath('.//w:u').empty? - } - end - def to_s @text end @@ -65,26 +60,23 @@ def to_html styles = {} styles['text-decoration'] = 'underline' if underlined? # No need to be granular with font size down to the span level if it doesn't vary. - styles['font-size'] = "#{font_size}pt" if font_size != @font_size + styles['font-size'] = "#{font_size}pt" if font_size != @font_size + styles['font-family'] = %Q["#{formatting[:font]}"] if formatting[:font] + styles['color'] = "##{formatting[:color]}" if formatting[:color] html = html_tag(:span, content: html, styles: styles) unless styles.empty? return html end def italicized? - @formatting[:italic] + formatting[:italic] end - + def bolded? - @formatting[:bold] - end - - def underlined? - @formatting[:underline] + formatting[:bold] end - def font_size - size_tag = @node.xpath('w:rPr//w:sz').first - size_tag ? size_tag.attributes['val'].value.to_i / 2 : @font_size + def underlined? + formatting[:underline] end end end diff --git a/lib/docx/document.rb b/lib/docx/document.rb index a5722d3..b1405c3 100755 --- a/lib/docx/document.rb +++ b/lib/docx/document.rb @@ -37,7 +37,8 @@ def initialize(path, &block) # This stores the current global document properties, for now def document_properties { - font_size: font_size + font_size: font_size, + font: font } end @@ -74,6 +75,10 @@ def font_size size_tag ? size_tag.attributes['val'].value.to_i / 2 : nil end + def font + font_tag = @styles.at_xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:rFonts') + font_tag ? font_tag['w:ascii'] : nil + end ## # *Deprecated* # diff --git a/lib/docx/elements/bookmark.rb b/lib/docx/elements/bookmark.rb index 025b15f..18b1185 100644 --- a/lib/docx/elements/bookmark.rb +++ b/lib/docx/elements/bookmark.rb @@ -5,7 +5,7 @@ module Elements class Bookmark include Element attr_accessor :name - + def self.tag 'bookmarkStart' end @@ -15,20 +15,20 @@ def initialize(node) @name = @node['w:name'] end - # Insert text before bookmarkStart node - def insert_text_before(text) + # Insert text before bookmarkStart node with optional formatting + def insert_text_before(text, formatting={}) text_run = get_run_after - text_run.text = "#{text}#{text_run.text}" + text_run.set_text("#{text}#{text_run.text}", formatting) end - # Insert text after bookmarkStart node - def insert_text_after(text) + # Insert text after bookmarkStart node with optional formatting + def insert_text_after(text, formatting={}) text_run = get_run_before - text_run.text = "#{text_run.text}#{text}" + text_run.set_text("#{text_run.text}#{text}", formatting) end - # insert multiple lines starting with paragraph containing bookmark node. - def insert_multiple_lines(text_array) + # insert multiple lines starting with paragraph containing bookmark node. With optional formatting + def insert_multiple_lines(text_array, formatting={}) # Hold paragraphs to be inserted into, corresponding to the index of the strings in the text array paragraphs = [] paragraph = self.parent_paragraph @@ -45,13 +45,13 @@ def insert_multiple_lines(text_array) # Insert text into corresponding newly created paragraphs paragraphs.each_index do |index| - paragraphs[index].text = text_array[index] + paragraphs[index].set_text(text_array[index], formatting) end end # Get text run immediately prior to bookmark node def get_run_before - # at_xpath returns the first match found and preceding-sibling returns siblings in the + # at_xpath returns the first match found and preceding-sibling returns siblings in the # order they appear in the document not the order as they appear when moving out from # the starting node if not (r_nodes = @node.xpath("./preceding-sibling::w:r")).empty? @@ -76,4 +76,4 @@ def get_run_after end end end -end \ No newline at end of file +end diff --git a/lib/docx/elements/element.rb b/lib/docx/elements/element.rb index 471918d..e361fca 100755 --- a/lib/docx/elements/element.rb +++ b/lib/docx/elements/element.rb @@ -93,4 +93,4 @@ def create_within(element) end end end -end \ No newline at end of file +end diff --git a/lib/docx/formatting/formatting.rb b/lib/docx/formatting/formatting.rb new file mode 100644 index 0000000..4c32de0 --- /dev/null +++ b/lib/docx/formatting/formatting.rb @@ -0,0 +1,14 @@ +module Docx + module Formatting + def add_property(tag) + property_node.remove if properties_node.at_xpath(".//w:#{tag}") # Remove and replace property + properties_node.add_child("").first + end + + def properties_node + properties = node.at_xpath(".//w:#{properties_tag}") + # Should a paragraph formatting node not exist create one + properties ||= node.prepend_child("").first + end + end +end diff --git a/lib/docx/formatting/paragraph_formatting.rb b/lib/docx/formatting/paragraph_formatting.rb new file mode 100644 index 0000000..1d6d75d --- /dev/null +++ b/lib/docx/formatting/paragraph_formatting.rb @@ -0,0 +1,27 @@ +require 'docx/formatting/formatting' + +module Docx + module ParagraphFormatting + include Formatting + + def apply_formatting(formatting) + if (formatting[:alignment]) + alignment_node = add_property('jc') + alignment_node['w:val'] = formatting[:alignment] + end + end + + def parse_formatting + formatting = {} + alignment_node = node.at_xpath('.//w:jc') + formatting[:alignment] = alignment_node ? alignment_node['w:val'] : nil + formatting + end + + def self.default_formatting + { + alignment: nil + } + end + end +end diff --git a/lib/docx/formatting/text_run_formatting.rb b/lib/docx/formatting/text_run_formatting.rb new file mode 100644 index 0000000..98784b3 --- /dev/null +++ b/lib/docx/formatting/text_run_formatting.rb @@ -0,0 +1,53 @@ +require 'docx/formatting/formatting' + +module Docx + module TextRunFormatting + include Formatting + + def apply_formatting(formatting) + if (formatting[:font]) + font_node = add_property('rFonts') + font_node["w:ascii"] = formatting[:font] + font_node["w:hAnsi"] = formatting[:font] + end + if (formatting[:font_size]) + font_size_node = add_property('sz') + font_size_node['w:val'] = formatting[:font_size] * 2 # Font size is stored in half-points + end + add_property('i') if formatting[:italic] + add_property('b') if formatting[:bold] + add_property('u') if formatting[:underline] + if (formatting[:color]) + color_node = add_property('color') + color_node["w:val"] = formatting[:color] + end + end + + def parse_formatting() + formatting = {} + formatting[:italic] = !node.xpath('.//w:i').empty? + formatting[:bold] = !node.xpath('.//w:b').empty? + formatting[:underline] = !node.xpath('.//w:u').empty? + font_node = node.at_xpath('.//w:rFonts') + formatting[:font] = font_node ? font_node['w:ascii'] : document_properties[:font] + formatting[:font_size] = font_size + color_node = node.at_xpath('.//w:color') + formatting[:color] = color_node ? color_node['w:val'] : nil + formatting + end + + def self.default_formatting(document_properties) + { + italic: false, bold: false, underline: false, + font: document_properties[:font], + font_size: document_properties[:font_size], + color: nil + } + end + + def font_size + size_tag = @node.at_xpath('.//w:sz') + size_tag ? size_tag.attributes['val'].value.to_i / 2 : @document_properties[:font_size] + end + end +end diff --git a/lib/docx/version.rb b/lib/docx/version.rb index 663e8fe..aac1ff8 100644 --- a/lib/docx/version.rb +++ b/lib/docx/version.rb @@ -1,3 +1,3 @@ module Docx #:nodoc: - VERSION = '0.2.07' + VERSION = '0.3.0' end diff --git a/spec/docx/document_spec.rb b/spec/docx/document_spec.rb index d038d66..b41b7db 100755 --- a/spec/docx/document_spec.rb +++ b/spec/docx/document_spec.rb @@ -5,7 +5,7 @@ describe Docx::Document do before(:all) do @fixtures_path = "spec/fixtures" - @formatting_line_count = 12 # number of lines the formatting.docx file has + @formatting_line_count = 14 # number of lines the formatting.docx file has end describe 'reading' do @@ -35,7 +35,7 @@ @doc.each_paragraph do |p| p.each_text_run do |tr| expect(tr).to be_an_instance_of(Docx::Elements::Containers::TextRun) - expect(tr.formatting).to eq(Docx::Elements::Containers::TextRun::DEFAULT_FORMATTING) + expect(tr.formatting).to eq(Docx::TextRunFormatting.default_formatting(@doc.document_properties)) end end end @@ -160,11 +160,58 @@ end end + describe 'editing with formatting' do + before do + @doc = Docx::Document.open(@fixtures_path + '/editing.docx') + @formatting = { italic: false, underline: false, bold: true, font: 'Times New Roman', font_size: 20, color: 'FF0000' } + @default_formatting = Docx::TextRunFormatting::default_formatting(@doc.document_properties) + end + + it 'allows insertion of text before a bookmark with formatting' do + expect(@doc.paragraphs.first.text).to eq('test text') + @doc.bookmarks['beginning_bookmark'].insert_text_before('foo', @formatting) + expect(@doc.paragraphs.first.text).to eq('footest text') + text_runs = @doc.paragraphs.first.text_runs + expect(text_runs[0].text).to eq('footest') + expect(text_runs[0].formatting).to eq @formatting + expect(text_runs[1].text).to eq(' text') + expect(text_runs[1].formatting).to eq @default_formatting + end + + it 'allows insertion of text after a bookmark with formatting' do + expect(@doc.paragraphs.first.text).to eq('test text') + @doc.bookmarks['end_bookmark'].insert_text_after('bar', @formatting) + expect(@doc.paragraphs.first.text).to eq('test textbar') + text_runs = @doc.paragraphs.first.text_runs + expect(text_runs[0].text).to eq('test') + expect(text_runs[0].formatting).to eq @default_formatting + expect(text_runs[1].text).to eq(' textbar') + expect(text_runs[1].formatting).to eq @formatting + end + + it 'should allow multiple lines of text to be inserted at a bookmark with formatting' do + expect(@doc.paragraphs.last.text).to eq('') + new_lines = ['replacement test', 'second paragraph test', 'and a third paragraph test'] + @doc.bookmarks['isolated_bookmark'].insert_multiple_lines(new_lines, @formatting) + new_lines.each_index do |line| + expect(@doc.paragraphs[line + 2].text).to eq(new_lines[line]) + text_run = @doc.paragraphs[line + 2].text_runs.first + expect(text_run.formatting).to eq(@formatting) + end + end + + it 'should allow paragraphs to be aligned' do + expect(@doc.paragraphs[0].formatting).to eq(alignment: nil) + @doc.paragraphs[0].apply_formatting(alignment: 'center') + expect(@doc.paragraphs[0].formatting).to eq(alignment: 'center') + end + end + describe 'read formatting' do before do @doc = Docx::Document.open(@fixtures_path + '/formatting.docx') @formatting = @doc.paragraphs.map { |p| p.text_runs.map(&:formatting) } - @default_formatting = Docx::Elements::Containers::TextRun::DEFAULT_FORMATTING + @default_formatting = Docx::TextRunFormatting::default_formatting(@doc.document_properties) @only_italic = @default_formatting.merge italic: true @only_bold = @default_formatting.merge bold: true @only_underline = @default_formatting.merge underline: true @@ -184,57 +231,67 @@ expect(@doc.paragraphs[8].text).to eq('This paragraph is aligned right.') expect(@doc.paragraphs[9].text).to eq('This paragraph is 14 points.') expect(@doc.paragraphs[10].text).to eq('This paragraph has a word at 16 points.') + expect(@doc.paragraphs[11].text).to eq('This sentence has different formatting in different places.') + expect(@doc.paragraphs[12].text).to eq('This sentence uses the Times New Roman font.') + expect(@doc.paragraphs[13].text).to eq('This sentence is red. ') end it 'should contain a paragraph with multiple text runs' do - + expect(@doc.paragraphs.any? { |p| p.text_runs.length <= 2 }).to eq(true) end it 'should detect normal formatting' do [0, 4].each do |i| expect(@formatting[i][0]).to eq(@default_formatting) - expect(@doc.paragraphs[i].text_runs[0].italicized?).to eq(false) - expect(@doc.paragraphs[i].text_runs[0].bolded?).to eq(false) - expect(@doc.paragraphs[i].text_runs[0].underlined?).to eq(false) + text_run_formatting = @doc.paragraphs[i].text_runs[0].formatting + expect(text_run_formatting[:italic]).to eq(false) + expect(text_run_formatting[:bold]).to eq(false) + expect(text_run_formatting[:underline]).to eq(false) end end it 'should detect italic formatting' do expect(@formatting[1][0]).to eq(@only_italic) - expect(@doc.paragraphs[1].text_runs[0].italicized?).to eq(true) - expect(@doc.paragraphs[1].text_runs[0].bolded?).to eq(false) - expect(@doc.paragraphs[1].text_runs[0].underlined?).to eq(false) + text_run_formatting = @doc.paragraphs[1].text_runs[0].formatting + expect(text_run_formatting[:italic]).to eq(true) + expect(text_run_formatting[:bold]).to eq(false) + expect(text_run_formatting[:underline]).to eq(false) end it 'should detect bold formatting' do expect(@formatting[2][0]).to eq(@only_bold) - expect(@doc.paragraphs[2].text_runs[0].italicized?).to eq(false) - expect(@doc.paragraphs[2].text_runs[0].bolded?).to eq(true) - expect(@doc.paragraphs[2].text_runs[0].underlined?).to eq(false) + text_run_formatting = @doc.paragraphs[2].text_runs[0].formatting + expect(text_run_formatting[:italic]).to eq(false) + expect(text_run_formatting[:bold]).to eq(true) + expect(text_run_formatting[:underline]).to eq(false) end it 'should detect underline formatting' do expect(@formatting[3][0]).to eq(@only_underline) - expect(@doc.paragraphs[3].text_runs[0].italicized?).to eq(false) - expect(@doc.paragraphs[3].text_runs[0].bolded?).to eq(false) - expect(@doc.paragraphs[3].text_runs[0].underlined?).to eq(true) + text_run_formatting = @doc.paragraphs[3].text_runs[0].formatting + expect(text_run_formatting[:italic]).to eq(false) + expect(text_run_formatting[:bold]).to eq(false) + expect(text_run_formatting[:underline]).to eq(true) end it 'should detect mixed formatting' do expect(@formatting[5][0]).to eq(@default_formatting) - expect(@doc.paragraphs[5].text_runs[0].italicized?).to eq(false) - expect(@doc.paragraphs[5].text_runs[0].bolded?).to eq(false) - expect(@doc.paragraphs[5].text_runs[0].underlined?).to eq(false) - + text_run_formatting = @doc.paragraphs[5].text_runs[0].formatting + expect(text_run_formatting[:italic]).to eq(false) + expect(text_run_formatting[:bold]).to eq(false) + expect(text_run_formatting[:underline]).to eq(false) + expect(@formatting[5][1]).to eq(@all_formatted) - expect(@doc.paragraphs[5].text_runs[1].italicized?).to eq(true) - expect(@doc.paragraphs[5].text_runs[1].bolded?).to eq(true) - expect(@doc.paragraphs[5].text_runs[1].underlined?).to eq(true) - + text_run_formatting = @doc.paragraphs[5].text_runs[1].formatting + expect(text_run_formatting[:italic]).to eq(true) + expect(text_run_formatting[:bold]).to eq(true) + expect(text_run_formatting[:underline]).to eq(true) + expect(@formatting[5][2]).to eq(@default_formatting) - expect(@doc.paragraphs[5].text_runs[2].italicized?).to eq(false) - expect(@doc.paragraphs[5].text_runs[2].bolded?).to eq(false) - expect(@doc.paragraphs[5].text_runs[2].underlined?).to eq(false) + text_run_formatting = @doc.paragraphs[5].text_runs[2].formatting + expect(text_run_formatting[:italic]).to eq(false) + expect(text_run_formatting[:bold]).to eq(false) + expect(text_run_formatting[:underline]).to eq(false) end it 'should detect centered paragraphs' do @@ -255,27 +312,25 @@ expect(@doc.paragraphs[9].aligned_right?).to eq(false) end - # ECMA-376 Office Open XML spec (4th edition), 17.3.2.38, size is - # defined in half-points, meaning 14pt text returns a value of 28. - # http://www.ecma-international.org/publications/standards/Ecma-376.htm - it 'should return proper font size for paragraphs' do - expect(@doc.font_size).to eq 11 - expect(@doc.paragraphs[5].font_size).to eq 11 - paragraph = @doc.paragraphs[9] - expect(paragraph.font_size).to eq 14 - expect(paragraph.text_runs[0].font_size).to eq 14 + it 'should return proper font size for runs' do + expect(@doc.document_properties[:font_size]).to eq 11 + text_runs = @doc.paragraphs[10].text_runs + expect(text_runs[0].formatting[:font_size]).to eq 11 + expect(text_runs[1].formatting[:font_size]).to eq 16 + expect(text_runs[2].formatting[:font_size]).to eq 11 + expect(text_runs[3].formatting[:font_size]).to eq 11 + expect(text_runs[4].formatting[:font_size]).to eq 11 end - it 'should return proper font size for runs' do - expect(@doc.font_size).to eq 11 - paragraph = @doc.paragraphs[10] - expect(paragraph.font_size).to eq 11 - text_runs = paragraph.text_runs - expect(text_runs[0].font_size).to eq 11 - expect(text_runs[1].font_size).to eq 16 - expect(text_runs[2].font_size).to eq 11 - expect(text_runs[3].font_size).to eq 11 - expect(text_runs[4].font_size).to eq 11 + it 'should detect font for a textrun' do + expect(@doc.document_properties[:font]).to eq(nil) + textrun = @doc.paragraphs[12].text_runs.first + expect(textrun.formatting[:font]).to eq('Times New Roman') + end + + it 'should detect the color of text in a textrun' do + textrun = @doc.paragraphs[13].text_runs.first + expect(textrun.formatting[:color]).to eq('FF0000') end end @@ -327,7 +382,7 @@ expect(scan.last).to eq('

') expect(scan[1]).to eq('Normal') end - + it 'should emphasize italicized text' do scan = @doc.paragraphs[1].to_html.scan(@em_regex).flatten expect(scan.first).to eq(']+style\=\"([^\"]+).+(<\/p>)/ + regex = /(\]+style\=\"([^\"]+).+(<\/p>)/ scan = @doc.paragraphs[9].to_html.scan(regex).flatten expect(scan.first).to eq '' @@ -370,6 +425,22 @@ expect(scan[1].split(';').include?('font-size:16pt')).to eq(true) end + it 'should set font on styled text runs' do + regex = /(\]+style\=\"([^\;]+)[^\<]+(<\/span>)/ + scan = @doc.paragraphs[12].to_html.scan(regex).flatten + expect(scan.first).to eq '' + expect(scan[1].split(';').include?('font-family:"Times New Roman"')).to eq(true) + end + + it 'should set font color on styled text runs' do + regex = /(\]+style\=\"([^\"]+)[^\<]+(<\/span>)/ + scan = @doc.paragraphs[13].to_html.scan(regex).flatten + expect(scan.first).to eq '' + expect(scan[1].split(';').include?('color:#FF0000')).to eq(true) + end + it 'should properly highlight different text in different places in a sentence' do paragraph = @doc.paragraphs[11] scan = paragraph.to_html.scan(@em_regex).flatten @@ -423,4 +494,3 @@ end end end - diff --git a/spec/fixtures/formatting.docx b/spec/fixtures/formatting.docx index 72b5b88..68e1ab0 100644 Binary files a/spec/fixtures/formatting.docx and b/spec/fixtures/formatting.docx differ