diff --git a/lib/docx/document.rb b/lib/docx/document.rb index 3102da4..89eb764 100755 --- a/lib/docx/document.rb +++ b/lib/docx/document.rb @@ -18,15 +18,24 @@ module Docx # puts d.text # end class Document - attr_reader :xml, :doc, :zip, :styles + + # A path with * indicates that there are possibly multiple documents + # matching that glob, eg. word/header1.xml, word/header2.xml + DOCUMENT_PATHS = { + doc: "word/document.xml", + styles: "word/styles.xml", + headers: "word/header*.xml", + footers: "word/footer*.xml", + numbering: "word/numbering.xml" + } + + attr_reader :xml, :doc, :zip, :styles, :headers, :footers, :numbering def initialize(path, &block) @replace = {} @zip = Zip::File.open(path) - @document_xml = @zip.read('word/document.xml') - @doc = Nokogiri::XML(@document_xml) - @styles_xml = @zip.read('word/styles.xml') - @styles = Nokogiri::XML(@styles_xml) + extract_documents + if block_given? yield self @zip.close @@ -57,6 +66,8 @@ def paragraphs def bookmarks bkmrks_hsh = Hash.new bkmrks_ary = @doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node } + bkmrks_ary += @headers.values.map { |xml_doc| xml_doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node } }.flatten + bkmrks_ary += @footers.values.map { |xml_doc| xml_doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node } }.flatten # auto-generated by office 2010 bkmrks_ary.reject! {|b| b.name == "_GoBack" } bkmrks_ary.each {|b| bkmrks_hsh[b.name] = b } @@ -123,13 +134,49 @@ def replace_entry(entry_path, file_contents) private + def extract_documents + DOCUMENT_PATHS.each do |attr_name, path| + if path.match /\*/ + extract_multiple_documents_from_globbed_path(attr_name, path) + else + extract_single_document_from_path(attr_name, path) + end + end + end + + def extract_single_document_from_path(attr_name, path) + if @zip.find_entry(path) + xml_doc = @zip.read(path) + self.instance_variable_set(:"@#{attr_name}", Nokogiri::XML(xml_doc)) + end + end + + def extract_multiple_documents_from_globbed_path(hash_attr_name, glob_path) + files = @zip.glob(glob_path).map { |h| h.name } + filename_and_contents_pairs = files.map do |file| + simple_file_name = file.sub(/^word\//, "").sub(/\.xml$/, "") + [simple_file_name, Nokogiri::XML(@zip.read(file))] + end + hash = Hash[filename_and_contents_pairs] + self.instance_variable_set(:"@#{hash_attr_name}", hash) + end + #-- # TODO: Flesh this out to be compatible with other files # TODO: Method to set flag on files that have been edited, probably by inserting something at the # end of methods that make edits? #++ def update - replace_entry "word/document.xml", doc.serialize(:save_with => 0) + DOCUMENT_PATHS.each do |attr_name, path| + if path.match /\*/ + self.instance_variable_get("@#{attr_name}").each do |simple_file_name, contents| + replace_entry("word/#{simple_file_name}.xml", contents.serialize(:save_with => 0)) + end + else + xml_document = self.instance_variable_get("@#{attr_name}") + replace_entry path, xml_document.serialize(:save_with => 0) if xml_document + end + end end # generate Elements::Containers::Paragraph from paragraph XML node diff --git a/spec/docx/document_spec.rb b/spec/docx/document_spec.rb index a1f76dc..f9f6e6f 100755 --- a/spec/docx/document_spec.rb +++ b/spec/docx/document_spec.rb @@ -279,6 +279,22 @@ end end + describe 'multiple documents' do + before do + @doc = Docx::Document.open(@fixtures_path + '/multi_doc.docx') + end + + it 'should extract all inner documents' do + expect(@doc.doc).to_not be_nil + expect(@doc.styles).to_not be_nil + expect(@doc.headers).to_not be_nil + expect(@doc.headers["header1"].text).to eq "Hello from the header." + expect(@doc.footers).to_not be_nil + expect(@doc.footers["footer1"].text).to eq "Hello from the footer." + expect(@doc.numbering).to_not be_nil + end + end + describe 'saving' do before do @doc = Docx::Document.open(@fixtures_path + '/saving.docx') diff --git a/spec/fixtures/multi_doc.docx b/spec/fixtures/multi_doc.docx new file mode 100644 index 0000000..008d06e Binary files /dev/null and b/spec/fixtures/multi_doc.docx differ