Skip to content

Commit 1a7e681

Browse files
committed
Refactor to reuse more I/O code
1 parent c0bcd30 commit 1a7e681

File tree

50 files changed

+522
-311
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+522
-311
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ DocSharp is a pure C# library to convert between document formats without Office
55
The following packages are currently available:
66

77
- DocSharp.Binary: convert Office 97-2003 binary documents (doc, xls, ppt) to OpenXML documents (docx, xlsx, pptx). This is a fork of the abandoned [b2xtranslator project](https://github.com/EvolutionJobs/b2xtranslator) that adds critical fixes.
8-
- DocSharp.Docx: convert DOCX to RTF, HTML, Markdown and plain text (.txt). Possible applications include generating Open XML documents in C# and exporting for other editors, or loading Microsoft Word documents in a RichTextBox / RichEditBox control.
8+
- DocSharp.Docx: convert DOCX to RTF, HTML, Markdown and plain text (.txt). Possible applications include generating Open XML documents in C# and exporting for other editors/services, or loading Microsoft Word documents in a RichTextBox / RichEditBox control.
99
- DocSharp.Markdown: convert Markdown to DOCX or RTF using custom Markdig renderers.
1010

1111
Packages can be installed via NuGet:
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
using System.IO;
2+
using System.Text;
3+
4+
namespace DocSharp;
5+
6+
/// <summary>
/// Base class for converters that read an input document from a stream or file
/// and write the converted result to a stream or file.
/// </summary>
/// <typeparam name="TOutput">
/// Type parameter carried for derived converters; no member of this class uses it.
/// </typeparam>
public abstract class DocumentConverterBase<TOutput> where TOutput : class
{
    /// <summary>
    /// Converts the document read from <paramref name="inputStream"/> and writes the
    /// result to <paramref name="outputStream"/>. All other overloads forward to this one.
    /// </summary>
    /// <param name="inputStream">The stream containing the input document.</param>
    /// <param name="outputStream">The stream that receives the converted output.</param>
    public abstract void Convert(Stream inputStream, Stream outputStream);

    /// <summary>
    /// Converts the document read from <paramref name="inputStream"/> and saves the
    /// result to <paramref name="outputFilePath"/>, replacing any existing file.
    /// </summary>
    /// <param name="inputStream">The stream containing the input document.</param>
    /// <param name="outputFilePath">The path of the output file to create or overwrite.</param>
    public virtual void Convert(Stream inputStream, string outputFilePath)
    {
        // FileMode.Create truncates an existing file. File.OpenWrite would keep the old
        // contents and leave stale trailing bytes whenever the new output is shorter.
        using var outputStream = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write, FileShare.None);
        Convert(inputStream, outputStream);
    }

    /// <summary>
    /// Converts the file at <paramref name="inputFilePath"/> and writes the result to
    /// <paramref name="outputStream"/>.
    /// </summary>
    /// <param name="inputFilePath">The path of the input file.</param>
    /// <param name="outputStream">The stream that receives the converted output.</param>
    public virtual void Convert(string inputFilePath, Stream outputStream)
    {
        using var inputStream = File.OpenRead(inputFilePath);
        Convert(inputStream, outputStream);
    }

    /// <summary>
    /// Converts the file at <paramref name="inputFilePath"/> and saves the result to
    /// <paramref name="outputFilePath"/>, replacing any existing file.
    /// </summary>
    /// <param name="inputFilePath">The path of the input file.</param>
    /// <param name="outputFilePath">The path of the output file to create or overwrite.</param>
    public virtual void Convert(string inputFilePath, string outputFilePath)
    {
        // The input is opened first, so a missing input file fails before the
        // output file is created or truncated.
        using var inputStream = File.OpenRead(inputFilePath);
        using var outputStream = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write, FileShare.None);
        Convert(inputStream, outputStream);
    }
}
29+
30+
/// <summary>
/// Base class for converters whose input is binary: adds overloads that accept
/// the input document as an in-memory byte array.
/// </summary>
/// <typeparam name="TOutput">
/// Type parameter forwarded to <see cref="DocumentConverterBase{TOutput}"/>.
/// </typeparam>
public abstract class BinaryDocumentConverterBase<TOutput> : DocumentConverterBase<TOutput> where TOutput : class
{
    /// <summary>
    /// Wraps <paramref name="inputBytes"/> in a memory stream and converts it,
    /// writing the result to <paramref name="outputStream"/>.
    /// </summary>
    /// <param name="inputBytes">The bytes of the input document.</param>
    /// <param name="outputStream">The stream that receives the converted output.</param>
    public void Convert(byte[] inputBytes, Stream outputStream)
    {
        using var buffer = new MemoryStream(inputBytes);
        Convert(buffer, outputStream);
    }

    /// <summary>
    /// Wraps <paramref name="inputBytes"/> in a memory stream and converts it,
    /// saving the result to <paramref name="outputFilePath"/>.
    /// </summary>
    /// <param name="inputBytes">The bytes of the input document.</param>
    /// <param name="outputFilePath">The path of the output file.</param>
    public void Convert(byte[] inputBytes, string outputFilePath)
    {
        using var buffer = new MemoryStream(inputBytes);
        Convert(buffer, outputFilePath);
    }
}
44+
45+
/// <summary>
/// Base class for converters whose input is text: adds overloads that accept a
/// <see cref="TextReader"/> or a string as the input document.
/// </summary>
/// <typeparam name="TOutput">
/// Type parameter forwarded to <see cref="DocumentConverterBase{TOutput}"/>.
/// </typeparam>
public abstract class TextDocumentConverterBase<TOutput> : DocumentConverterBase<TOutput> where TOutput : class
{
    /// <summary>
    /// Converts the text supplied by <paramref name="reader"/> and writes the result
    /// to <paramref name="outputStream"/>. The other overloads of this class forward here.
    /// </summary>
    /// <param name="reader">The reader that supplies the input text.</param>
    /// <param name="outputStream">The stream that receives the converted output.</param>
    public abstract void Convert(TextReader reader, Stream outputStream);

    /// <summary>
    /// Converts the text supplied by <paramref name="reader"/> and saves the result
    /// to <paramref name="outputFilePath"/>, replacing any existing file.
    /// </summary>
    /// <param name="reader">The reader that supplies the input text.</param>
    /// <param name="outputFilePath">The path of the output file to create or overwrite.</param>
    public void Convert(TextReader reader, string outputFilePath)
    {
        // FileMode.Create truncates an existing file. File.OpenWrite would keep the old
        // contents and leave stale trailing bytes whenever the new output is shorter.
        using var outputStream = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write, FileShare.None);
        Convert(reader, outputStream);
    }

    /// <summary>
    /// Reads <paramref name="inputStream"/> as text (UTF-8 unless a byte order mark
    /// indicates otherwise) and converts it. The input stream is left open.
    /// </summary>
    /// <param name="inputStream">The stream containing the input text.</param>
    /// <param name="outputStream">The stream that receives the converted output.</param>
    public override void Convert(Stream inputStream, Stream outputStream)
    {
        using var reader = new StreamReader(inputStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, bufferSize: 1024, leaveOpen: true);
        Convert(reader, outputStream);
    }

    /// <summary>
    /// Converts the text in <paramref name="inputContent"/> and writes the result to
    /// <paramref name="outputStream"/>.
    /// </summary>
    /// <param name="inputContent">The input text.</param>
    /// <param name="outputStream">The stream that receives the converted output.</param>
    /// <param name="encoding">
    /// The encoding used to buffer the text in memory; UTF-8 when <c>null</c>.
    /// </param>
    public void ConvertString(string inputContent, Stream outputStream, Encoding? encoding = null)
    {
        encoding ??= Encoding.UTF8;

        using var memoryStream = new MemoryStream();
        using (var writer = new StreamWriter(memoryStream, encoding, 1024, leaveOpen: true))
        {
            writer.Write(inputContent);
        } // Disposing the writer flushes it; leaveOpen keeps the memory stream usable.

        memoryStream.Position = 0;

        // Decode with the same encoding that was used to encode. Going through
        // Convert(Stream, Stream) would decode as UTF-8 and corrupt text buffered
        // with an encoding that writes no byte order mark (e.g. Latin-1).
        using var reader = new StreamReader(memoryStream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: 1024, leaveOpen: true);
        Convert(reader, outputStream);
    }

    /// <summary>
    /// Converts the text in <paramref name="inputContent"/> and saves the result to
    /// <paramref name="outputFilePath"/>, replacing any existing file.
    /// </summary>
    /// <param name="inputContent">The input text.</param>
    /// <param name="outputFilePath">The path of the output file to create or overwrite.</param>
    /// <param name="encoding">
    /// The encoding used to buffer the text in memory; UTF-8 when <c>null</c>.
    /// </param>
    public void ConvertString(string inputContent, string outputFilePath, Encoding? encoding = null)
    {
        // FileMode.Create truncates an existing file; see Convert(TextReader, string).
        using var outputStream = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write, FileShare.None);
        ConvertString(inputContent, outputStream, encoding);
    }
}
82+

src/DocSharp.Docx/DocSharp.Docx.csproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
<PackageTags>docx html rtf markdown txt convert converter openxml office word</PackageTags>
1818
</PropertyGroup>
1919

20+
<ItemGroup>
21+
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleToAttribute">
22+
<_Parameter1>DocSharp.Renderer</_Parameter1>
23+
</AssemblyAttribute>
24+
</ItemGroup>
25+
2026
<ItemGroup>
2127
<None Remove="Resources\OMML2MML.XSL" />
2228
</ItemGroup>

src/DocSharp.Docx/DocxConverterBase.cs

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@
1313

1414
namespace DocSharp.Docx;
1515

16-
public abstract class DocxConverterBase<TOutput>
16+
/// <summary>
17+
/// Base class for DOCX converters.
18+
/// </summary>
19+
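/// <typeparam name="TOutput">The type of the output object passed to the processing methods (for example, <c>HtmlTextWriter</c> for HTML).</typeparam>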
20+
public abstract class DocxConverterBase<TOutput> : BinaryDocumentConverterBase<TOutput> where TOutput : class
1721
{
1822
/// <summary>
1923
/// Get or set the base file path for processing external sub-documents (if any).
@@ -28,10 +32,44 @@ static DocxConverterBase()
2832
}
2933
#endif
3034

31-
internal List<(List<OpenXmlElement> content, SectionProperties properties)> Sections;
35+
internal List<(List<OpenXmlElement> content, SectionProperties properties)> Sections = new();
3236
internal bool TitlePage = false;
3337
internal bool FacingPages = false;
3438

39+
/// <summary>
40+
/// Convert a DOCX file to the output format.
41+
/// </summary>
42+
/// <param name="inputDocument">The WordprocessingDocument to use.</param>
43+
/// <param name="outputStream">The output stream.</param>
44+
public abstract void Convert(WordprocessingDocument inputDocument, Stream outputStream);
45+
// This is the main function that must be implemented by specific converters.
46+
47+
/// <summary>
48+
/// Convert a DOCX file to the output format.
49+
/// </summary>
50+
/// <param name="inputStream">The input DOCX stream to use.</param>
51+
/// <param name="outputStream">The output stream.</param>
52+
public override void Convert(Stream inputStream, Stream outputStream)
53+
{
54+
using (var wordDocument = WordprocessingDocument.Open(inputStream, false))
55+
{
56+
Convert(wordDocument, outputStream);
57+
}
58+
}
59+
60+
/// <summary>
61+
/// Convert a DOCX file to the output format.
62+
/// </summary>
63+
/// <param name="inputDocument">The WordprocessingDocument to use.</param>
64+
/// <param name="outputFilePath">The output file path.</param>
65+
public virtual void Convert(WordprocessingDocument inputDocument, string outputFilePath)
66+
{
67+
using (var fs = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write))
68+
{
69+
Convert(inputDocument, fs);
70+
}
71+
}
72+
3573
internal virtual void ProcessDocument(Document document, TOutput sb)
3674
{
3775
if (document.DocumentBackground is DocumentBackground bg)

src/DocSharp.Docx/DocxExtensions.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
namespace DocSharp.Docx;
1111

12+
/// <summary>
13+
/// Provides public extension methods for WordprocessingDocument.
14+
/// </summary>
1215
public static class DocxExtensions
1316
{
1417
/// <summary>

src/DocSharp.Docx/DocxToHtml/DocxToHtmlConverter.Borders.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
namespace DocSharp.Docx;
1212

13-
public partial class DocxToHtmlConverter : DocxToTextWriterBase<HtmlTextWriter>
13+
public partial class DocxToHtmlConverter : DocxToXmlWriterBase<HtmlTextWriter>
1414
{
1515
internal void ProcessBorder(BorderType? border, string? cssAttribute, ref List<string> styles)
1616
{

src/DocSharp.Docx/DocxToHtml/DocxToHtmlConverter.DrawingML.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
namespace DocSharp.Docx;
2020

21-
public partial class DocxToHtmlConverter : DocxToTextWriterBase<HtmlTextWriter>
21+
public partial class DocxToHtmlConverter : DocxToXmlWriterBase<HtmlTextWriter>
2222
{
2323
internal override void ProcessDrawing(Drawing drawing, HtmlTextWriter sb)
2424
{

src/DocSharp.Docx/DocxToHtml/DocxToHtmlConverter.FootnoteEndnote.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
namespace DocSharp.Docx;
1414

15-
public partial class DocxToHtmlConverter : DocxToTextWriterBase<HtmlTextWriter>
15+
public partial class DocxToHtmlConverter : DocxToXmlWriterBase<HtmlTextWriter>
1616
{
1717
// Note: FootnoteReference and EndnoteReference are found inside runs in the document body,
1818
// while FootnoteReferenceMark and EndnoteReferenceMark are in runs in the footnote/endnote part itself.

src/DocSharp.Docx/DocxToHtml/DocxToHtmlConverter.Image.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
namespace DocSharp.Docx;
2424

25-
public partial class DocxToHtmlConverter : DocxToTextWriterBase<HtmlTextWriter>
25+
public partial class DocxToHtmlConverter : DocxToXmlWriterBase<HtmlTextWriter>
2626
{
2727
internal void ProcessImagePart(OpenXmlPart? rootPart, string relId, double width, double height, HtmlTextWriter sb)
2828
{

src/DocSharp.Docx/DocxToHtml/DocxToHtmlConverter.List.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
namespace DocSharp.Docx;
1111

12-
public partial class DocxToHtmlConverter : DocxToTextWriterBase<HtmlTextWriter>
12+
public partial class DocxToHtmlConverter : DocxToXmlWriterBase<HtmlTextWriter>
1313
{
1414
private readonly Dictionary<int, (int numId, int abstractNumId, int counter)> _listLevelCounters = new();
1515

0 commit comments

Comments
 (0)