sajari · catenacyber · Nov 25, 2025
diff --git a/docx.go b/docx.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"strings"
 	"time"
 )
 
@@ -61,6 +62,9 @@ func ConvertDocx(r io.Reader) (string, map[string]string, error) {
 
 	meta := make(map[string]string)
 	var textHeader, textBody, textFooter string
+	var textBodySb64 strings.Builder
+	var textFooterSb64 strings.Builder
+	var textHeaderSb64 strings.Builder
 	for _, override := range contentTypeDefinition.Overrides {
 		f := zipFiles[override.PartName]
 
@@ -92,22 +96,25 @@ func ConvertDocx(r io.Reader) (string, map[string]string, error) {
 			if err != nil {
 				return "", nil, err
 			}
-			textBody += body + "\n"
+			textBodySb64.WriteString(body + "\n")
 		case override.ContentType == "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml":
 			footer, err := parseDocxText(f)
 			if err != nil {
 				return "", nil, err
 			}
-			textFooter += footer + "\n"
+			textFooterSb64.WriteString(footer + "\n")
 		case override.ContentType == "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml":
 			header, err := parseDocxText(f)
 			if err != nil {
 				return "", nil, err
 			}
-			textHeader += header + "\n"
+			textHeaderSb64.WriteString(header + "\n")
 		}
 
 	}
+	textBody += textBodySb64.String()
+	textFooter += textFooterSb64.String()
+	textHeader += textHeaderSb64.String()
 	return textHeader + "\n" + textBody + "\n" + textFooter, meta, nil
 }
 

diff --git a/html.go b/html.go
@@ -72,10 +72,12 @@ func cleanHTML(r io.Reader, all bool) string {
 	junkSection := false
 
 	d := html.NewTokenizer(r)
+	var outputSb strings.Builder
 	for {
 		// token type
 		tokenType := d.Next()
 		if tokenType == html.ErrorToken {
+			output += outputSb.String()
 			return output
 		}
 		token := d.Token()
@@ -90,28 +92,29 @@ func cleanHTML(r io.Reader, all bool) string {
 			}
 
 			if !junkSection && mainSection {
-				output += "<" + token.Data + ">"
+				outputSb.WriteString("<" + token.Data + ">")
 			}
 
 		case html.TextToken: // text between start and end tag
 			if !junkSection && mainSection {
-				output += token.Data
+				outputSb.WriteString(token.Data)
 			}
 
 		case html.EndTagToken: // </tag>
 			if !junkSection && mainSection {
-				output += "</" + token.Data + ">"
+				outputSb.WriteString("</" + token.Data + ">")
 			}
 			if !acceptedHTMLTag(token.Data) {
 				junkSection = false
 			}
 
 		case html.SelfClosingTagToken: // <tag/>
 			if !junkSection && mainSection {
-				output += "<" + token.Data + " />" // TODO: Can probably keep attributes from the meta tags
+				outputSb.WriteString("<" + token.Data + " />") // TODO: Can probably keep attributes from the meta tags
 			}
 		}
 	}
+	// No flush here: the loop exits only via the ErrorToken return above, which already appends outputSb to output.
 }
 
 // HTMLReadabilityOptions is a type which defines parameters that are passed to the justext package.
@@ -150,14 +153,15 @@ func HTMLReadability(r io.Reader) ([]byte, error) {
 
 	useClasses := strings.SplitN(HTMLReadabilityOptionsValues.ReadabilityUseClasses, ",", 10)
 
-	output := ""
+	var outputSb strings.Builder
 	for _, paragraph := range paragraphSet {
 		for _, class := range useClasses {
 			if paragraph.CfClass == class {
-				output += paragraph.Text + "\n"
+				outputSb.WriteString(paragraph.Text + "\n")
 			}
 		}
 	}
+	output := outputSb.String()
 
 	return []byte(output), nil
 }

diff --git a/pptx.go b/pptx.go
@@ -48,7 +48,7 @@ func ConvertPptx(r io.Reader) (string, map[string]string, error) {
 	}
 
 	meta := make(map[string]string)
-	var textBody string
+	var textBodySb strings.Builder
 	for _, override := range contentTypeDefinition.Overrides {
 		f := zipFiles[override.PartName]
 
@@ -59,8 +59,9 @@ func ConvertPptx(r io.Reader) (string, map[string]string, error) {
 			if err != nil {
 				return "", nil, fmt.Errorf("could not parse pptx: %v", err)
 			}
-			textBody += body + "\n"
+			textBodySb.WriteString(body + "\n")
 		}
 	}
+	textBody := textBodySb.String()
 	return strings.TrimSuffix(textBody, "\n"), meta, nil
 }
diff --git a/rtf.go b/rtf.go
@@ -24,14 +24,16 @@ func ConvertRTF(r io.Reader) (string, map[string]string, error) {
 
 	// Step through content looking for meta data and stripping out comments
 	meta := make(map[string]string)
+	var outputSb strings.Builder
 	for _, line := range strings.Split(string(tmpOutput), "\n") {
 		if parts := strings.SplitN(line, ":", 2); len(parts) > 1 {
 			meta[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
 		}
 		if !strings.HasPrefix(line, "### ") {
-			output += line + "\n"
+			outputSb.WriteString(line + "\n")
 		}
 	}
+	output += outputSb.String()
 
 	// Identify meta data
 	if tmp, ok := meta["AUTHOR"]; ok {

diff --git a/xml.go b/xml.go
@@ -5,6 +5,7 @@ import (
 	"encoding/xml"
 	"fmt"
 	"io"
+	"strings"
 )
 
 // ConvertXML converts an XML file to text.
@@ -40,11 +41,13 @@ func XMLToText(r io.Reader, breaks []string, skip []string, strict bool) (string
 		case xml.CharData:
 			result += string(v)
 		case xml.StartElement:
+			var resultSb strings.Builder
 			for _, breakElement := range breaks {
 				if v.Name.Local == breakElement {
-					result += "\n"
+					resultSb.WriteString("\n")
 				}
 			}
+			result += resultSb.String()
 			for _, skipElement := range skip {
 				if v.Name.Local == skipElement {
 					depth := 1