24
24
import org .springframework .ai .document .DocumentTransformer ;
25
25
26
26
/**
27
+ * ContentFormatTransformer processes a list of documents by applying a content formatter
28
+ * to each document.
29
+ *
27
30
* @author Christian Tzolov
31
+ * @since 1.0.0
28
32
*/
29
33
public class ContentFormatTransformer implements DocumentTransformer {
30
34
31
35
/**
32
36
* Disable the content-formatter template rewrite.
33
37
*/
34
- private boolean disableTemplateRewrite = false ;
38
+ private final boolean disableTemplateRewrite ;
35
39
36
- private ContentFormatter contentFormatter ;
40
+ private final ContentFormatter contentFormatter ;
37
41
42
/**
 * Builds a transformer around the supplied formatter, leaving the content-formatter
 * template rewrite enabled.
 * @param contentFormatter the formatter to install on every processed document
 */
public ContentFormatTransformer(ContentFormatter contentFormatter) {
    // Delegate to the two-argument constructor; false keeps the template rewrite on.
    this(contentFormatter, false);
}
41
50
51
+ /**
52
+ * Creates a ContentFormatTransformer object with the given ContentFormatter and
53
+ * template-rewrite setting.
54
+ * @param contentFormatter The ContentFormatter to be used for transforming the
55
+ * documents
56
+ * @param disableTemplateRewrite Flag indicating whether to disable the
57
+ * content-formatter template rewrite
58
+ */
42
59
public ContentFormatTransformer (ContentFormatter contentFormatter , boolean disableTemplateRewrite ) {
43
60
this .contentFormatter = contentFormatter ;
44
61
this .disableTemplateRewrite = disableTemplateRewrite ;
@@ -47,45 +64,50 @@ public ContentFormatTransformer(ContentFormatter contentFormatter, boolean disab
47
64
/**
48
65
* Post process documents chunked from loader. Allows extractors to be chained.
49
66
* @param documents to post process.
50
- * @return
67
+ * @return processed documents
51
68
*/
52
69
public List <Document > apply (List <Document > documents ) {
53
-
54
- if (this .contentFormatter != null ) {
55
-
56
- documents .forEach (document -> {
57
- // Update formatter
58
- if (document .getContentFormatter () instanceof DefaultContentFormatter
59
- && this .contentFormatter instanceof DefaultContentFormatter ) {
60
-
61
- DefaultContentFormatter docFormatter = (DefaultContentFormatter ) document .getContentFormatter ();
62
- DefaultContentFormatter toUpdateFormatter = (DefaultContentFormatter ) this .contentFormatter ;
63
-
64
- var updatedEmbedExcludeKeys = new ArrayList <>(docFormatter .getExcludedEmbedMetadataKeys ());
65
- updatedEmbedExcludeKeys .addAll (toUpdateFormatter .getExcludedEmbedMetadataKeys ());
66
-
67
- var updatedInterfaceExcludeKeys = new ArrayList <>(docFormatter .getExcludedInferenceMetadataKeys ());
68
- updatedInterfaceExcludeKeys .addAll (toUpdateFormatter .getExcludedInferenceMetadataKeys ());
69
-
70
- var builder = DefaultContentFormatter .builder ()
71
- .withExcludedEmbedMetadataKeys (updatedEmbedExcludeKeys )
72
- .withExcludedInferenceMetadataKeys (updatedInterfaceExcludeKeys )
73
- .withMetadataTemplate (docFormatter .getMetadataTemplate ())
74
- .withMetadataSeparator (docFormatter .getMetadataSeparator ());
75
-
76
- if (!this .disableTemplateRewrite ) {
77
- builder .withTextTemplate (docFormatter .getTextTemplate ());
78
- }
79
- document .setContentFormatter (builder .build ());
80
- }
81
- else {
82
- // Override formatter
83
- document .setContentFormatter (this .contentFormatter );
84
- }
85
- });
70
+ if (contentFormatter != null ) {
71
+ documents .forEach (this ::processDocument );
86
72
}
87
73
88
74
return documents ;
89
75
}
90
76
77
+ private void processDocument (Document document ) {
78
+ if (document .getContentFormatter () instanceof DefaultContentFormatter docFormatter
79
+ && contentFormatter instanceof DefaultContentFormatter toUpdateFormatter ) {
80
+ updateFormatter (document , docFormatter , toUpdateFormatter );
81
+
82
+ }
83
+ else {
84
+ overrideFormatter (document );
85
+ }
86
+ }
87
+
88
+ private void updateFormatter (Document document , DefaultContentFormatter docFormatter ,
89
+ DefaultContentFormatter toUpdateFormatter ) {
90
+ List <String > updatedEmbedExcludeKeys = new ArrayList <>(docFormatter .getExcludedEmbedMetadataKeys ());
91
+ updatedEmbedExcludeKeys .addAll (toUpdateFormatter .getExcludedEmbedMetadataKeys ());
92
+
93
+ List <String > updatedInterfaceExcludeKeys = new ArrayList <>(docFormatter .getExcludedInferenceMetadataKeys ());
94
+ updatedInterfaceExcludeKeys .addAll (toUpdateFormatter .getExcludedInferenceMetadataKeys ());
95
+
96
+ DefaultContentFormatter .Builder builder = DefaultContentFormatter .builder ()
97
+ .withExcludedEmbedMetadataKeys (updatedEmbedExcludeKeys )
98
+ .withExcludedInferenceMetadataKeys (updatedInterfaceExcludeKeys )
99
+ .withMetadataTemplate (docFormatter .getMetadataTemplate ())
100
+ .withMetadataSeparator (docFormatter .getMetadataSeparator ());
101
+
102
+ if (!disableTemplateRewrite ) {
103
+ builder .withTextTemplate (docFormatter .getTextTemplate ());
104
+ }
105
+
106
+ document .setContentFormatter (builder .build ());
107
+ }
108
+
109
+ private void overrideFormatter (Document document ) {
110
+ document .setContentFormatter (contentFormatter );
111
+ }
112
+
91
113
}
0 commit comments