1 file changed: +16 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -141,11 +141,23 @@ export async function extractTextFromPDF(
141141 const prevEndX = prevItem . transform [ 4 ] + ( prevItem . width ?? 0 ) ;
142142 const currentStartX = item . transform [ 4 ] ;
143143 const space = currentStartX - prevEndX ;
144-
145- if ( space > ( ( item . width ?? 0 ) * 0.3 ) ) {
146- lineText += ' ' + item . str ;
144+
145+ // Get average character width as fallback
146+ const avgCharWidth = ( item . width ?? 0 ) / Math . max ( 1 , item . str . length ) ;
147+
148+ // Multiple conditions for space detection
149+ const needsSpace =
150+ // Primary check: significant gap between items
151+ space > Math . max ( avgCharWidth * 0.3 , 2 ) ||
152+ // Secondary check: natural word boundary
153+ ( ! / ^ \W / . test ( item . str ) && ! / \W $ / . test ( prevItem . str ) ) ||
154+ // Tertiary check: items are far enough apart relative to their size
155+ ( space > ( ( prevItem . width ?? 0 ) * 0.25 ) ) ;
156+
157+ if ( needsSpace ) {
158+ lineText += ' ' + item . str ;
147159 } else {
148- lineText += item . str ;
160+ lineText += item . str ;
149161 }
150162 }
151163 prevItem = item ;
You can’t perform that action at this time.
0 commit comments