@@ -5,8 +5,7 @@ class DocStripper {
55 // Default options - all enabled
66 this . options = {
77 removeEmptyLines : options . removeEmptyLines !== false ,
8- removePageNumbers : options . removePageNumbers !== false ,
9- removeHeadersFooters : options . removeHeadersFooters !== false ,
8+ removeHeadersFooters : options . removeHeadersFooters !== false , // Includes page numbers removal
109 removeDuplicates : options . removeDuplicates !== false ,
1110 removePunctuationLines : options . removePunctuationLines !== false ,
1211 preserveParagraphSpacing : options . preserveParagraphSpacing !== false ,
@@ -492,8 +491,8 @@ class DocStripper {
492491 shouldRemove = true ;
493492 }
494493
495- // Skip page numbers (if enabled) - BEFORE merge
496- if ( ! shouldRemove && this . options . removePageNumbers && this . isPageNumber ( stripped ) ) {
494+ // Skip page numbers (if headers/footers removal is enabled) - BEFORE merge
495+ if ( ! shouldRemove && this . options . removeHeadersFooters && this . isPageNumber ( stripped ) ) {
497496 headerFooterRemovedBeforeMerge ++ ;
498497 shouldRemove = true ;
499498 }
@@ -776,8 +775,7 @@ class SmartCleaner {
776775 // Settings for customizing cleaning behavior
777776 this . settings = {
778777 removeEmptyLines : true ,
779- removePageNumbers : true ,
780- removeHeadersFooters : true ,
778+ removeHeadersFooters : true , // Includes page numbers removal
781779 removeDuplicates : true ,
782780 removePunctuationLines : true ,
783781 preserveParagraphSpacing : true ,
@@ -786,6 +784,7 @@ class SmartCleaner {
786784 normalizeWhitespace : false ,
787785 keepTableSpacing : true ,
788786 cleaningModeType : 'conservative' , // 'conservative' or 'aggressive'
787+ cleaningTemperament : 'gentle' , // 'gentle', 'moderate', 'thorough', 'aggressive'
789788 } ;
790789 }
791790
@@ -949,11 +948,8 @@ class SmartCleaner {
949948 const removeInstructions = [ ] ;
950949 const preserveInstructions = [ ] ;
951950
952- if ( this . settings . removePageNumbers ) {
953- removeInstructions . push ( '- Page numbers: standalone digits (1, 2, 3), Roman numerals (I, II, III), single letters (A, B, C)' ) ;
954- }
955-
956951 if ( this . settings . removeHeadersFooters ) {
952+ removeInstructions . push ( '- Page numbers: standalone digits (1, 2, 3), Roman numerals (I, II, III), single letters (A, B, C)' ) ;
957953 removeInstructions . push ( '- Headers/footers: "Page X of Y", "Confidential", "DRAFT", "Internal Use Only", "PROPRIETARY", etc.' ) ;
958954 }
959955
@@ -982,25 +978,42 @@ class SmartCleaner {
982978 // Build rules section
983979 const rules = [ ] ;
984980
985- // Add mode-specific guidance
986- const modeGuidance = this . settings . cleaningModeType === 'aggressive'
987- ? 'AGGRESSIVE MODE: Be thorough in cleaning. You can merge broken lines and normalize formatting when appropriate.'
988- : 'CONSERVATIVE MODE: Be cautious and preserve structure. When in doubt, keep content. Do not merge lines or normalize formatting aggressively.' ;
981+ // Add temperament-specific guidance
982+ const temperament = this . settings . cleaningTemperament || 'gentle' ;
983+ let modeGuidance = '' ;
984+
985+ if ( temperament === 'gentle' ) {
986+ modeGuidance = 'GENTLE MODE: Be very cautious and preserve structure. When in doubt, keep content. Do not merge lines or normalize formatting aggressively.' ;
987+ } else if ( temperament === 'moderate' ) {
988+ modeGuidance = 'MODERATE MODE: Balanced cleaning. You can merge broken lines when appropriate, but preserve formatting structure.' ;
989+ } else if ( temperament === 'thorough' ) {
990+ modeGuidance = 'THOROUGH MODE: Comprehensive cleaning with normalization. Merge broken lines, normalize whitespace and Unicode, but preserve paragraph spacing for readability.' ;
991+ } else if ( temperament === 'aggressive' ) {
992+ modeGuidance = 'AGGRESSIVE MODE: Maximum cleaning. Be thorough in removing noise, merge broken lines, normalize formatting, and remove all unnecessary spacing for compact output.' ;
993+ } else {
994+ // Fallback to old mode-based guidance
995+ modeGuidance = this . settings . cleaningModeType === 'aggressive'
996+ ? 'AGGRESSIVE MODE: Be thorough in cleaning. You can merge broken lines and normalize formatting when appropriate.'
997+ : 'CONSERVATIVE MODE: Be cautious and preserve structure. When in doubt, keep content. Do not merge lines or normalize formatting aggressively.' ;
998+ }
989999
9901000 rules . push ( `0. MODE: ${ modeGuidance } ` ) ;
9911001
9921002 if ( this . settings . removeDuplicates ) {
993- rules . push ( '1. For consecutive duplicate lines: keep the FIRST occurrence, drop subsequent ones' ) ;
1003+ const duplicateRule = temperament === 'aggressive'
1004+ ? '1. For duplicate lines (consecutive OR similar): keep the FIRST occurrence, drop subsequent ones. Be more aggressive in detecting near-duplicates.'
1005+ : '1. For consecutive duplicate lines: keep the FIRST occurrence, drop subsequent ones' ;
1006+ rules . push ( duplicateRule ) ;
9941007 }
9951008 if ( this . settings . removeEmptyLines ) {
996- if ( this . settings . preserveParagraphSpacing ) {
1009+ if ( this . settings . preserveParagraphSpacing && temperament !== 'aggressive' ) {
9971010 rules . push ( '2. Empty lines: preserve ONE empty line between non-empty paragraphs, drop multiple empty lines' ) ;
9981011 } else {
999- rules . push ( '2. Empty lines: remove ALL empty lines' ) ;
1012+ rules . push ( '2. Empty lines: remove ALL empty lines (no paragraph spacing) ' ) ;
10001013 }
10011014 }
1002- if ( this . settings . removePageNumbers ) {
1003- rules . push ( '3. Page numbers: drop ONLY if the entire line is just a number/letter (e.g., "1", "III", "A")' ) ;
1015+ if ( this . settings . removeHeadersFooters ) {
1016+ rules . push ( '3. Page numbers and headers/footers : drop ONLY if the entire line matches known patterns (e.g., "1", "III", "A", "Page X of Y", "Confidential ")' ) ;
10041017 }
10051018 rules . push ( '4. Keep ALL meaningful content - when in doubt, use "keep"' ) ;
10061019
@@ -1503,8 +1516,7 @@ class App {
15031516 // Initialize with default settings (all enabled)
15041517 this . stripper = new DocStripper ( {
15051518 removeEmptyLines : true ,
1506- removePageNumbers : true ,
1507- removeHeadersFooters : true ,
1519+ removeHeadersFooters : true , // Includes page numbers removal
15081520 removeDuplicates : true ,
15091521 removePunctuationLines : true ,
15101522 preserveParagraphSpacing : true ,
@@ -1630,8 +1642,7 @@ class App {
16301642
16311643 // Settings checkboxes
16321644 this . removeEmptyLines = document . getElementById ( 'removeEmptyLines' ) ;
1633- this . removePageNumbers = document . getElementById ( 'removePageNumbers' ) ;
1634- this . removeHeadersFooters = document . getElementById ( 'removeHeadersFooters' ) ;
1645+ this . removeHeadersFooters = document . getElementById ( 'removeHeadersFooters' ) ; // Includes page numbers removal
16351646 // removeRepeatingHeadersFooters is now automatically enabled when removeHeadersFooters is enabled
16361647 this . removeDuplicates = document . getElementById ( 'removeDuplicates' ) ;
16371648 this . removePunctuationLines = document . getElementById ( 'removePunctuationLines' ) ;
@@ -1719,12 +1730,15 @@ class App {
17191730 if ( settings . removeEmptyLines !== undefined && this . removeEmptyLines ) {
17201731 this . removeEmptyLines . checked = settings . removeEmptyLines ;
17211732 }
1722- if ( settings . removePageNumbers !== undefined && this . removePageNumbers ) {
1723- this . removePageNumbers . checked = settings . removePageNumbers ;
1724- }
17251733 if ( settings . removeHeadersFooters !== undefined && this . removeHeadersFooters ) {
17261734 this . removeHeadersFooters . checked = settings . removeHeadersFooters ;
17271735 }
1736+ // Backward compatibility: if old removePageNumbers setting exists, merge it
1737+ if ( settings . removePageNumbers !== undefined && settings . removePageNumbers === false ) {
1738+ if ( this . removeHeadersFooters ) {
1739+ this . removeHeadersFooters . checked = false ;
1740+ }
1741+ }
17281742 // removeRepeatingHeadersFooters is now automatically enabled when removeHeadersFooters is enabled
17291743 if ( settings . removeDuplicates !== undefined && this . removeDuplicates ) {
17301744 this . removeDuplicates . checked = settings . removeDuplicates ;
@@ -1779,8 +1793,7 @@ class App {
17791793 cleaningModeType : this . cleaningModeType ,
17801794 cleaningMode : this . cleaningMode ,
17811795 removeEmptyLines : this . removeEmptyLines ?. checked ?? true ,
1782- removePageNumbers : this . removePageNumbers ?. checked ?? true ,
1783- removeHeadersFooters : this . removeHeadersFooters ?. checked ?? true ,
1796+ removeHeadersFooters : this . removeHeadersFooters ?. checked ?? true , // Includes page numbers removal
17841797 removeRepeatingHeadersFooters : this . removeHeadersFooters ?. checked ?? true , // Automatically enabled when removeHeadersFooters is enabled
17851798 removeDuplicates : this . removeDuplicates ?. checked ?? true ,
17861799 removePunctuationLines : this . removePunctuationLines ?. checked ?? true ,
@@ -1817,11 +1830,11 @@ class App {
18171830
18181831 // Update label and description
18191832 const labels = [ 'Gentle' , 'Moderate' , 'Thorough' , 'Aggressive' ] ;
1820- const descriptions = [
1833+ const descriptions = [
18211834 'Safe defaults, preserves formatting. Best for most documents.' ,
1822- 'Balanced cleaning with line merging enabled.' ,
1823- 'Thorough cleaning with Unicode normalization. Preserves paragraph spacing.' ,
1824- 'Maximum cleaning with all optimizations. Removes paragraph spacing.'
1835+ 'Balanced cleaning with line merging enabled. Preserves paragraph spacing. ' ,
1836+ 'Comprehensive cleaning with normalization. Preserves paragraph spacing for readability .' ,
1837+ 'Maximum cleaning with all optimizations. Removes paragraph spacing for compact output .'
18251838 ] ;
18261839
18271840 let labelIndex = 0 ;
@@ -1845,11 +1858,9 @@ class App {
18451858 // Apply defaults based on temperament mode
18461859 // Base options (always ON)
18471860 if ( this . removeEmptyLines ) this . removeEmptyLines . checked = true ;
1848- if ( this . removePageNumbers ) this . removePageNumbers . checked = true ;
1849- if ( this . removeHeadersFooters ) this . removeHeadersFooters . checked = true ;
1861+ if ( this . removeHeadersFooters ) this . removeHeadersFooters . checked = true ; // Includes page numbers
18501862 if ( this . removeDuplicates ) this . removeDuplicates . checked = true ;
18511863 if ( this . removePunctuationLines ) this . removePunctuationLines . checked = true ;
1852- if ( this . preserveParagraphSpacing ) this . preserveParagraphSpacing . checked = true ;
18531864 if ( this . dehyphenate ) this . dehyphenate . checked = true ;
18541865 if ( this . keepTableSpacing ) this . keepTableSpacing . checked = true ;
18551866
@@ -1859,11 +1870,15 @@ class App {
18591870 // Only basic cleaning, no merging or normalization
18601871 if ( this . mergeBrokenLines ) this . mergeBrokenLines . checked = false ;
18611872 if ( this . normalizeWhitespace ) this . normalizeWhitespace . checked = false ;
1873+ if ( this . normalizeUnicode ) this . normalizeUnicode . checked = false ;
1874+ if ( this . preserveParagraphSpacing ) this . preserveParagraphSpacing . checked = true ;
18621875 break ;
18631876 case 'moderate' :
18641877 // Enable line merging
18651878 if ( this . mergeBrokenLines ) this . mergeBrokenLines . checked = true ;
18661879 if ( this . normalizeWhitespace ) this . normalizeWhitespace . checked = false ;
1880+ if ( this . normalizeUnicode ) this . normalizeUnicode . checked = false ;
1881+ if ( this . preserveParagraphSpacing ) this . preserveParagraphSpacing . checked = true ;
18671882 break ;
18681883 case 'thorough' :
18691884 // Enable merging, whitespace normalization, and Unicode normalization
@@ -1872,6 +1887,7 @@ class App {
18721887 if ( this . normalizeWhitespace ) this . normalizeWhitespace . checked = true ;
18731888 if ( this . normalizeUnicode ) this . normalizeUnicode . checked = true ;
18741889 if ( this . preserveParagraphSpacing ) this . preserveParagraphSpacing . checked = true ;
1890+ // More lenient empty line handling - keep paragraph spacing
18751891 break ;
18761892 case 'aggressive' :
18771893 // Maximum cleaning: all optimizations enabled
@@ -1880,6 +1896,7 @@ class App {
18801896 if ( this . normalizeWhitespace ) this . normalizeWhitespace . checked = true ;
18811897 if ( this . normalizeUnicode ) this . normalizeUnicode . checked = true ;
18821898 if ( this . preserveParagraphSpacing ) this . preserveParagraphSpacing . checked = false ;
1899+ // More aggressive empty line removal - no paragraph spacing
18831900 break ;
18841901 }
18851902 }
@@ -1888,8 +1905,7 @@ class App {
18881905 if ( this . cleaningModeType === 'aggressive' ) {
18891906 // Aggressive mode defaults
18901907 if ( this . removeEmptyLines ) this . removeEmptyLines . checked = true ;
1891- if ( this . removePageNumbers ) this . removePageNumbers . checked = true ;
1892- if ( this . removeHeadersFooters ) this . removeHeadersFooters . checked = true ;
1908+ if ( this . removeHeadersFooters ) this . removeHeadersFooters . checked = true ; // Includes page numbers
18931909 // removeRepeatingHeadersFooters is now automatically enabled when removeHeadersFooters is enabled
18941910 if ( this . removeDuplicates ) this . removeDuplicates . checked = true ;
18951911 if ( this . removePunctuationLines ) this . removePunctuationLines . checked = true ;
@@ -1901,8 +1917,7 @@ class App {
19011917 } else {
19021918 // Conservative mode defaults (current behavior)
19031919 if ( this . removeEmptyLines ) this . removeEmptyLines . checked = true ;
1904- if ( this . removePageNumbers ) this . removePageNumbers . checked = true ;
1905- if ( this . removeHeadersFooters ) this . removeHeadersFooters . checked = true ;
1920+ if ( this . removeHeadersFooters ) this . removeHeadersFooters . checked = true ; // Includes page numbers
19061921 // removeRepeatingHeadersFooters is now automatically enabled when removeHeadersFooters is enabled
19071922 if ( this . removeDuplicates ) this . removeDuplicates . checked = true ;
19081923 if ( this . removePunctuationLines ) this . removePunctuationLines . checked = true ;
@@ -2023,12 +2038,6 @@ class App {
20232038 this . updateStartButton ( ) ;
20242039 } ) ;
20252040 }
2026- if ( this . removePageNumbers ) {
2027- this . removePageNumbers . addEventListener ( 'change' , ( ) => {
2028- this . saveSettings ( ) ;
2029- this . updateStartButton ( ) ;
2030- } ) ;
2031- }
20322041 if ( this . removeHeadersFooters ) {
20332042 this . removeHeadersFooters . addEventListener ( 'change' , ( ) => {
20342043 this . saveSettings ( ) ;
@@ -2280,8 +2289,7 @@ class App {
22802289 // Get current settings from checkboxes (used for both Fast and Smart modes)
22812290 const settings = {
22822291 removeEmptyLines : this . removeEmptyLines ? this . removeEmptyLines . checked : true ,
2283- removePageNumbers : this . removePageNumbers ? this . removePageNumbers . checked : true ,
2284- removeHeadersFooters : this . removeHeadersFooters ? this . removeHeadersFooters . checked : true ,
2292+ removeHeadersFooters : this . removeHeadersFooters ? this . removeHeadersFooters . checked : true , // Includes page numbers removal
22852293 removeRepeatingHeadersFooters : this . removeHeadersFooters ? this . removeHeadersFooters . checked : true , // Automatically enabled when removeHeadersFooters is enabled
22862294 removeDuplicates : this . removeDuplicates ? this . removeDuplicates . checked : true ,
22872295 removePunctuationLines : this . removePunctuationLines ? this . removePunctuationLines . checked : true ,
@@ -2292,6 +2300,7 @@ class App {
22922300 normalizeUnicode : this . normalizeUnicode ? this . normalizeUnicode . checked : false ,
22932301 keepTableSpacing : this . keepTableSpacing ? this . keepTableSpacing . checked : true ,
22942302 cleaningModeType : this . cleaningModeType , // Pass mode type to SmartCleaner
2303+ cleaningTemperament : this . cleaningTemperamentMode || 'gentle' , // Pass temperament to SmartCleaner
22952304 } ;
22962305
22972306 // Create new stripper instance with current settings (for Fast mode or fallback)
@@ -2737,7 +2746,7 @@ class App {
27372746 snackbar . className = 'support-snackbar' ;
27382747 snackbar . innerHTML = `
27392748 <div class="snackbar-content">
2740- <span class="snackbar-text">Saved you some time? ☕ <a href="https://kiku0.gumroad.com/coffee" target="_blank" rel="noopener">Support on Gumroad</a></span>
2749+ <span class="snackbar-text">Saved you some time? 🛒 <a href="https://kiku0.gumroad.com/coffee" target="_blank" rel="noopener">Support on Gumroad</a></span>
27412750 <button class="snackbar-close" aria-label="Close">×</button>
27422751 </div>
27432752 ` ;
0 commit comments