@@ -3,27 +3,26 @@ class Duckdb
33 class CleanHtmlFilter < Filter
44 def call
55 # First extract the main content
6- @doc = at_css ( 'main' )
6+ @doc = at_css ( '#main_content_wrap' , ' main')
77 return doc if @doc . nil?
88
9+ doc . prepend_child at_css ( '.title' ) . remove
10+ at_css ( '.title' ) . name = 'h1'
11+
912 # Remove navigation and header elements
10- css ( '.headerline' , '.landingmenu' , '.search_icon' , '#sidebar' , '.pagemeta' , '.toc_menu' , '.section-nav' ) . remove
13+ css ( '.headerline' , '.headlinebar' , '. landingmenu', '.search_icon' , '#sidebar' , '.pagemeta' , '.toc_menu' , '.section-nav' ) . remove
1114
1215 # Clean up code blocks
13- css ( 'pre' ) . each do |node |
14- # Detect language from class or parent div
15- if node [ 'class' ] &.include? ( 'sql' ) || node . at_css ( 'code.sql' )
16- node [ 'data-language' ] = 'sql'
17- elsif node [ 'class' ] &.include? ( 'language-sql' )
18- node [ 'data-language' ] = 'sql'
19- end
16+ css ( 'div.highlighter-rouge' ) . each do |node |
17+ node [ 'data-language' ] = node [ 'class' ] [ /language-(\w +)/ , 1 ] if node [ 'class' ]
2018 node . content = node . content . strip
19+ node . name = 'pre'
2120 end
2221
23- # Remove unnecessary attributes but keep essential ones
22+ # Remove unnecessary attributes
2423 css ( 'div, span, p' ) . each do |node |
2524 node . remove_attribute ( 'style' )
26- node . remove_attribute ( 'class' ) unless node [ 'class' ] =~ /highlight/
25+ node . remove_attribute ( 'class' )
2726 end
2827
2928 # Remove empty elements
0 commit comments