diff --git a/lib/docs/filters/qt/clean_html.rb b/lib/docs/filters/qt/clean_html.rb index 1772a765bc..27204fca3b 100644 --- a/lib/docs/filters/qt/clean_html.rb +++ b/lib/docs/filters/qt/clean_html.rb @@ -2,22 +2,19 @@ module Docs class Qt class CleanHtmlFilter < Filter def call - # Remove unneeded elements - css('.copy-notice, .navigationbar, .headerNavi, .footerNavi, .sidebar, .toc, #ec_toggle', '.landingicons img', 'br').remove + # Narrow down container further. Breadcrumb is safe to remove. + @doc = at_css('article .mainContent .context') unless root_page? + + css('h1').remove_attribute('class') # QML property/method header css('.qmlproto').each do |node| - id = node.at_css('tr')['id'] - id = node.at_css('a')['name'] if id.blank? + id = node.at_css('span.name').content node.inner_html = node.at_css('td').inner_html node.name = 'h3' node['id'] = id end - css('.main-rounded', '.content', '.line', '.context', '.descr', '.types', '.func', '.table', 'div:not([class])', '.landing', '.col-1', '.heading', '.qmlitem', '.qmldoc', 'div.pre').each do |node| - node.before(node.children).remove - end - css('pre').each do |node| node.content = node.content node['data-language'] = 'cpp' if node['class'].include?('cpp') diff --git a/lib/docs/filters/qt/entries.rb b/lib/docs/filters/qt/entries.rb index 27cd0a31f2..2cc41616c4 100644 --- a/lib/docs/filters/qt/entries.rb +++ b/lib/docs/filters/qt/entries.rb @@ -19,25 +19,8 @@ def get_name end def get_type - breadcrumb = css('#main_title_bar + ul li') - category = if breadcrumb.length < 3 - then 'Qt'.dup - else breadcrumb.at(1).content - end - - if category == 'Qt' - return 'Qt Platforms' if name.include?(' for ') || name == 'Qt Platform Abstraction' - return 'Qt Quick' if name == 'Qt Quick Test' || name == 'Qt Quick Test Reference Documentation' - - alwaysInQt = ['Qt Configure Options', 'Qt Image Formats'] - category = name if name.start_with?('Qt ') && !alwaysInQt.include?(name) - end - - qtPlatformsTypes = ['Qt Platform Headers', 'Qt Android Extras', 'Qt Mac Extras', 'Qt Windows Extras', 'Qt X11 Extras'] - return 'Qt Platforms' if qtPlatformsTypes.include?(category) - - category.remove!(' Manual') - category + breadcrumb = css('ul.c-breadcrump li') # Yes, really: breadcrump. + breadcrumb[1].content end def include_default_entry? @@ -107,11 +90,9 @@ def additional_entries end # QML properties/functions - qmlTypeName = at_css('h1.title').content.remove(' QML Type', '') css('.qmlproto').each do |node| title = node.content.strip - id = node.at_css('tr')['id'] - id = node.at_css('a')['name'] if id.blank? + id = node.at_css('span.name').content # Remove options title.remove!(%r{^\[.*\] }) @@ -128,7 +109,8 @@ def additional_entries # Remove return type title.remove!(%r{.* }) - title = "#{qmlTypeName}.#{title.strip}" + title = title.strip + unless titles.include?(title) # Remove duplicates (function overloading) entries << [title, id] titles.push(title) diff --git a/lib/docs/scrapers/qt.rb b/lib/docs/scrapers/qt.rb index 3371120be3..31a21a62ee 100644 --- a/lib/docs/scrapers/qt.rb +++ b/lib/docs/scrapers/qt.rb @@ -11,8 +11,8 @@ class Qt < UrlScraper html_filters.push 'qt/entries', 'qt/clean_html' - options[:container] = 'article, .main' - options[:max_image_size] = 156_000 + options[:container] = '.b-sidebar__content' + options[:max_image_size] = 256_000 options[:skip_patterns] = [ # License, copyright attributions /3rdparty/, @@ -56,6 +56,8 @@ class Qt < UrlScraper "compatmap.html", # Indexes + "qdoc-index.html", + "qmake-manual.html", "classes.html", "qtmodules.html", "modules-qml.html", @@ -103,7 +105,12 @@ class Qt < UrlScraper Licensed under the GNU Free Documentation License, Version 1.3. HTML - version do + version '6.9' do + self.release = '6.9' + self.base_url = "https://doc.qt.io/qt-#{self.release}/" + end + + version '6.8' do self.release = '6.8' self.base_url = "https://doc.qt.io/qt-#{self.release}/" end