Skip to content

Commit 3bce6d4

Browse files
authored
Merge pull request #12
Migrate PDF generation from `wicked_pdf` to `grover`, refactor related components
2 parents c8047ec + c9968bc commit 3bce6d4

File tree

26 files changed

+693
-156
lines changed

26 files changed

+693
-156
lines changed

.env.docker

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ ELASTICSEARCH_ADDRESS=0.0.0.0
5656
# PDF GENERATION
5757
# Tools and settings for PDF document generation
5858
# ==============================================================================
59-
#WKHTMLTOPDF_PATH= # Path to wkhtmltopdf binary
60-
#PUPPETEER_TIMEOUT= # Puppeteer timeout in milliseconds
59+
GROVER_EXECUTABLE_PATH=/usr/bin/chromium
60+
GROVER_NO_SANDBOX=true
61+
PUPPETEER_TIMEOUT=3000
6162

6263
# ==============================================================================
6364
# ERROR TRACKING

.env.example

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# ==============================================================================
55
APP_NAME= # Application name
66
APPLICATION_DOMAIN= # Domain where the application is hosted
7+
FORCE_PREVIEW_GENERATION= # Force regeneration of document and material previews; any set value (even false) enables it, so leave it unset to reuse stored preview links
78

89
# ==============================================================================
910
# DATABASE CONFIGURATION
@@ -56,7 +57,7 @@ ELASTICSEARCH_ADDRESS= # Elasticsearch server address
5657
# PDF GENERATION
5758
# Tools and settings for PDF document generation
5859
# ==============================================================================
59-
WKHTMLTOPDF_PATH= # Path to wkhtmltopdf binary
60+
GROVER_EXECUTABLE_PATH= # Path to the Google Chrome or Chromium executable
6061
PUPPETEER_TIMEOUT= # Puppeteer timeout in milliseconds
6162

6263
# ==============================================================================

Dockerfile.dev

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,34 @@
1-
FROM ruby:3.4.7-slim
1+
FROM ruby:3.4.7-slim AS base
2+
3+
# Install base packages + Chromium for PDF generation
4+
RUN apt-get update -qq && \
5+
apt-get install --no-install-recommends -y \
6+
curl \
7+
libjemalloc2 \
8+
libvips \
9+
postgresql-client \
10+
# Chromium for Grover PDF generation
11+
chromium \
12+
chromium-driver \
13+
# Required Chromium dependencies
14+
fonts-liberation \
15+
libappindicator3-1 \
16+
libasound2 \
17+
libatk-bridge2.0-0 \
18+
libatk1.0-0 \
19+
libcups2 \
20+
libdbus-1-3 \
21+
libnspr4 \
22+
libnss3 \
23+
libx11-xcb1 \
24+
libxcomposite1 \
25+
libxdamage1 \
26+
libxrandr2 \
27+
xdg-utils && \
28+
ln -s /usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2 /usr/local/lib/libjemalloc.so && \
29+
rm -rf /var/lib/apt/lists /var/cache/apt/archives
30+
31+
FROM base AS dev
232

333
# Install development packages
434
RUN apt-get update -qq && \

Gemfile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ group :development do
6666
gem "web-console"
6767
end
6868

69+
# ============================================================================
70+
# New dependencies
71+
# ============================================================================
72+
gem "grover"
73+
6974
# ============================================================================
7075
# Dependencies migrated from lcms-engine gem
7176
# ============================================================================
@@ -123,8 +128,6 @@ gem "combine_pdf", "~> 1.0"
123128
gem "rubyzip", "~> 2.3"
124129
gem "nokogiri", "~> 1.16"
125130
gem "sanitize", "~> 6.1"
126-
gem "wicked_pdf", "~> 2.1"
127-
gem "wkhtmltopdf-binary", "~> 0.12.6"
128131

129132
# HTTP & External APIs
130133
gem "httparty", "~> 0.22"

Gemfile.lock

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,8 @@ GEM
342342
multi_json (~> 1.11)
343343
os (>= 0.9, < 2.0)
344344
signet (>= 0.16, < 2.a)
345+
grover (1.2.4)
346+
nokogiri (~> 1)
345347
hashie (5.0.0)
346348
hiredis-client (0.26.1)
347349
redis-client (= 0.26.1)
@@ -790,16 +792,12 @@ GEM
790792
base64
791793
websocket-extensions (>= 0.1.0)
792794
websocket-extensions (0.1.5)
793-
wicked_pdf (2.8.2)
794-
activesupport
795-
ostruct
796795
will_paginate (4.0.1)
797796
will_paginate-bootstrap-style (0.3.0)
798797
will_paginate (~> 4.0, >= 4.0.0)
799798
with_advisory_lock (5.3.0)
800799
activerecord (>= 6.1)
801800
zeitwerk (>= 2.6)
802-
wkhtmltopdf-binary (0.12.6.10)
803801
xpath (3.2.0)
804802
nokogiri (~> 1.8)
805803
zeitwerk (2.7.3)
@@ -847,6 +845,7 @@ DEPENDENCIES
847845
fog-aws (~> 3.5, >= 3.5.2)
848846
google-apis-drive_v3 (~> 0.66)
849847
google-apis-script_v1 (~> 0.28)
848+
grover
850849
hiredis-client
851850
httparty (~> 0.22)
852851
image_processing (~> 1.2)
@@ -900,11 +899,9 @@ DEPENDENCIES
900899
virtus (~> 1.0, >= 1.0.5)
901900
web-console
902901
webdrivers
903-
wicked_pdf (~> 2.1)
904902
will_paginate (~> 4.0)
905903
will_paginate-bootstrap-style (~> 0.3)
906904
with_advisory_lock (~> 5.0)
907-
wkhtmltopdf-binary (~> 0.12.6)
908905

909906
BUNDLED WITH
910907
2.6.9

app/controllers/materials_controller.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@ class MaterialsController < Admin::AdminController
44
before_action :set_material
55

66
def preview_pdf
7-
if !ENV.fetch('FORCE_PREVIEW_GENERATION', false) && (url = @material.preview_links["pdf"]).present?
7+
if !ENV.fetch("FORCE_PREVIEW_GENERATION", false) && (url = @material.preview_links["pdf"]).present?
88
return redirect_to url
99
end
1010

1111
preview_for :pdf
1212
end
1313

1414
def preview_gdoc
15-
if !ENV.fetch('FORCE_PREVIEW_GENERATION', false) && (url = @material.preview_links["gdoc"]).present?
15+
if !ENV.fetch("FORCE_PREVIEW_GENERATION", false) && (url = @material.preview_links["gdoc"]).present?
1616
return redirect_to url
1717
end
1818

@@ -28,7 +28,7 @@ def preview_for(preview_type, options = {})
2828
if service.perform
2929
links = @material.preview_links
3030
@material.update preview_links: links.merge(preview_type => service.url)
31-
redirect_to service.url
31+
redirect_to service.url, allow_other_host: true
3232
else
3333
redirect_to material_path(@material), alert: service.error
3434
end

app/helpers/asset_helper.rb

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# frozen_string_literal: true
2+
3+
# Helpers for embedding application assets directly into generated output,
# either as Base64 data URIs or as raw inlined content.
#
# In the development and test environments assets are looked up through
# `Rails.application.assets`; in every other environment they are read from
# `public/assets` using the paths recorded in `Rails.application.assets_manifest`.
module AssetHelper
  # Prefix of the Redis keys under which encoded assets are cached.
  REDIS_PREFIX = "ub-b64-asset"

  class << self
    # Returns the asset at +path+ encoded as a Base64 data URI.
    #
    # @param path [String] logical asset path
    # @param cache [Boolean] when true, a previously cached value is returned if
    #   present, and a freshly encoded value is stored in Redis for one day
    # @return [String] "data:<content type>;base64,<escaped payload>"
    # @raise [RuntimeError] when the asset or its MIME type cannot be resolved
    def base64_encoded(path, cache: false)
      return encode(path) unless cache

      key = "#{REDIS_PREFIX}#{path}"
      cached = redis.get(key)
      return cached if cached.present?

      encode(path).tap { |data_uri| redis.set key, data_uri, ex: 1.day.to_i }
    end

    # Returns the content of the asset at +path+ for inlining.
    #
    # In development/test this is whatever `find_asset` returns (possibly nil);
    # otherwise it is the content of the precompiled file as a String.
    #
    # @param path [String] logical asset path
    # @raise [RuntimeError] when the asset is missing from the precompiled manifest
    def inlined(path)
      return live_asset(path) if live_assets?

      file = compiled_asset_file(path)
      # Fail with a meaningful message instead of the TypeError that
      # `Rails.root.join` raises when handed a nil manifest entry.
      raise "Could not find asset '#{path}'" if file.nil?

      File.read(file)
    end

    private

    # Builds the data URI for +path+, resolving content and MIME type according
    # to the current environment.
    def encode(path)
      if live_assets?
        asset = live_asset(path)
        content_type = asset&.content_type
      elsif (file = compiled_asset_file(path))
        # Assets may be binary (fonts, images), so read the raw bytes.
        asset = File.binread(file)
        content_type = Mime::Type.lookup_by_extension(File.extname(path).delete_prefix("."))
      end
      raise "Could not find asset '#{path}'" if asset.nil?
      raise "Unknown MimeType for asset '#{path}'" if content_type.nil?

      # strict_encode64 emits no line breaks, so no whitespace stripping is needed.
      encoded = Base64.strict_encode64(asset.to_s)
      "data:#{content_type};base64,#{Rack::Utils.escape(encoded)}"
    end

    # True when assets are looked up through `Rails.application.assets`.
    def live_assets?
      Rails.env.development? || Rails.env.test?
    end

    # Looks the asset up through `Rails.application.assets`; nil when not found.
    def live_asset(path)
      Rails.application.assets.find_asset(path)
    end

    # Location of the precompiled file for +path+ under public/assets, or nil
    # when the manifest has no entry for it.
    def compiled_asset_file(path)
      filesystem_path = Rails.application.assets_manifest.assets[path]
      filesystem_path && Rails.root.join("public", "assets", filesystem_path)
    end

    # Redis client configured on the application.
    def redis
      Rails.application.config.redis
    end
  end
end

app/presenters/document_presenter.rb

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ def content_for(context_type, options = {})
99
with_excludes = (options[:excludes] || []).any?
1010
content = render_content(context_type, options)
1111
content = update_activity_timing(content) if with_excludes
12-
content = remove_optional_break(content) if ela? && with_excludes
1312
content
1413
end
1514

@@ -21,20 +20,10 @@ def doc_type
2120
"lesson"
2221
end
2322

24-
def remove_optional_break(content)
25-
html = Nokogiri::HTML.fragment content
26-
html.at_css(".o-ld-optbreak-wrapper")&.remove
27-
html.to_html
28-
end
29-
3023
def base_metadata
3124
@base_metadata ||= DocTemplate::Objects::Document.build_from(metadata)
3225
end
3326

34-
def module_value
35-
ela? ? send(:module) : unit
36-
end
37-
3827
def pdf_filename
3928
name = short_breadcrumb(join_with: "_", with_short_lesson: true)
4029
name += PDF_SUBTITLES[content_type.to_sym]
@@ -63,6 +52,7 @@ def render_content(context_type, options = {})
6352
def short_breadcrumb(join_with: " / ", with_short_lesson: false, with_subject: true, unit_level: false)
6453
lesson_abbr = with_short_lesson ? "L#{lesson}" : "Lesson #{lesson}" \
6554
unless unit_level
55+
module_value = ela? ? send(:module) : unit
6656
[
6757
with_subject ? SUBJECT_FULL[subject] || subject : nil,
6858
grade.to_i.zero? ? grade : "G#{grade}",

app/presenters/material_presenter.rb

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
# frozen_string_literal: true
22

33
class MaterialPresenter < ContentPresenter
4-
attr_accessor :document
5-
6-
delegate :name_date, :show_title, :subject, to: :base_metadata
4+
delegate :grade, :name_date, :show_title, :subject, to: :base_metadata
75

86
DEFAULT_TITLE = "Material"
97
MATERIAL_TYPES = {
@@ -14,22 +12,17 @@ class MaterialPresenter < ContentPresenter
1412

1513
def base_filename(with_version: true)
1614
name = base_metadata.identifier
17-
name = "#{document.short_breadcrumb(join_with: '_', with_short_lesson: true)}_#{name}"
1815
with_version ? "#{name}_v#{version.presence || 1}" : name
1916
end
2017

2118
def cc_attribution
22-
base_metadata.cc_attribution.presence || document&.cc_attribution
19+
base_metadata.cc_attribution.to_s
2320
end
2421

2522
def content_for(context_type, options = {})
2623
render_content(context_type, options)
2724
end
2825

29-
def gdoc_folder
30-
"#{document.id}_v#{document.version}"
31-
end
32-
3326
def gdoc_preview_title
3427
preview_links["gdoc"].present? ? "Preview Google Document" : "Generate Google Document"
3528
end
@@ -51,7 +44,7 @@ def orientation
5144
end
5245

5346
def pdf_filename
54-
"#{document.id}/#{base_filename}"
47+
"#{id}/#{base_filename}"
5548
end
5649

5750
def pdf_url
@@ -93,11 +86,7 @@ def base_metadata
9386
@base_metadata ||= DocTemplate::Objects::Material.build_from(metadata)
9487
end
9588

96-
def material_links
97-
@material_links ||= (document || @lesson).links["materials"]&.dig(id.to_s)
98-
end
99-
10089
def material_url(key)
101-
material_links&.dig(key).to_s
90+
"TBD"
10291
end
10392
end

app/services/material_preview_generator.rb

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ def initialize(material, options = {})
1515
end
1616

1717
def perform
18-
return false unless assign_document
19-
2018
options[:type] == :pdf ? generate_pdf : generate_gdoc
2119
rescue StandardError => e
2220
@error = e.message
@@ -27,18 +25,6 @@ def perform
2725

2826
attr_reader :material, :options
2927

30-
# TODO: Check why we need the connection between material and document
31-
def assign_document # rubocop:disable Naming/PredicateMethod
32-
document = material.documents.last || Document.last
33-
unless document.present?
34-
@error = "Can't generate PDF for preview: no documents exist"
35-
return false
36-
end
37-
material.document = DocumentGenerator.document_presenter.new document
38-
39-
true
40-
end
41-
4228
def generate_gdoc
4329
folder_id = options[:folder_id]
4430
file_id = material.preview_links["gdoc"].to_s.match(GDOC_RE)&.[](1)
@@ -47,7 +33,7 @@ def generate_gdoc
4733
end
4834

4935
def generate_pdf # rubocop:disable Naming/PredicateMethod
50-
pdf_filename = "#{PDF_S3_FOLDER}/#{material.id}/#{material.base_filename}#{ContentPresenter::PDF_EXT}"
36+
pdf_filename = "#{PDF_S3_FOLDER}/#{material.base_filename}#{ContentPresenter::PDF_EXT}"
5137
pdf = DocumentExporter::Pdf::Material.new(material).export
5238
@url = S3Service.upload pdf_filename, pdf
5339
true

0 commit comments

Comments
 (0)