Skip to content

Commit cd54087

Browse files
committed
[E] Enhance ingestion messaging when source paths are malformed
* Surface ingestion severity properly
* Add a gitignore to handle new zips in the spec/data directory without `git add -f`
* Update rake ingestion tasks to better log failures when they occur
1 parent 93e2740 commit cd54087

File tree

14 files changed

+312
-41
lines changed

14 files changed

+312
-41
lines changed

api/app/jobs/ingestions/log_message_job.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
module Ingestions
44
class LogMessageJob < AsyncApplicationJob
5-
def perform(ingestion_id:, kind:, payload:)
6-
IngestionMessage.create!(ingestion_id:, kind:, payload:)
5+
def perform(ingestion_id:, kind:, payload:, severity: "unknown")
6+
IngestionMessage.create!(ingestion_id:, kind:, payload:, severity:)
77
end
88
end
99
end

api/app/models/ingestion.rb

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ class Ingestion < ApplicationRecord
2020
belongs_to :text, optional: true
2121
belongs_to :text_section, optional: true
2222
belongs_to :project
23-
has_many :ingestion_messages, -> { reorder(created_at: :asc) }, inverse_of: :ingestion, dependent: :delete_all
23+
has_many :ingestion_messages, -> { in_default_order }, inverse_of: :ingestion, dependent: :delete_all
2424

2525
# Validations
2626
validates :source, presence: true, if: :file_based_ingestion?
@@ -131,7 +131,8 @@ def add(severity, message = nil, _progname = nil)
131131
::Ingestions::LogMessageJob.perform_later(
132132
ingestion_id: id,
133133
kind: "log",
134-
payload: line
134+
payload: line,
135+
severity: severity.downcase,
135136
)
136137
end
137138

@@ -144,37 +145,59 @@ def commit_log
144145
self.log_buffer = []
145146
end
146147

148+
# @param [User] user
149+
# @return [void]
147150
def begin_processing(user)
148151
update_column :processing_failed, false
149152

150153
target_kind.begin_processing(user, self)
151154
end
152155

156+
# @param [StandardError, Ingestions::IngestionError, ActiveModel::Errors] errors
157+
# @return [void]
153158
def handle_ingestion_exception(errors)
154159
update_column :processing_failed, true
155160

156161
error("Processing failed.\n")
157162

158-
if errors.respond_to?(:full_messages)
159-
output_errors(errors)
160-
else
161-
compose_and_output_backtrace(errors)
162-
end
163+
handle_ingestion_errors!(errors)
163164

164165
processing_failure
165166
end
166167

167168
private
168169

170+
# @param [StandardError, Ingestions::IngestionError, ActiveModel::Errors] errors
171+
# @return [void]
172+
def handle_ingestion_errors!(errors)
173+
if errors.respond_to?(:full_messages)
174+
output_errors(errors)
175+
else
176+
case errors
177+
when StandardError
178+
compose_and_output_backtrace(errors)
179+
else
180+
# :nocov:
181+
fatal("Something went wrong with ingestion: #{errors.inspect}")
182+
# :nocov:
183+
end
184+
end
185+
end
186+
187+
# @param [StandardError, Ingestions::IngestionError] errors
188+
# @return [void]
169189
def compose_and_output_backtrace(errors)
170190
output = errors.message
191+
171192
Rails.backtrace_cleaner.clean(errors.backtrace).each do |line|
172193
output += "\n#{line}"
173194
end
174195

175196
error(output)
176197
end
177198

199+
# @param [ActiveModel::Errors] errors
200+
# @return [void]
178201
def output_errors(errors)
179202
errors.full_messages.each do |e|
180203
error(e)

api/app/models/ingestion_message.rb

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,37 @@
22

33
# A message stored during resource ingestion to be later returned to the client
44
class IngestionMessage < ApplicationRecord
5-
belongs_to :ingestion
5+
LOG_LEVELS = %w[unknown debug info warn error fatal].freeze
6+
7+
LOG_LEVEL_ENUM = LOG_LEVELS.to_h { |level| [level, level] }.freeze
8+
9+
enum :severity, LOG_LEVEL_ENUM, suffix: :severity, allow_blank: false, default: "unknown"
10+
11+
belongs_to :ingestion, inverse_of: :ingestion_messages
12+
13+
scope :in_default_order, -> { reorder(created_at: :asc) }
14+
15+
scope :logs, -> { where(kind: "log") }
16+
scope :log_message_matches, ->(needle) { logs.where(arel_log_message_matches(needle)) }
17+
18+
before_validation :normalize_severity!
19+
620
validates :kind, :payload, presence: true
21+
22+
private
23+
24+
# @return [void]
25+
def normalize_severity!
26+
self.severity = severity.to_s.downcase.presence_in(LOG_LEVELS) || "unknown"
27+
end
28+
29+
class << self
30+
# @param [String] needle
31+
# @return [Arel::Nodes::Matches]
32+
def arel_log_message_matches(needle)
33+
ingestion_messages = IngestionMessage.arel_table
34+
35+
arel_named_fn("extract_ingestion_message_text", ingestion_messages[:payload]).matches(arel_quote(needle), nil, false)
36+
end
37+
end
738
end

api/app/serializers/v1/ingestion_message_serializer.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ class IngestionMessageSerializer < ManifoldSerializer
66

77
typed_attribute :kind, Types::String.meta(read_only: true)
88
typed_attribute :payload, Types::Any.meta(read_only: true)
9+
typed_attribute :severity, Types::Any.meta(read_only: true)
910
typed_attribute :id, Types::Serializer::ID
1011
typed_attribute :created_at, Types::String.meta(read_only: true)
1112
end

api/app/services/ingestions/strategies/manifest.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,17 @@ def preprocess
4444
rel_path = context.rel_path_without_ext source
4545
context.write_build_file "#{rel_path}.html", raw_html
4646
end
47+
48+
# This block is only called when there are missing source paths.
49+
inspector.validate_local_sources! do |missing|
50+
error "services.ingestions.pre_processor.manifest.missing_source_paths"
51+
52+
missing.each do |source_path|
53+
error "services.ingestions.pre_processor.manifest.missing_source_path", source_path: source_path
54+
end
55+
56+
raise Ingestions::IngestionError, "Manifest pre-processing failed: missing source paths"
57+
end
4758
end
4859

4960
def manifest

api/app/services/ingestions/strategy/manifest/inspector.rb

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,30 @@ def external_sources
103103
source_map.select { |source| context.url? source["source_path"] }
104104
end
105105

106+
def local_sources
107+
source_map.reject { |source| context.url? source["source_path"] }
108+
end
109+
110+
def local_source_paths
111+
local_sources.pluck("source_path")
112+
end
113+
114+
# @yield [missing]
115+
# @yieldparam [Array<String>] missing list of missing local source paths
116+
# @yieldreturn [void]
117+
# @return [Array<String>] list of missing local source paths
118+
def validate_local_sources!
119+
source_root_path = Pathname(context.source_root)
120+
121+
missing = local_source_paths.reject do |source_path|
122+
source_root_path.join(source_path).exist?
123+
end
124+
125+
yield missing if block_given? && missing.any?
126+
127+
return missing
128+
end
129+
106130
def update_source_map(original_path, new_path)
107131
item = source_map.detect do |source|
108132
source["source_path"] == original_path

api/config/locales/services/ingestions/en.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ en:
4848
landmark_nav_title: 'Landmark nav title is "%{text}"'
4949
toc_nav_title: 'TOC nav title is "%{text}"'
5050
page_list_nav_title: 'Page list nav title is "%{text}"'
51+
pre_processor:
52+
manifest:
53+
missing_source_paths: "One or more source paths were defined by the manifest but not found. Check spelling and capitalization."
54+
missing_source_path: "Manifest could not find an expected source path: %{source_path}"
5155
post_processor:
5256
log:
5357
transform_toc_structure: 'Transforming TOC structure'

api/db/migrate/20251020225421_add_severity_to_ingestion_messages.rb

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# frozen_string_literal: true
2+
3+
class AddSeverityToIngestionMessages < ActiveRecord::Migration[7.0]
4+
def change
5+
create_enum :ingestion_message_severity, %w[
6+
unknown debug info warn error fatal
7+
]
8+
9+
reversible do |dir|
10+
dir.up do
11+
execute <<~SQL
12+
CREATE FUNCTION public.normalize_ingestion_message_severity(text) RETURNS public.ingestion_message_severity AS $$
13+
SELECT
14+
CASE LOWER($1)
15+
WHEN 'debug' THEN 'debug'::public.ingestion_message_severity
16+
WHEN 'info' THEN 'info'::public.ingestion_message_severity
17+
WHEN 'warn' THEN 'warn'::public.ingestion_message_severity
18+
WHEN 'error' THEN 'error'::public.ingestion_message_severity
19+
WHEN 'fatal' THEN 'fatal'::public.ingestion_message_severity
20+
ELSE 'unknown'::public.ingestion_message_severity
21+
END;
22+
$$ LANGUAGE SQL IMMUTABLE CALLED ON NULL INPUT PARALLEL SAFE;
23+
24+
CREATE FUNCTION public.extract_ingestion_message_severity(jsonb) RETURNS public.ingestion_message_severity AS $$
25+
SELECT
26+
CASE
27+
WHEN jsonb_typeof($1) = 'array' THEN public.normalize_ingestion_message_severity($1->>0)
28+
ELSE 'unknown'::public.ingestion_message_severity
29+
END;
30+
$$ LANGUAGE SQL IMMUTABLE CALLED ON NULL INPUT PARALLEL SAFE;
31+
32+
CREATE FUNCTION public.extract_ingestion_message_text(jsonb) RETURNS text AS $$
33+
SELECT
34+
CASE
35+
WHEN jsonb_typeof($1) = 'array' THEN $1->>1
36+
ELSE NULL
37+
END;
38+
$$ LANGUAGE SQL IMMUTABLE CALLED ON NULL INPUT PARALLEL SAFE;
39+
SQL
40+
end
41+
42+
dir.down do
43+
execute <<~SQL
44+
DROP FUNCTION public.extract_ingestion_message_text(jsonb);
45+
DROP FUNCTION public.extract_ingestion_message_severity(jsonb);
46+
DROP FUNCTION public.normalize_ingestion_message_severity(text);
47+
SQL
48+
end
49+
end
50+
51+
change_table :ingestion_messages do |t|
52+
t.enum :severity, enum_type: :ingestion_message_severity, null: false, default: "unknown"
53+
54+
t.index %[(extract_ingestion_message_text(payload))],
55+
name: "index_ingestion_messages_on_extracted_text",
56+
where: %[kind = 'log']
57+
end
58+
59+
reversible do |dir|
60+
dir.up do
61+
say_with_time "Migrating ingestion message severities" do
62+
exec_update(<<~SQL, "Migrate severities")
63+
UPDATE ingestion_messages
64+
SET severity = public.extract_ingestion_message_severity(payload)
65+
WHERE kind = 'log';
66+
SQL
67+
end
68+
end
69+
end
70+
end
71+
end

api/db/structure.sql

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,20 @@ CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA public;
135135
COMMENT ON EXTENSION "uuid-ossp" IS 'generate universally unique identifiers (UUIDs)';
136136

137137

138+
--
139+
-- Name: ingestion_message_severity; Type: TYPE; Schema: public; Owner: -
140+
--
141+
142+
CREATE TYPE public.ingestion_message_severity AS ENUM (
143+
'unknown',
144+
'debug',
145+
'info',
146+
'warn',
147+
'error',
148+
'fatal'
149+
);
150+
151+
138152
--
139153
-- Name: manifold_lang; Type: TYPE; Schema: public; Owner: -
140154
--
@@ -145,6 +159,36 @@ CREATE TYPE public.manifold_lang AS ENUM (
145159
);
146160

147161

162+
--
163+
-- Name: extract_ingestion_message_severity(jsonb); Type: FUNCTION; Schema: public; Owner: -
164+
--
165+
166+
CREATE FUNCTION public.extract_ingestion_message_severity(jsonb) RETURNS public.ingestion_message_severity
167+
LANGUAGE sql IMMUTABLE PARALLEL SAFE
168+
AS $_$
169+
SELECT
170+
CASE
171+
WHEN jsonb_typeof($1) = 'array' THEN public.normalize_ingestion_message_severity($1->>0)
172+
ELSE 'unknown'::public.ingestion_message_severity
173+
END;
174+
$_$;
175+
176+
177+
--
178+
-- Name: extract_ingestion_message_text(jsonb); Type: FUNCTION; Schema: public; Owner: -
179+
--
180+
181+
CREATE FUNCTION public.extract_ingestion_message_text(jsonb) RETURNS text
182+
LANGUAGE sql IMMUTABLE PARALLEL SAFE
183+
AS $_$
184+
SELECT
185+
CASE
186+
WHEN jsonb_typeof($1) = 'array' THEN $1->>1
187+
ELSE NULL
188+
END;
189+
$_$;
190+
191+
148192
--
149193
-- Name: extract_text_section_content(jsonb); Type: FUNCTION; Schema: public; Owner: -
150194
--
@@ -258,6 +302,25 @@ CREATE FUNCTION public.manifold_slugify(text) RETURNS text
258302
$_$;
259303

260304

305+
--
306+
-- Name: normalize_ingestion_message_severity(text); Type: FUNCTION; Schema: public; Owner: -
307+
--
308+
309+
CREATE FUNCTION public.normalize_ingestion_message_severity(text) RETURNS public.ingestion_message_severity
310+
LANGUAGE sql IMMUTABLE PARALLEL SAFE
311+
AS $_$
312+
SELECT
313+
CASE LOWER($1)
314+
WHEN 'debug' THEN 'debug'::public.ingestion_message_severity
315+
WHEN 'info' THEN 'info'::public.ingestion_message_severity
316+
WHEN 'warn' THEN 'warn'::public.ingestion_message_severity
317+
WHEN 'error' THEN 'error'::public.ingestion_message_severity
318+
WHEN 'fatal' THEN 'fatal'::public.ingestion_message_severity
319+
ELSE 'unknown'::public.ingestion_message_severity
320+
END;
321+
$_$;
322+
323+
261324
--
262325
-- Name: to_unaccented_tsv(jsonb); Type: FUNCTION; Schema: public; Owner: -
263326
--
@@ -1674,7 +1737,8 @@ CREATE TABLE public.ingestion_messages (
16741737
kind text NOT NULL,
16751738
payload jsonb NOT NULL,
16761739
created_at timestamp(6) without time zone NOT NULL,
1677-
updated_at timestamp(6) without time zone NOT NULL
1740+
updated_at timestamp(6) without time zone NOT NULL,
1741+
severity public.ingestion_message_severity DEFAULT 'unknown'::public.ingestion_message_severity NOT NULL
16781742
);
16791743

16801744

@@ -4929,6 +4993,13 @@ CREATE INDEX index_import_selections_on_source_text_id ON public.import_selectio
49294993
CREATE INDEX index_import_selections_on_text_id ON public.import_selections USING btree (text_id);
49304994

49314995

4996+
--
4997+
-- Name: index_ingestion_messages_on_extracted_text; Type: INDEX; Schema: public; Owner: -
4998+
--
4999+
5000+
CREATE INDEX index_ingestion_messages_on_extracted_text ON public.ingestion_messages USING btree (public.extract_ingestion_message_text(payload)) WHERE (kind = 'log'::text);
5001+
5002+
49325003
--
49335004
-- Name: index_ingestion_messages_on_ingestion_id; Type: INDEX; Schema: public; Owner: -
49345005
--
@@ -7770,6 +7841,7 @@ INSERT INTO "schema_migrations" (version) VALUES
77707841
('20250609192241'),
77717842
('20251016204352'),
77727843
('20251017174417'),
7773-
('20251017211501');
7844+
('20251017211501'),
7845+
('20251020225421');
77747846

77757847

0 commit comments

Comments
 (0)