@@ -45,63 +45,68 @@ AS $BODY$
4545 SELECT regexp_replace(public .text_without_metadata (public .text_without_format (input)), ' </?(J|i)>' , ' ' , ' g' )
4646$BODY$;
4747
48- -- splits text into words with associated metadata (strong, morph, footnote, note, header)
49- DROP FUNCTION IF EXISTS public .words_with_metadata (text );
50- CREATE OR REPLACE FUNCTION public .words_with_metadata (input text )
48+ -- extracts words with metadata into jsonb
49+ DROP FUNCTION IF EXISTS public .collect_tags (text );
50+ CREATE OR REPLACE FUNCTION public .collect_tags (input text )
5151 RETURNS jsonb
52- LANGUAGE ' plpgsql'
52+ LANGUAGE plpgsql
5353 COST 100
5454 VOLATILE PARALLEL UNSAFE
5555AS $BODY$
5656DECLARE
57- result jsonb := ' []' ::jsonb;
58- word text ;
59- tags text ; -- all tags that follow the word (space-separated in the source)
60- strong text ;
61- morph text ;
62- footnote text ;
63- note text ;
64- header text ;
65- cleaned text ;
57+ cleaned text ;
58+ result jsonb;
6659BEGIN
60+ -- optional, keep your preprocessing if it's still desired
6761 cleaned := public .text_without_format (input);
6862 cleaned := replace(cleaned, ' <i>' , ' [' );
6963 cleaned := replace(cleaned, ' </i>' , ' ]' );
7064
71- -- 1st group = the word (no whitespace or '<')
72- -- 2nd group = zero or more following tags belonging to that word
73- FOR word, tags IN
74- SELECT m[ 1 ], m [2 ]
65+ WITH matches AS (
66+ SELECT
67+ (m)[ 1 ] AS tag,
68+ (m) [2 ] AS val
7569 FROM regexp_matches(
7670 cleaned,
77- ' ([^\s <]+)' ||
78- ' ((?:\s *(?:<S>[^<]+</S>|<m>[^<]+</m>|<f>[^<]+</f>|<n>[^<]+</n>|<h>[^<]+</h>))*)' ,
71+ ' <([A-Za-z][A-Za-z0-9]*)>([^<]+)</\1 >' ,
7972 ' g'
8073 ) AS m
81- LOOP
82- -- pull fields from the collected tags blob
83- strong := substring (tags from ' <S>([^<]+)</S>' );
84- morph := substring (tags from ' <m>([^<]+)</m>' );
85- footnote := substring (tags from ' <f>([^<]+)</f>' );
86- note := substring (tags from ' <n>([^<]+)</n>' );
87- header := substring (tags from ' <h>([^<]+)</h>' );
88-
89- result := result || jsonb_build_array(
90- jsonb_strip_nulls(jsonb_build_object(
91- ' text' , word,
92- ' strong' , strong,
93- ' morph' , morph,
94- ' footnote' , footnote,
95- ' note' , note,
96- ' header' , header
97- ))
98- );
99- END LOOP;
74+ ),
75+ agg AS (
76+ SELECT
77+ tag,
78+ array_agg(val) AS vals
79+ FROM matches
80+ GROUP BY tag
81+ )
82+ SELECT
83+ COALESCE(jsonb_object_agg(tag, to_jsonb(vals)), ' {}' ::jsonb)
84+ INTO result
85+ FROM agg;
10086
10187 RETURN result;
10288END;
10389$BODY$;
10490
91+ -- fetches a verse along with its metadata
92+ DROP FUNCTION IF EXISTS public .fetch_verse_with_metadata (text );
93+ CREATE OR REPLACE FUNCTION public .fetch_verse_with_metadata(p_verse_id text )
94+ RETURNS TABLE(verse_id text , metadata jsonb)
95+ LANGUAGE ' plpgsql'
96+ COST 100
97+ VOLATILE PARALLEL UNSAFE
98+ ROWS 1000
99+ AS $BODY$
100+ BEGIN
101+ RETURN QUERY
102+ SELECT
103+ v .id AS verse_id,
104+ public .collect_tags (v .text ) AS metadata
105+ FROM public ._all_verses v
106+ WHERE v .id = p_verse_id;
107+ END;
108+ $BODY$;
109+
105110-- parses a Bible address
106111DROP FUNCTION IF EXISTS public .parse_address (text );
107112CREATE OR REPLACE FUNCTION public .parse_address(address text )
0 commit comments