44 * Contains all RAG-related tables for parliament and legislation embeddings.
55 */
66
7- import type { InferSelectModel } from "drizzle-orm" ;
7+ import { type InferSelectModel , sql } from "drizzle-orm" ;
88import {
9+ check ,
910 customType ,
1011 index ,
1112 integer ,
1213 jsonb ,
1314 pgSchema ,
1415 text ,
1516 timestamp ,
17+ unique ,
1618 varchar ,
1719 vector ,
1820} from "drizzle-orm/pg-core" ;
1921import { nanoid } from "nanoid" ;
2022
/**
 * Valid source types for legislation resources.
 *
 * This tuple is the single source of truth for source types:
 * - `LegSourceType` is derived from it, so the TypeScript union cannot drift
 *   from the runtime list.
 * - The `leg_resources_source_type_check` CHECK constraint builds its
 *   `IN (...)` list from these same values.
 *
 * The CHECK constraint splices each value into raw SQL wrapped in single
 * quotes, so entries must stay plain identifiers without quote characters.
 */
export const LEG_SOURCE_TYPES = [
  "act",
  "act_section",
  "regulation",
  "regulation_section",
  "defined_term",
  "preamble",
  "treaty",
  "cross_reference",
  "table_of_provisions",
  "signature_block",
  "related_provisions",
] as const;

/** Union of every string literal listed in `LEG_SOURCE_TYPES`. */
export type LegSourceType = (typeof LEG_SOURCE_TYPES)[number];
42+
/** Handle for the Postgres schema named "rag"; tables in this module are declared through it. */
export const ragSchema = pgSchema("rag");
2244
2345/**
@@ -182,8 +204,8 @@ export type ParlEmbedding = InferSelectModel<typeof parlEmbeddings>;
182204 * Fields needed for search filtering and citation building.
183205 */
184206export type LegResourceMetadata = {
185- // Identity - source types for acts, regulations, and their sections
186- sourceType : "act" | "act_section" | "regulation" | "regulation_section" ;
207+ // Identity - source types for all legislation content
208+ sourceType : LegSourceType ;
187209 language : "en" | "fr" ;
188210 chunkIndex ?: number ; // 0 for metadata chunk, 1+ for content chunks
189211
@@ -196,6 +218,33 @@ export type LegResourceMetadata = {
196218 sectionId ?: string ; // FK to legislation.sections.id
197219 sectionLabel ?: string ; // e.g., "91", "Schedule I"
198220 marginalNote ?: string ; // Short description of section
221+ sectionStatus ?: string ; // "in-force", "repealed", "not-in-force", etc.
222+ sectionType ?: string ; // "section", "schedule", "preamble", "heading", etc.
223+ hierarchyPath ?: string [ ] ; // e.g., ["Part I", "Division 1", "Subdivision A"]
224+ contentFlags ?: {
225+ // Mirrors ContentFlags from legislation schema
226+ hasTable ?: boolean ;
227+ hasFormula ?: boolean ;
228+ hasImage ?: boolean ;
229+ imageSources ?: string [ ] ;
230+ hasRepealed ?: boolean ;
231+ } ;
232+ sectionInForceDate ?: string ; // ISO date when section came into force
233+ historicalNotes ?: {
234+ // Mirrors HistoricalNoteItem from legislation schema
235+ text : string ;
236+ type ?: string ;
237+ enactedDate ?: string ;
238+ inForceStartDate ?: string ;
239+ enactId ?: string ;
240+ } [ ] ;
241+
242+ // Defined term specific fields
243+ termId ?: string ; // FK to legislation.defined_terms.id
244+ term ?: string ; // The defined term itself (e.g., "barrier", "obstacle")
245+ termPaired ?: string ; // The paired term in other language
246+ scopeType ?: string ; // "act", "regulation", "part", "section"
247+ scopeSections ?: string [ ] ; // Section scope if applicable
199248
200249 // Act metadata fields
201250 longTitle ?: string ;
@@ -211,6 +260,34 @@ export type LegResourceMetadata = {
211260 enablingActId ?: string ;
212261 enablingActTitle ?: string ;
213262 registrationDate ?: string ;
263+
264+ // Preamble-specific fields
265+ preambleIndex ?: number ; // Position in preamble array
266+
267+ // Treaty-specific fields
268+ treatyTitle ?: string ; // Title of the treaty/convention
269+
270+ // Cross-reference fields
271+ crossRefId ?: string ; // FK to legislation.cross_references.id
272+ targetType ?: string ; // "act" or "regulation"
273+ targetRef ?: string ; // Reference to target document
274+ targetSectionRef ?: string ; // Optional section reference
275+ referenceText ?: string ; // Display text for the reference
276+
277+ // Table of provisions fields
278+ provisionLabel ?: string ; // Label from table of provisions
279+ provisionTitle ?: string ; // Title from table of provisions
280+ provisionLevel ?: number ; // Hierarchy level
281+
282+ // Signature block fields
283+ signatureName ?: string ; // Name of signatory
284+ signatureTitle ?: string ; // Title of signatory
285+ signatureDate ?: string ; // Date of signature
286+
287+ // Related provisions fields
288+ relatedProvisionLabel ?: string ; // Label from related provision (e.g., "Transitional Provisions")
289+ relatedProvisionSource ?: string ; // Source reference
290+ relatedProvisionSections ?: string [ ] ; // Referenced section numbers
214291} ;
215292
216293/**
@@ -224,16 +301,29 @@ export const legResources = ragSchema.table(
224301 id : varchar ( "id" , { length : 191 } )
225302 . primaryKey ( )
226303 . $defaultFn ( ( ) => nanoid ( ) ) ,
227- sectionId : varchar ( "section_id" , { length : 191 } ) . notNull ( ) ,
304+ // Unique resource key for deduplication: "{sourceType}:{sourceId}:{language}:{chunkIndex}"
305+ resourceKey : varchar ( "resource_key" , { length : 255 } ) . notNull ( ) ,
228306 content : text ( "content" ) . notNull ( ) ,
229307 metadata : jsonb ( "metadata" ) . $type < LegResourceMetadata > ( ) . notNull ( ) ,
308+ // Denormalized columns for fast filtering (avoids JSONB extraction in queries)
309+ language : varchar ( "language" , { length : 2 } ) . notNull ( ) ,
310+ sourceType : varchar ( "source_type" , { length : 30 } ) . notNull ( ) ,
230311 createdAt : timestamp ( "created_at" ) . defaultNow ( ) . notNull ( ) ,
231312 updatedAt : timestamp ( "updated_at" ) . defaultNow ( ) . notNull ( ) ,
232313 } ,
233314 ( table ) => [
234- index ( "leg_resources_section_id_idx" ) . on ( table . sectionId ) ,
315+ // Unique constraint to prevent duplicates on concurrent runs or restarts
316+ unique ( "leg_resources_resource_key_unique" ) . on ( table . resourceKey ) ,
317+ index ( "leg_resources_resource_key_idx" ) . on ( table . resourceKey ) ,
318+ // Composite index for common filtering patterns (language + sourceType)
319+ index ( "leg_resources_lang_source_idx" ) . on ( table . language , table . sourceType ) ,
235320 // Single GIN index on metadata for flexible querying
236321 index ( "leg_resources_metadata_gin" ) . using ( "gin" , table . metadata ) ,
322+ // CHECK constraint for valid source types (data integrity)
323+ check (
324+ "leg_resources_source_type_check" ,
325+ sql `${ table . sourceType } IN (${ sql . raw ( LEG_SOURCE_TYPES . map ( ( t ) => `'${ t } '` ) . join ( ", " ) ) } )`
326+ ) ,
237327 ]
238328) ;
239329
0 commit comments