Skip to content

Commit f3e20b5

Browse files
committed
feat(documents): improve document handling
Signed-off-by: Kipruto <43873157+kelvinkipruto@users.noreply.github.com>
1 parent 52d51da commit f3e20b5

File tree

6 files changed

+36
-19
lines changed

6 files changed

+36
-19
lines changed

src/collections/Documents.ts

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ export const Documents: CollectionConfig = {
1919
name: 'url',
2020
type: 'text',
2121
},
22+
{
23+
name: 'docURL', // only used to download file from airtable
24+
type: 'text',
25+
admin: {
26+
hidden: true,
27+
},
28+
},
2229
{
2330
name: 'file',
2431
type: 'upload',
@@ -115,13 +122,19 @@ export const Documents: CollectionConfig = {
115122
},
116123
{
117124
type: 'tabs',
125+
admin: {
126+
readOnly: true,
127+
},
118128
tabs: [
119129
{
120130
label: 'Extracted Text',
121131
fields: [
122132
{
123133
name: 'extractedText',
124134
type: 'richText',
135+
admin: {
136+
readOnly: true,
137+
},
125138
},
126139
],
127140
},
@@ -153,31 +166,25 @@ export const Documents: CollectionConfig = {
153166
},
154167
{
155168
type: 'row',
156-
admin: {
157-
readOnly: true,
158-
},
159169
fields: [
160170
{
161171
name: 'uniqueId',
162172
type: 'text',
163-
// admin: {
164-
// readOnly: true,
165-
// },
173+
admin: {
174+
hidden: true,
175+
},
166176
},
167177
{
168178
name: 'checkMediaId',
169179
type: 'text',
170-
// admin: {
171-
// readOnly: true,
172-
// },
180+
admin: {
181+
hidden: true,
182+
},
173183
},
174184
{
175185
name: 'checkMediaURL',
176186
label: 'CheckMedia URL',
177187
type: 'text',
178-
// admin: {
179-
// readOnly: true,
180-
// },
181188
},
182189
],
183190
},

src/payload-types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ export interface Document {
186186
id: string;
187187
title: string;
188188
url?: string | null;
189+
docURL?: string | null;
189190
file?: (string | null) | Media;
190191
politicalEntity?: string | null;
191192
country?: string | null;
@@ -468,6 +469,7 @@ export interface MediaSelect<T extends boolean = true> {
468469
export interface DocumentsSelect<T extends boolean = true> {
469470
title?: T;
470471
url?: T;
472+
docURL?: T;
471473
file?: T;
472474
politicalEntity?: T;
473475
country?: T;

src/tasks/aiSummarizer.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ export const AISummarizer: TaskConfig<'aiSummarizer'> = {
5757
- Source field must contain exact quotes from the document text
5858
- Each promise should have clear supporting evidence in the source quotes
5959
- Group similar promises under appropriate categories
60+
- Reply in the language the document is written in
6061
6162
**Document:**
6263

src/tasks/downloadDocuments.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ export const DownloadDocuments: TaskConfig<'downloadDocuments'> = {
6464
},
6565
select: {
6666
url: true,
67+
docURL: true,
6768
},
6869
})
6970

@@ -72,14 +73,17 @@ export const DownloadDocuments: TaskConfig<'downloadDocuments'> = {
7273
for (const doc of documents) {
7374
try {
7475
logger.debug('Processing document', { id: doc.id })
75-
const { url } = doc
76+
const { url, docURL } = doc
7677

77-
if (!url) {
78+
// prioritize file uploaded to airtable
79+
const urlToFetch = docURL || url
80+
81+
if (!urlToFetch) {
7882
logger.warn('Document has no URL', { id: doc.id })
7983
continue
8084
}
8185

82-
const res = await fetch(url)
86+
const res = await fetch(urlToFetch)
8387

8488
if (!res.ok) {
8589
throw new Error(`Failed to fetch document: ${res.status} ${res.statusText}`)

src/tasks/fetchAirtableDocuments.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ export const FetchAirtableDocuments: TaskConfig<'fetchAirtableDocuments'> = {
6868
yearFrom: doc.yearFrom,
6969
yearTo: doc.yearTo,
7070
airtableID: doc.id,
71-
url: doc.document.length ? doc.document[0] : doc.uRL,
71+
url: doc.uRL,
72+
docURL: doc.document[0],
7273
},
7374
})
7475
}),

src/tasks/uploadToMeedan.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { TaskConfig } from 'payload'
22
import { createFactCheckClaim } from '@/lib/meedan'
33
import { markDocumentAsProcessed } from '@/lib/airtable'
4-
import { Document } from '@/payload-types'
4+
import { Document, Media } from '@/payload-types'
55

66
export const UploadToMeedan: TaskConfig<'uploadToMeedan'> = {
77
slug: 'uploadToMeedan',
@@ -38,7 +38,7 @@ export const UploadToMeedan: TaskConfig<'uploadToMeedan'> = {
3838
},
3939
},
4040
limit: -1,
41-
depth: 0,
41+
depth: 2,
4242
})
4343

4444
logger.info(`Found ${documents.length} documents to process`)
@@ -106,6 +106,8 @@ export const UploadToMeedan: TaskConfig<'uploadToMeedan'> = {
106106
${extraction.source}
107107
`.trim()
108108

109+
const downloadedFile = doc.file as Media
110+
109111
const response = await createFactCheckClaim({
110112
apiKey: meedanAPIKey,
111113
teamId,
@@ -114,7 +116,7 @@ export const UploadToMeedan: TaskConfig<'uploadToMeedan'> = {
114116
claimDescription: extraction.summary,
115117
factCheck: {
116118
title: extraction.summary,
117-
url: doc.url || '',
119+
url: doc.url || downloadedFile.url || doc.docURL || '',
118120
language: doc.language || '',
119121
publish_report: false,
120122
},

0 commit comments

Comments
 (0)