Skip to content

Commit da7d930

Browse files
committed
bugfix: skip first page of rendered images
1 parent 0e520f4 commit da7d930

File tree

1 file changed

+12
-8
lines changed

1 file changed

+12
-8
lines changed

pdf2md/server/src/workers/supervisor-worker.rs

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -127,9 +127,20 @@ pub async fn chunk_pdf(
127127

128128
let pages = pdf
129129
.render(pdf2image::Pages::All, None)
130-
.map_err(|err| ServiceError::BadRequest(format!("Failed to render PDF file {:?}", err)))?;
130+
.map_err(|err| ServiceError::BadRequest(format!("Failed to render PDF file {:?}", err)))?
131+
.into_iter()
132+
.skip(1)
133+
.collect::<Vec<_>>();
134+
131135
let num_pages = pages.len();
132136

137+
update_task_status(
138+
task.id,
139+
FileTaskStatus::ProcessingFile(num_pages as u32),
140+
&clickhouse_client,
141+
)
142+
.await?;
143+
133144
// Process each chunk
134145
for (i, page) in pages.into_iter().enumerate() {
135146
let file_name = format!("{}page{}.jpeg", task.id, i + 1);
@@ -165,12 +176,5 @@ pub async fn chunk_pdf(
165176
log::info!("Uploaded page {} of {} to S3", i + 1, num_pages);
166177
}
167178

168-
update_task_status(
169-
task.id,
170-
FileTaskStatus::ProcessingFile(num_pages as u32),
171-
&clickhouse_client,
172-
)
173-
.await?;
174-
175179
Ok(())
176180
}

0 commit comments

Comments
 (0)