Skip to content

Commit 04c490b

Browse files
committed
Fix pre-commit errors
1 parent 00dd209 commit 04c490b

File tree

12 files changed: +8 additions, −22 deletions

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,4 +10,4 @@ repos:
1010
hooks:
1111
- id: check-merge-conflict
1212
- id: mixed-line-ending
13-
exclude: 'rag_system/kotaemon/libs/kotaemon/.*|rag_system/kotaemon/libs/ktem/.*|.*\.ipynb$'
13+
exclude: 'rag_system/old/.*|library/old/.*|policy_analysis/old/.*|.*\.ipynb$'

library/scraping/download_all_pdfs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515

1616
from library.database.download_queue_crud import get_papers_to_scrape, mark_paper_failed, mark_paper_scraped
1717
from library.database.models import ScrapingQueue
18-
from library.scraping.download_pdf import download_pdf, start_webdriver
18+
from library.scraping.download_pdf import download_pdf
1919
from library.scraping.extract_pdf_content import get_markdown_pymupdf
2020
from selenium import webdriver
2121
from tqdm import tqdm

library/scraping/extract_sections_from_raw_text.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ def process_file(
3434
with ProcessPoolExecutor(max_workers=num_workers) as executor:
3535
futures = {
3636
executor.submit(process_text, text): (idx, oa_id)
37-
for idx, (oa_id, text) in enumerate(zip(df.index, df["text"]))
37+
for idx, (oa_id, text) in enumerate(zip(df.index, df["text"], strict=True))
3838
}
3939

4040
buffer = []

library/src/library/scraping/clean/headers_footers.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,5 @@
1-
import os
21
import re
3-
import string
42

5-
import numpy as np
6-
import pandas as pd
73

84

95
class TrieNode:

library/src/library/scraping/download_pdf.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
import tempfile
1313

1414
from selenium import webdriver
15-
from selenium.common.exceptions import WebDriverException
1615
from selenium.webdriver.chrome.service import Service
1716
from webdriver_manager.chrome import ChromeDriverManager
1817

policy_analysis/dspy_policies_and_taxonomy_extraction/initial_chunk_cleaning/clean_chunks.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
11
import pyarrow.parquet as pq
2-
import pyarrow as pa
3-
import pandas as pd
42
import re
53
import json
64

@@ -127,7 +125,7 @@ def apply_regex_cleaning(text, regex_rules):
127125
if TEST_RUN:
128126
df = df.head(SAMPLE_SIZE)
129127

130-
for idx, text in enumerate(df[target_col]):
128+
for text in df[target_col]:
131129
cleaned_text, deleted_count, triggered_rules = apply_regex_cleaning(text, REGEX_RULES)
132130

133131
result = {

policy_analysis/dspy_policies_and_taxonomy_extraction/pipeline_policy_and_taxonomy_extraction.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
import os
33
import pickle
44
import pyarrow.parquet as pq
5-
import pandas as pd
65
from dotenv import load_dotenv
76
from tqdm import tqdm
87
import json
@@ -108,7 +107,7 @@ def suppress_output():
108107
# ----------------------------------------------------
109108

110109
for (text, (openalex_id, chunk_idx), p_out, g_out) in zip(
111-
batch_texts, batch_meta, policy_outputs, geo_outputs
110+
batch_texts, batch_meta, policy_outputs, geo_outputs, strict=False
112111
):
113112
policy_dict = p_out.toDict() if hasattr(p_out, "toDict") else p_out
114113
geo_dict = g_out.toDict() if hasattr(g_out, "toDict") else g_out

policy_analysis/dspy_policies_and_taxonomy_extraction/policy_extraction/policy_dspy_model_creation.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,11 @@
44
import json
55
import os
66
from dotenv import load_dotenv
7-
import pandas as pd
87
from sentence_transformers import CrossEncoder
98
import torch
109
from datetime import datetime
11-
from dspy.adapters import JSONAdapter
1210

1311

14-
import math
1512
# Load environment variables
1613
load_dotenv()
1714

policy_analysis/dspy_policies_and_taxonomy_extraction/taxonomy_extraction/taxonomy_dspy_model_creation.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,10 @@
11
import dspy
2-
import csv
3-
from enum import Enum
4-
from typing import List, Dict, Any, Union
52
import os
63

74
from dspy.teleprompt import MIPROv2
85

96
import json
107
from dotenv import load_dotenv
11-
import pandas as pd
128
from datetime import datetime
139

1410
from taxonomy_definition.geographical_taxonomy import (

policy_analysis/dspy_policies_and_taxonomy_extraction/taxonomy_extraction/utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import ast
12
import re
23
NULL_STRINGS = {
34
"null",

0 commit comments

Comments (0)