Skip to content

Commit c456efe

Browse files
committed
Added option for full_text export
1 parent 8f8d64e commit c456efe

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

sde_collections/management/commands/export_urls_to_csv.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ def add_arguments(self, parser):
3636
parser.add_argument(
3737
"--batch-size", type=int, default=1000, help="Number of records to process in each batch (default: 1000)"
3838
)
39+
parser.add_argument(
40+
"--full_text", action="store_true", default=False, help="Include full text in export (default: False)"
41+
)
3942

4043
def handle(self, *args, **options):
4144
model_name = options["model"]
@@ -77,13 +80,19 @@ def handle(self, *args, **options):
7780
base_fields = [
7881
"url",
7982
"scraped_title",
80-
"scraped_text",
8183
"generated_title",
8284
"visited",
8385
"document_type",
8486
"division",
8587
]
8688

89+
# Include scraped_text only when the --full_text flag is passed (it is omitted by default)
90+
if options["full_text"]:
91+
base_fields.append("scraped_text")
92+
self.stdout.write("Including full text content in export")
93+
else:
94+
self.stdout.write("Excluding full text content from export (use --full_text to include)")
95+
8796
# Add paired field tags separately
8897
tag_fields = ["tdamm_tag_manual", "tdamm_tag_ml"]
8998

0 commit comments

Comments
 (0)