Skip to content

Commit c198f8d

Browse files
update download_sheets for multi sheet_id
1 parent ca90c8c commit c198f8d

File tree

3 files changed

+98
-23
lines changed

3 files changed

+98
-23
lines changed

common_grade_export/src/base_class.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import sys
33
from io import StringIO
4-
from utils.download_file import download_sheet, get_sheets_service_and_token
4+
from utils.download_file import download_sheets, get_sheets_service_and_token
55

66
logging.basicConfig(
77
level=logging.INFO,
@@ -42,9 +42,9 @@ def get_control_data(self) -> StringIO | None:
4242
"""
4343
Получает данные из управляющей таблицы
4444
"""
45-
content = download_sheet(
45+
content = download_sheets(
4646
table_id=self.table_id,
47-
sheet_id=self.sheet_id,
47+
sheet_ids=[self.sheet_id],
4848
google_cred=self.google_cred,
4949
export_format="csv",
5050
write_to_file=False,

common_grade_export/src/spreadsheet_to_yadisk_duplicator.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pathlib import Path
1111

1212
from base_class import BaseGoogleSpreadsheetDataProcessor
13-
from utils.download_file import download_sheet
13+
from utils.download_file import download_sheets
1414
from utils.yadisk_manager import DiskManager
1515

1616
logging.basicConfig(
@@ -99,16 +99,18 @@ def process_data(
9999
100100
Args: данные строки из таблицы
101101
"""
102-
export_success = download_sheet(
102+
sheet_ids = [s.strip() for s in sheet_id.split(';')]
103+
104+
export_success = download_sheets(
103105
table_id=table_id,
104-
sheet_id=sheet_id,
106+
sheet_ids=sheet_ids,
105107
export_format=export_format,
106108
filename=export_name,
107109
google_cred=self.google_cred,
108110
)
109111

110112
if not export_success:
111-
raise Exception(f"download_sheet error")
113+
raise Exception(f"download_sheets error")
112114

113115
public_link = self.upload_file_to_disk(f"{export_name}.{export_format}")
114116
if not public_link:

common_grade_export/src/utils/download_file.py

Lines changed: 89 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
import logging
55
from openpyxl import load_workbook
66
import requests
7+
from tempfile import NamedTemporaryFile
78
from pathlib import Path
89
from google.oauth2 import service_account
910
from google.auth.transport.requests import Request
11+
from PyPDF2 import PdfMerger
12+
from openpyxl import load_workbook, Workbook
1013

1114
logger = logging.getLogger(__name__)
1215

@@ -28,20 +31,32 @@ def get_sheets_service_and_token(credentials_file="credentials.json"):
2831
return client, creds.token
2932

3033

31-
def download_sheet(
32-
table_id,
33-
sheet_id="0",
34-
filename="export",
35-
export_format="pdf",
36-
google_cred="credentials.json",
37-
write_to_file=True,
34+
def download_sheets(
35+
table_id: str,
36+
sheet_ids: list[str],
37+
filename: str = "export",
38+
export_format: str = "pdf",
39+
google_cred: str = "credentials.json",
40+
write_to_file: bool = True,
3841
) -> bytes | None:
42+
"""
43+
Скачивает несколько листов и объединяет их в один файл
44+
"""
3945
try:
4046
client, access_token = get_sheets_service_and_token(google_cred)
41-
content = export_file(table_id, sheet_id, access_token, export_format)
42-
43-
if export_format == "xlsx" and content:
44-
content = get_excel_with_values(content)
47+
48+
if len(sheet_ids) == 1:
49+
content = export_file(table_id, sheet_ids[0], access_token, export_format)
50+
if export_format == "xlsx" and content:
51+
content = get_excel_with_values(content)
52+
else:
53+
if export_format == "pdf":
54+
content = merge_multiple_pdfs(table_id, sheet_ids, access_token)
55+
elif export_format == "xlsx":
56+
content = merge_multiple_excels(table_id, sheet_ids, access_token)
57+
else:
58+
logger.warning(f"Формат {export_format} не поддерживает множественные листы, используется первый лист")
59+
content = export_file(table_id, sheet_ids[0], access_token, export_format)
4560

4661
if not content:
4762
logger.error(f"Ошибка экспорта файла")
@@ -57,13 +72,72 @@ def download_sheet(
5772

5873
except Exception as e:
5974
logger.error(f"Ошибка при скачивании: {e}")
75+
return None
76+
77+
78+
def merge_multiple_pdfs(table_id: str, sheet_ids: list[str], access_token: str) -> bytes:
    """
    Export several sheets as PDF and merge them into a single PDF document.

    Every sheet listed in ``sheet_ids`` is exported through ``export_file``
    and appended to the result in the given order. A sheet whose export
    yields no content is skipped and a warning is logged.

    Args:
        table_id: ID of the Google Sheets table.
        sheet_ids: IDs of the sheets to export, in output order.
        access_token: Token forwarded to ``export_file``.

    Returns:
        The bytes of the merged PDF, or ``b""`` when no sheet could be
        exported, so that a caller checking ``if not content`` treats the
        result as a failure instead of receiving a PDF without pages.
    """
    merger = PdfMerger()
    try:
        appended = 0
        for sheet_id in sheet_ids:
            pdf_content = export_file(table_id, sheet_id, access_token, "pdf")
            if not pdf_content:
                logger.warning(f"Не удалось экспортировать лист {sheet_id}, лист пропущен")
                continue

            # The merger accepts file-like objects, so the exported bytes are
            # passed in memory. This avoids reading a temporary file by name
            # while it is still open and possibly not flushed to disk.
            merger.append(BytesIO(pdf_content))
            appended += 1

        if not appended:
            return b""

        merged_pdf = BytesIO()
        merger.write(merged_pdf)
        return merged_pdf.getvalue()
    finally:
        # Release the merger's resources on both the success and error paths.
        merger.close()
106+
107+
108+
def merge_multiple_excels(table_id: str, sheet_ids: list[str], access_token: str) -> bytes:
    """
    Export several sheets as XLSX and merge them into a single XLSX workbook.

    Every sheet listed in ``sheet_ids`` is exported through ``export_file``,
    loaded with ``data_only=True`` and copied cell by cell (values only) into
    a new sheet of the merged workbook. A sheet whose export yields no
    content is skipped and a warning is logged.

    Args:
        table_id: ID of the Google Sheets table.
        sheet_ids: IDs of the sheets to export, in output order.
        access_token: Token forwarded to ``export_file``.

    Returns:
        The bytes of the merged workbook, or ``b""`` when no sheet could be
        exported, so that a caller checking ``if not content`` treats the
        result as a failure.
    """
    merged_workbook = Workbook()
    try:
        # Workbook() starts with one empty default sheet; drop it so the
        # result contains only the exported sheets.
        merged_workbook.remove(merged_workbook.active)

        for sheet_id in sheet_ids:
            excel_content = export_file(table_id, sheet_id, access_token, "xlsx")
            if not excel_content:
                logger.warning(f"Не удалось экспортировать лист {sheet_id}, лист пропущен")
                continue

            temp_wb = load_workbook(BytesIO(excel_content), data_only=True)
            try:
                for sheet_name in temp_wb.sheetnames:
                    source_sheet = temp_wb[sheet_name]
                    new_sheet = merged_workbook.create_sheet(title=f"{sheet_name}")

                    # Copy values only; styles and formulas are not carried over.
                    for row in source_sheet.iter_rows():
                        for cell in row:
                            new_sheet[cell.coordinate].value = cell.value
            finally:
                temp_wb.close()

        # A workbook without sheets cannot be saved and means nothing was exported.
        if not merged_workbook.sheetnames:
            return b""

        output = BytesIO()
        merged_workbook.save(output)
        return output.getvalue()
    finally:
        merged_workbook.close()
60135

61136

62137
def get_excel_with_values(content: bytes) -> bytes:
63138
"""
64139
Сохраняет значения (не формулы) листа таблицы в XLSX-файл
65140
"""
66-
67141
wb = load_workbook(BytesIO(content), data_only=True)
68142

69143
file_stream = BytesIO()
@@ -89,12 +163,11 @@ def export_file(
89163
logger.error(f"export_file: Ошибка {response.status_code}: {response.text}")
90164
return None
91165

92-
93166
def parse_args():
94167
parser = argparse.ArgumentParser(description="Download Google Sheets")
95168
parser.add_argument("--table_id", required=True, help="Google Sheets table ID")
96169
parser.add_argument(
97-
"--sheet_id", required=True, default="0", type=str, help="Sheet ID (default: 0)"
170+
"--sheet_ids", required=True, default="0", type=lambda x: x.split(";"), help="Sheet IDs separated by ; (default: 0)"
98171
)
99172
parser.add_argument(
100173
"--format", choices=["csv", "pdf", "xlsx"], default="csv", help="Output format"
@@ -110,8 +183,8 @@ def parse_args():
110183
def main():
    """Parse the command-line arguments and download the requested sheets."""
    cli = parse_args()

    # Keyword arguments map one-to-one onto the positional order of
    # download_sheets(table_id, sheet_ids, filename, export_format, google_cred).
    download_sheets(
        table_id=cli.table_id,
        sheet_ids=cli.sheet_ids,
        filename=cli.filename,
        export_format=cli.format,
        google_cred=cli.google_cred,
    )
116189

117190

0 commit comments

Comments
 (0)