Skip to content

Commit ba64c8f

Browse files
authored
Merge pull request #784 from NanmiCoder/feature/excel-export-and-tests
feat: excel store with other platform
2 parents ebbf86d + 6e858c1 commit ba64c8f

File tree

20 files changed

+478
-107
lines changed

20 files changed

+478
-107
lines changed

.DS_Store

0 Bytes
Binary file not shown.

README.md

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -228,14 +228,15 @@ python main.py --help
228228

229229
### 使用示例:
230230
```shell
231-
# 使用 Excel 存储数据(推荐用于数据分析)✨ 新功能
232-
uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
233-
234-
# 初始化 SQLite 数据库(使用'--init_db'时不需要携带其他optional)
231+
# 初始化 SQLite 数据库
235232
uv run main.py --init_db sqlite
236-
# 使用 SQLite 存储数据(推荐个人用户使用)
233+
# 使用 SQLite 存储数据
237234
uv run main.py --platform xhs --lt qrcode --type search --save_data_option sqlite
235+
236+
# 使用 Excel 存储数据(推荐用于数据分析)
237+
uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
238238
```
239+
239240
```shell
240241
# 初始化 MySQL 数据库
241242
uv run main.py --init_db mysql

cmd_arg/arg.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,8 @@ class SaveDataOptionEnum(str, Enum):
7171
DB = "db"
7272
JSON = "json"
7373
SQLITE = "sqlite"
74+
MONGODB = "mongodb"
75+
EXCEL = "excel"
7476

7577

7678
class InitDbOptionEnum(str, Enum):
@@ -199,7 +201,7 @@ def main(
199201
SaveDataOptionEnum,
200202
typer.Option(
201203
"--save_data_option",
202-
help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库)",
204+
help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库 | mongodb=MongoDB数据库 | excel=Excel文件)",
203205
rich_help_panel="存储配置",
204206
),
205207
] = _coerce_enum(

main.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -87,14 +87,11 @@ async def main():
8787
# Flush Excel data if using Excel export
8888
if config.SAVE_DATA_OPTION == "excel":
8989
try:
90-
# Get the store instance and flush data
91-
from store.xhs import XhsStoreFactory
92-
store = XhsStoreFactory.create_store()
93-
if hasattr(store, 'flush'):
94-
store.flush()
95-
print(f"[Main] Excel file saved successfully")
90+
from store.excel_store_base import ExcelStoreBase
91+
ExcelStoreBase.flush_all()
92+
print("[Main] Excel files saved successfully")
9693
except Exception as e:
97-
print(f"Error flushing Excel data: {e}")
94+
print(f"[Main] Error flushing Excel data: {e}")
9895

9996
# Generate wordcloud after crawling is complete
10097
# Only for JSON save mode

store/bilibili/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,13 +38,14 @@ class BiliStoreFactory:
3838
"json": BiliJsonStoreImplement,
3939
"sqlite": BiliSqliteStoreImplement,
4040
"mongodb": BiliMongoStoreImplement,
41+
"excel": BiliExcelStoreImplement,
4142
}
4243

4344
@staticmethod
4445
def create_store() -> AbstractStore:
4546
store_class = BiliStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
4647
if not store_class:
47-
raise ValueError("[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
48+
raise ValueError("[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
4849
return store_class()
4950

5051

store/bilibili/_store_impl.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -365,3 +365,14 @@ async def store_creator(self, creator_item: Dict):
365365
data=creator_item
366366
)
367367
utils.logger.info(f"[BiliMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
368+
369+
370+
class BiliExcelStoreImplement:
371+
"""B站Excel存储实现 - 全局单例"""
372+
373+
def __new__(cls, *args, **kwargs):
374+
from store.excel_store_base import ExcelStoreBase
375+
return ExcelStoreBase.get_instance(
376+
platform="bilibili",
377+
crawler_type=crawler_type_var.get()
378+
)

store/douyin/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,14 @@ class DouyinStoreFactory:
3737
"json": DouyinJsonStoreImplement,
3838
"sqlite": DouyinSqliteStoreImplement,
3939
"mongodb": DouyinMongoStoreImplement,
40+
"excel": DouyinExcelStoreImplement,
4041
}
4142

4243
@staticmethod
4344
def create_store() -> AbstractStore:
4445
store_class = DouyinStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
4546
if not store_class:
46-
raise ValueError("[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
47+
raise ValueError("[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
4748
return store_class()
4849

4950

store/douyin/_store_impl.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -264,3 +264,14 @@ async def store_creator(self, creator_item: Dict):
264264
data=creator_item
265265
)
266266
utils.logger.info(f"[DouyinMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
267+
268+
269+
class DouyinExcelStoreImplement:
270+
"""抖音Excel存储实现 - 全局单例"""
271+
272+
def __new__(cls, *args, **kwargs):
273+
from store.excel_store_base import ExcelStoreBase
274+
return ExcelStoreBase.get_instance(
275+
platform="douyin",
276+
crawler_type=crawler_type_var.get()
277+
)

0 commit comments

Comments
 (0)