Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions v2.0/bookmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
# import math
import json
import time
import random

from config import BOOKMARK_HIDE_ENABLE,SKIP_ISEXISTS_ILLUST,BOOKMARK_PATH
from config import BOOKMARK_HIDE_ENABLE,SKIP_ISEXISTS_ILLUST,SKIP_DB_EXISTS_ILLUST,BOOKMARK_PATH
from downer import Downloader
from log_record import logger
from message import TEMP_MSG
from thread_pool import ThreadPool,callback
from ptimer import Timer
# TODO
from tag import TAG_FLAG_BOOKMARK
from checker import run_startup_check


class Bookmark(object):
Expand Down Expand Up @@ -136,6 +138,16 @@ def thread_by_illust(self, *args):
pid = args[0]
info = None

# 基于数据库的提前跳过,避免网络与文件系统检测
if hasattr(self.db, "pool") and SKIP_DB_EXISTS_ILLUST:
try:
exists, _ = self.db.check_illust(pid, table="bookmark")
if exists:
logger.info(f"SKIP_DB_EXISTS_ILLUST - {pid}")
return info
except Exception:
pass

# 跳过已下载插画的请求
if SKIP_ISEXISTS_ILLUST and self.file_manager.search_isExistsPid(
BOOKMARK_PATH,"b",*(pid,)):
Expand Down Expand Up @@ -202,6 +214,12 @@ def thread_by_illust(self, *args):

@logger.catch
def run(self):
# 启动一致性检查(最近 20 条)
try:
run_startup_check(self.Downloader)
except Exception as e:
logger.warning(f"启动一致性检查失败: {e}")

# TDOD TAG COUNT开始工作
TAG_FLAG_BOOKMARK = False
logger.info(TEMP_MSG["BEGIN_INFO"].format(self.class_name))
Expand Down Expand Up @@ -249,8 +267,7 @@ def run(self):
pool.put(self.thread_by_illust,(pid,),callback)

offset += self.bookmark_page_offset
# 固定休眠
time.sleep(1)
# 取消固定/随机等待,统一交由 Downloader 自适应限速(仅对线上接口生效)
except Exception as e:
logger.warning("Exception {}".format(e))
finally:
Expand Down
149 changes: 149 additions & 0 deletions v2.0/checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# coding=utf8
"""
启动一致性检查:
- 对 pixiv / bookmark 表各取最近 20 条记录,核对本地文件夹图片数量与 pageCount 是否一致;
- 使用 IsValidImage 校验图片完整性;
- 若不一致或存在损坏图片,则触发自动重下;重下后再次校验,仍失败则告警。
"""

import os
from typing import List, Tuple, Optional

from downer import Downloader
from folder import file_manager
from image_check import IsValidImage
from log_record import logger

TABLES = ["pixiv", "bookmark"]
LATEST_LIMIT = 20


def list_local_images(dir_path: str, pid: int) -> List[str]:
    """Return paths of image files in *dir_path* belonging to illust *pid*.

    A file belongs to *pid* only when its name starts with the pid followed
    by a delimiter: ``{pid}_...`` (multi-page suffixes such as ``_p0``) or
    ``{pid}.ext`` (single file). A bare prefix test would wrongly match
    other works (pid 123 matching ``1234_p0.jpg``).

    Returns [] for an empty/missing directory or on any listing error
    (best-effort: an unreadable directory is treated as empty).
    """
    if not dir_path or not os.path.isdir(dir_path):
        return []
    prefix = str(pid)
    # 常见图片扩展 (common image extensions)
    image_exts = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
    files = []
    try:
        for name in os.listdir(dir_path):
            lower = name.lower()
            # pid must be a complete token, not just a digit prefix
            if not (lower.startswith(prefix + "_") or lower.startswith(prefix + ".")):
                continue
            if lower.endswith(image_exts):
                files.append(os.path.join(dir_path, name))
    except Exception:
        return []
    return files


def verify_folder(pid: int, path: str, page_count: int, illust_type: int) -> Tuple[bool, List[str]]:
    """Check the local folder for *pid* against its DB metadata.

    Returns ``(passed, files_to_redownload)``:
    - ugoira (illust_type == 2): a valid gif OR an existing zip passes
    - single page: the first matching image must pass ``IsValidImage``
    - multi page: at least *page_count* images, every one of them valid
    """
    if not (path and os.path.isdir(path)):
        return False, []

    if illust_type == 2:
        # Animated work: verify the gif when present; a zip alone also counts.
        gif_path = os.path.join(path, f"{pid}.gif")
        if os.path.exists(gif_path):
            if IsValidImage(gif_path):
                return True, []
            return False, [gif_path]
        zip_path = os.path.join(path, f"{pid}.zip")
        return (True, []) if os.path.exists(zip_path) else (False, [])

    local = list_local_images(path, pid)

    if page_count <= 1:
        # Single-page work: whichever matching file comes first stands in.
        if not local:
            return False, []
        first = local[0]
        return (True, []) if IsValidImage(first) else (False, [first])

    # Multi-page work: require the full count, then validate each file.
    if len(local) < page_count:
        return False, local
    broken = [img for img in local if not IsValidImage(img)]
    return not broken, broken


def redownload_one(d: Downloader, table: str, rec: dict) -> bool:
    """Re-fetch one DB record's illust, then re-verify the local files.

    Returns True only when the folder passes ``verify_folder`` afterwards.
    NOTE(review): assumes ``get_illust_info`` also performs the actual
    download as a side effect — confirm against Downloader.
    """
    pid = int(rec.get("pid"))
    work_type = int(rec.get("illustType", 0))
    pages = int(rec.get("pageCount", 1))

    # Derive the save directory without writing anything back to the DB.
    target_dir = rec.get("path")
    if not target_dir or target_dir == "None":
        if table == "bookmark":
            base_dir = file_manager.bk_path
        else:
            base_dir = file_manager.select_user_path(
                int(rec.get("uid", 0)), rec.get("userName", "")
            )
        target_dir = file_manager.mkdir_illusts(base_dir, pid)

    # Go through the shared Downloader for info + download.
    source = "bookmark" if table == "bookmark" else "pixiv"
    info = d.get_illust_info(pid, extra=source)
    if not info or isinstance(info, str):
        logger.warning(f"[checker] 获取作品信息失败或不可访问: {pid} -> {info}")
        return False

    # Verify again after the retry.
    passed, bad_files = verify_folder(pid, target_dir, pages, work_type)
    if not passed:
        logger.warning(f"[checker] 重新下载后仍不一致/损坏: {pid} | invalid={len(bad_files)}")
    return passed


def run_startup_check(d: Optional[Downloader] = None):
    """Run the startup consistency check over the most recent records.

    For every table in TABLES, fetch the latest LATEST_LIMIT rows, verify
    each local folder against the DB metadata, and trigger a re-download
    for any record that fails. Per-record errors are logged and counted
    as failures so a single malformed row (e.g. pid=None) cannot abort
    the remaining checks — which a crash previously did.
    """
    d = d or Downloader()
    for table in TABLES:
        logger.info(f"[checker] 启动一致性检查 | 表: {table} | 最近 N: {LATEST_LIMIT}")
        try:
            latest = d.db.select_latest_records(table=table, limit=LATEST_LIMIT)
        except Exception as e:
            # Surface DB errors instead of silently treating them as "no records".
            logger.warning(f"[checker] 读取最近记录失败: {table} | {e}")
            latest = []
        if not latest:
            logger.info(f"[checker] 表 {table} 无记录可检")
            continue

        total = 0
        ok_cnt = 0
        fix_cnt = 0
        fail_cnt = 0

        for rec in latest:
            total += 1
            try:
                pid = int(rec.get("pid"))
                path = rec.get("path")
                page_count = int(rec.get("pageCount", 1))
                illust_type = int(rec.get("illustType", 0))

                ok, invalid = verify_folder(pid, path, page_count, illust_type)
                if ok:
                    ok_cnt += 1
                    continue

                logger.warning(f"[checker] 不一致/疑似损坏,准备重下: {table} pid={pid} path={path}")
                if redownload_one(d, table, rec):
                    fix_cnt += 1
                    logger.success(f"[checker] 重下完成 | 表:{table} PID:{pid} 再次校验通过")
                else:
                    fail_cnt += 1
                    logger.warning(f"[checker] 重下失败:{table} pid={pid}")
            except Exception as e:
                # A malformed record must not abort the rest of the sweep.
                fail_cnt += 1
                logger.warning(f"[checker] 记录检查异常: {table} rec={rec} | {e}")

        logger.info(
            f"[checker] 完成 | 表:{table} 总数:{total} 通过:{ok_cnt} 修复:{fix_cnt} 失败:{fail_cnt}"
        )


if __name__ == "__main__":
    # Allow running the consistency check standalone: python checker.py
    run_startup_check()


17 changes: 16 additions & 1 deletion v2.0/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@
# 2.同时也无法更新对应pid在数据库中的记录
SKIP_ISEXISTS_ILLUST = True

# 基于数据库提前跳过:
# 1. 若数据库中已存在该 pid 的记录,则不再请求作品信息(不触碰网络/文件系统)
# 2. 适用于你信任数据库记录已经代表“已下载/已处理”的场景
SKIP_DB_EXISTS_ILLUST = True

# 关注画师爬虫控制开关
PIXIV_CRAWLER_ENABLED = False

Expand Down Expand Up @@ -117,4 +122,14 @@
# ===============DEBUG===============
# TODO
DEBUG = False
# ===============DEBUG===============
# ===============DEBUG===============


# =============== 自适应限速(仅信息接口) ===============
# 启用后,仅针对 /ajax/ 或 /touch/ajax/ 的信息请求进行自适应轻量等待;
# 下载文件不受影响。未命中限流时延迟会快速衰减为 0。
ADAPTIVE_LIMIT_ENABLED = True
ADAPTIVE_DELAY_MAX = 1.0 # 最大延迟(秒)
ADAPTIVE_DELAY_INCREASE = 0.2 # 命中限流后的递增步长(秒)
ADAPTIVE_DELAY_DECAY_RATIO = 0.5 # 成功请求后的衰减系数(0-1)
# =======================================================
23 changes: 20 additions & 3 deletions v2.0/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@
"""
import json
import time
import random
import re

from config import SKIP_ISEXISTS_ILLUST,ROOT_PATH
from config import SKIP_ISEXISTS_ILLUST, SKIP_DB_EXISTS_ILLUST, ROOT_PATH
from downer import Downloader
from log_record import logger
from message import TEMP_MSG
from thread_pool import ThreadPool,callback
from tag import TAG_FLAG_USER
from ptimer import Timer
from checker import run_startup_check


class Crawler(object):
Expand Down Expand Up @@ -177,6 +179,16 @@ def thread_by_illust(self, *args):
uid = args[1]
info = None

# 基于数据库的提前跳过,避免网络与文件系统检测
if hasattr(self.db, "pool") and SKIP_DB_EXISTS_ILLUST:
try:
exists, _ = self.db.check_illust(pid)
if exists:
logger.info(f"SKIP_DB_EXISTS_ILLUST - {uid} - {pid}")
return info
except Exception:
pass

# 跳过已下载插画的请求
if SKIP_ISEXISTS_ILLUST and self.file_manager.search_isExistsPid(
ROOT_PATH,"c",*(uid,pid,)):
Expand Down Expand Up @@ -243,6 +255,12 @@ def thread_by_illust(self, *args):

@logger.catch
def run(self):
# 启动一致性检查(最近 20 条)
try:
run_startup_check(self.Downloader)
except Exception as e:
logger.warning(f"启动一致性检查失败: {e}")

# 开始工作
TAG_FLAG_USER = False
logger.info(TEMP_MSG["BEGIN_INFO"].format(self.class_name))
Expand Down Expand Up @@ -288,8 +306,7 @@ def run(self):
for pid in all_illust:
pool.put(self.thread_by_illust,(pid,u["uid"],),callback)

# 固定休眠
time.sleep(5)
# 取消固定/随机等待,统一交由 Downloader 自适应限速(仅对线上接口生效)
# 无作品更新
else:
logger.info(TEMP_MSG["NOW_USER_INFO"].format(self.class_name,position,u["userName"],u["uid"],len(all_illust)))
Expand Down
17 changes: 17 additions & 0 deletions v2.0/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,23 @@ def select_illust(self, pid, table="pixiv"):
cur.close()
conn.close()

def select_latest_records(self, table="pixiv", limit=20):
    """Return the most recent *limit* rows of *table*, newest first.

    Rows are ordered by the auto-increment ``id`` descending and contain
    pid/path/pageCount/illustType/uid/userName. Returns [] on any error.

    A table name cannot be bound as a query parameter, so it is validated
    as a plain Python identifier before being interpolated into the SQL,
    ruling out injection through the *table* argument. Legitimate table
    names ("pixiv", "bookmark", ...) are unaffected.
    """
    if not str(table).isidentifier():
        logger.warning(f"<Exception> - invalid table name: {table!r}")
        return []
    conn,cur = self.get_conn()
    sql = f"""SELECT pid,path,pageCount,illustType,uid,userName FROM {table} ORDER BY id DESC LIMIT %s"""
    try:
        # limit is bound as a parameter (and coerced) — only the validated
        # table identifier is interpolated.
        cur.execute(sql, (int(limit),))
        r = cur.fetchall()
        return r or []
    except Exception as e:
        logger.warning(f"<Exception> - {e}")
        return []
    finally:
        cur.close()
        conn.close()

def select_user(self, start_id, limit=100, table="pxusers"):
"""
获取关注列表用户的数据库信息
Expand Down
Loading