v1.5.2: 优化JM网页的正则表达式，增加对异常网页的提示处理，修复序号sort问题，优化代码。 (#15)

hect0x7 · web-flow · commit cd3d60ab6620 · 2023-04-17T20:21:00.000+08:00
diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py
@@ -2,6 +2,6 @@
 # 被依赖方 <--- 使用方
 # config <--- entity <--- toolkit <--- client <--- service <--- option
 
-__version__ = '1.5.1'
+__version__ = '1.5.2'
 
 from .api import *
diff --git a/src/jmcomic/jm_client.py b/src/jmcomic/jm_client.py
@@ -142,8 +142,8 @@ def jm_get(self, url, is_api=True, require_200=True, **kwargs):
                                   f'响应文本过长(len={len(resp.text)})，不打印')
                                  )
 
-        if is_api is True and resp.text.strip() == JmModuleConfig.JM_SERVER_ERROR_HTML:
-            raise AssertionError("【JM异常】Could not connect to mysql! Please check your database settings!")
+        if is_api is True:
+            JmModuleConfig.check_html(resp.text.strip(), url)
 
         return resp
 
@@ -158,7 +158,7 @@ def img_is_not_need_to_decode(cls, data_original: str, _resp):
         return data_original.endswith('.gif')
 
     # noinspection PyAttributeOutsideInit
-    def enable_cache(self):
+    def enable_cache(self, debug=False):
         def wrap_func_cache(func_name, cache_dict_name):
             if hasattr(self, cache_dict_name):
                 return
@@ -168,10 +168,14 @@ def wrap_func_cache(func_name, cache_dict_name):
 
             # 重载本对象的方法
             func = getattr(self, func_name)
+
+            cache_hit_msg = f'【缓存命中】{cache_dict_name} ' + '→ [{}]' if debug is True else None
+            cache_miss_msg = f'【缓存缺失】{cache_dict_name} ' + '← [{}]' if debug is True else None
+
             wrap_func = enable_cache(
                 cache_dict=cache_dict,
-                cache_hit_msg=f'命中 {cache_dict_name} ' + '→ [{}]]',
-                cache_miss_msg=f'缺失 {cache_dict_name} ' + '← [{}]',
+                cache_hit_msg=cache_hit_msg,
+                cache_miss_msg=cache_miss_msg,
             )(func)
 
             setattr(self, func_name, wrap_func)
diff --git a/src/jmcomic/jm_config.py b/src/jmcomic/jm_config.py
@@ -5,9 +5,14 @@ class JmModuleConfig:
     JM_REDIRECT_URL = f'{PROT}jm365.xyz/3YeBdF'  # 永久網域，怕走失的小伙伴收藏起来
     JM_PUB_URL = f'{PROT}jmcomic1.bet'
     JM_CDN_IMAGE_URL_TEMPLATE = PROT + 'cdn-msp.{domain}/media/photos/{photo_id}/{index:05}{suffix}'  # index 从1开始
-    JM_SERVER_ERROR_HTML = "Could not connect to mysql! Please check your database settings!"
     JM_IMAGE_SUFFIX = ['.jpg', '.webp', '.png', '.gif']
 
+    # 访问JM可能会遇到的异常网页
+    JM_ERROR_RESPONSE_HTML = {
+        "Could not connect to mysql! Please check your database settings!": "禁漫服务器内部报错",
+        "Restricted Access!": "禁漫拒绝你所在ip地区的访问，你可以选择: 换域名/换代理",
+    }
+
     # 图片分隔相关
     SCRAMBLE_0 = 220980
     SCRAMBLE_10 = 268850
@@ -73,16 +78,23 @@ def get_jmcomic_url(cls, postman=None):
         """
         if postman is None:
             from common import Postmans
-            postman = Postmans.get_impl_clazz('cffi') \
+            postman = Postmans \
+                .get_impl_clazz('cffi') \
                 .create(headers=cls.headers(cls.JM_REDIRECT_URL))
 
-        domain = postman.with_wrap_resp() \
-            .get(cls.JM_REDIRECT_URL, allow_redirects=False) \
-            .redirect_url
-
-        cls.jm_debug('获取禁漫地址', f'获取成功，最新可用的禁漫地址: {domain}')
+        domain = postman.with_redirect_catching().get(cls.JM_REDIRECT_URL)
+        cls.jm_debug('获取禁漫地址', f'[{cls.JM_REDIRECT_URL}] → [{domain}]')
         return domain
 
+    @classmethod
+    def check_html(cls, html: str, url=None):
+        # Raise AssertionError when the stripped body exactly equals a known JM error page.
+        error_msg = cls.JM_ERROR_RESPONSE_HTML.get(html.strip())
+        if error_msg is None:
+            return
+        # Parenthesize the suffix; unparenthesized, a None url produced an empty message.
+        raise AssertionError(error_msg + (f': {url}' if url is not None else ''))
+
 
 jm_debug = JmModuleConfig.jm_debug
 disable_jm_debug = JmModuleConfig.disable_jm_debug
diff --git a/src/jmcomic/jm_entity.py b/src/jmcomic/jm_entity.py
@@ -239,7 +239,7 @@ def create_photo_detail(self, index) -> Tuple[JmPhotoDetail, Tuple]:
             title=photo_title,
             keywords='',
             series_id=self.album_id,
-            sort=index + 1,
+            sort=episode_info[1] if len(self) != 1 else 1,
             author=self.author,
             from_album=self,
             page_arr=None,
diff --git a/src/jmcomic/jm_toolkit.py b/src/jmcomic/jm_toolkit.py
@@ -29,8 +29,8 @@ class JmcomicText:
 
     # album 作者
     pattern_html_album_author_list = [
-        compile('作者： <span itemprop="author" data-type="author">(\s*<a.*?>(.*?)</a>)*\s*</span>'),
-        compile("<a.*?>(.*?)</a>"),
+        compile(r'作者： *<span itemprop="author" data-type="author">([\s\S]*?)</span>'),  # raw: \s is not a str escape
+        compile(r"<a[\s\S]*?>(.*?)</a>"),  # raw string; [\s\S] lets the tag span newlines
     ]
 
     @classmethod
@@ -127,7 +127,8 @@ def match_field(field_key: str, pattern: Union[Pattern, List[Pattern]], text):
             field_value = match_field(field_name, pattern_value, html)
 
             if field_value is None:
-                raise AssertionError(f"文本没有匹配上字段：字段名为{field_name}，pattern为cls.{pattern_name}")
+                write_text('./resp.txt', html)  # debug
+                raise AssertionError(f"文本没有匹配上字段：字段名为'{field_name}'，pattern: [{pattern_value.pattern}]")
 
             # 保存字段
             field_dict[field_name] = field_value