-
Notifications
You must be signed in to change notification settings - Fork 685
Expand file tree
/
Copy pathchapter_directory_parser.py
More file actions
132 lines (113 loc) · 4.47 KB
/
chapter_directory_parser.py
File metadata and controls
132 lines (113 loc) · 4.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# chapter_directory_parser.py
# -*- coding: utf-8 -*-
import re
def parse_chapter_blueprint(blueprint_text: str):
    """
    Parse a whole chapter-blueprint text into a list of per-chapter dicts.

    The text is split into chunks on blank lines. A chunk is treated as a
    chapter only when its first line looks like ``第X章 - 标题`` (the title may
    optionally be wrapped in square brackets); other chunks are skipped.
    The remaining lines of a chapter chunk are scanned for labelled fields.

    Each returned dict has the shape:
    {
        "chapter_number": int,
        "chapter_title": str,
        "chapter_role": str,       # value of the "本章定位" line
        "chapter_purpose": str,    # value of the "核心作用" line
        "suspense_level": str,     # value of the "悬念密度" line
        "foreshadowing": str,      # value of the "伏笔操作" line
        "plot_twist_level": str,   # value of the "认知颠覆" line
        "chapter_summary": str     # value of the "本章简述" line
    }

    Fields that do not appear in a chunk are left as empty strings. If a
    label appears more than once in a chunk, the last occurrence wins.
    An optional leading "[" and an optional trailing "]" around a value are
    removed. Both the ASCII colon ":" and the full-width colon ":" are
    accepted after a label.

    Returns the list sorted by ``chapter_number`` (empty list for empty input).
    """
    if not blueprint_text:
        return []

    # (result key, label that introduces the value in the blueprint text)
    field_labels = (
        ("chapter_role", "本章定位"),
        ("chapter_purpose", "核心作用"),
        ("suspense_level", "悬念密度"),
        ("foreshadowing", "伏笔操作"),
        ("plot_twist_level", "认知颠覆"),
        ("chapter_summary", "本章简述"),
    )

    # Header accepts both "第1章 - 紫极光下的预兆" and "第1章 - [紫极光下的预兆]".
    header_pattern = re.compile(r'^第\s*(\d+)\s*章\s*-\s*\[?(.*?)\]?$')

    # The capture group must be lazy: with a greedy "(.*)" the optional
    # trailing "\]?" never gets to consume the closing bracket, so a value
    # written as "[xxx]" would come back as "xxx]".
    field_patterns = [
        (key, re.compile(r'^' + label + r'[::]\s*\[?(.*?)\]?$'))
        for key, label in field_labels
    ]

    results = []

    # Split on blank lines first so that adjacent chapters do not get mixed up.
    for chunk in re.split(r'\n\s*\n', blueprint_text.strip()):
        lines = chunk.strip().splitlines()
        if not lines:
            continue

        # The first line of the chunk must carry the chapter number and title.
        header_match = header_pattern.match(lines[0].strip())
        if not header_match:
            # Not in the "第X章 - 标题" format; skip this chunk.
            continue

        chapter = {
            "chapter_number": int(header_match.group(1)),
            "chapter_title": header_match.group(2).strip(),
        }
        # Every field defaults to "" so the result shape is always complete.
        chapter.update((key, "") for key, _ in field_labels)

        # Match the remaining lines against the known field labels.
        for line in lines[1:]:
            line_stripped = line.strip()
            if not line_stripped:
                continue
            for key, pattern in field_patterns:
                field_match = pattern.match(line_stripped)
                if field_match:
                    chapter[key] = field_match.group(1).strip()
                    break

        results.append(chapter)

    # Return the chapters ordered by chapter number.
    results.sort(key=lambda item: item["chapter_number"])
    return results
def get_chapter_info_from_blueprint(blueprint_text: str, target_chapter_number: int):
    """
    Look up the structured info for one chapter in a chapter-blueprint text.

    The blueprint is parsed with ``parse_chapter_blueprint`` and the first
    entry whose ``chapter_number`` equals ``target_chapter_number`` is
    returned. If no entry matches, a placeholder dict is returned instead:
    its title is "第{N}章" and every other text field is an empty string.
    """
    matching_entry = next(
        (
            entry
            for entry in parse_chapter_blueprint(blueprint_text)
            if entry["chapter_number"] == target_chapter_number
        ),
        None,
    )
    if matching_entry is not None:
        return matching_entry

    # Nothing matched: build the default structure for the requested chapter.
    placeholder = {
        "chapter_number": target_chapter_number,
        "chapter_title": f"第{target_chapter_number}章",
    }
    for empty_key in (
        "chapter_role",
        "chapter_purpose",
        "suspense_level",
        "foreshadowing",
        "plot_twist_level",
        "chapter_summary",
    ):
        placeholder[empty_key] = ""
    return placeholder