-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalyze_pdf_deep.py
More file actions
159 lines (130 loc) · 5.55 KB
/
analyze_pdf_deep.py
File metadata and controls
159 lines (130 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python3
"""
Analyseur PDF Approfondi pour Scénarios D&D
Extrait et affiche le contenu détaillé pour enrichissement manuel
"""
from pathlib import Path
from src.utils.pdf_reader import PDFScenarioReader
import sys
class DetailedPDFAnalyzer:
    """Run an in-depth analysis of a scenario PDF.

    The analyzer prints a report to stdout (full-text size, sections, NPCs,
    locations, encounters and keyword excerpts) and saves the full text plus
    every section to ``analysis/<scenario_name>_analysis.txt``.
    """

    # Maximum number of characters of each section shown in the console.
    SECTION_PREVIEW_CHARS = 1000

    # Keywords whose first occurrence is quoted in the "key excerpts" part.
    KEYWORDS = (
        "mission", "objectif", "quête",
        "combat", "rencontre", "ennemi",
        "trésor", "récompense",
        "PNJ", "personnage",
        "lieu", "endroit", "zone",
    )

    def __init__(self, pdf_path: Path):
        """Store the PDF path and derive the scenario name from it.

        Args:
            pdf_path: Path of the PDF to analyze. Its stem (file name
                without the extension) becomes ``scenario_name``.
        """
        self.pdf_path = pdf_path
        self.scenario_name = pdf_path.stem

    def analyze(self) -> dict:
        """Analyze the PDF, print the report and save it to disk.

        Returns:
            A dict with the keys ``'full_text'``, ``'sections'``, ``'npcs'``,
            ``'locations'``, ``'encounters'`` (the values returned by the
            reader) and ``'output_file'`` (``Path`` of the saved report).
        """
        rule = "-" * 80

        print("=" * 80)
        print(f"📖 ANALYSE APPROFONDIE: {self.scenario_name}")
        print("=" * 80)

        with PDFScenarioReader(self.pdf_path) as reader:
            # 1. Full text
            full_text = reader.get_full_text()
            print(f"\n📄 TEXTE COMPLET ({len(full_text)} caractères)")
            print(rule)

            # 2. Detailed sections
            sections = reader.extract_sections()
            self._print_sections(sections)

            # 3. NPCs (only the first 15 are listed)
            npcs = reader.extract_npcs()
            print(f"\n👥 NPCs ({len(npcs)} trouvés)")
            print(rule)
            for npc in npcs[:15]:
                print(f"- {npc['name']}: {npc['description'][:100]}")

            # 4. Locations (only the first 20 are listed)
            locations = reader.extract_locations()
            print(f"\n🗺️ LIEUX ({len(locations)} trouvés)")
            print(rule)
            for loc in locations[:20]:
                print(f"- {loc}")

            # 5. Encounters (only the first 10 are listed)
            encounters = reader.extract_encounters()
            print(f"\n⚔️ RENCONTRES ({len(encounters)} trouvées)")
            print(rule)
            for enc in encounters[:10]:
                print(f"- {enc['count']} {enc['creature']}")
                print(f" Contexte: {enc['context'][:100]}...")

            # 6. Key excerpts: context around the first hit of each keyword
            print("\n🔍 EXTRAITS CLÉS")
            print(rule)
            for keyword, excerpt in self._find_keyword_excerpts(
                full_text, self.KEYWORDS
            ):
                print(f"\n[{keyword.upper()}]:")
                print(f"...{excerpt}...")

            # 7. Save the complete analysis
            output_file = self._save_analysis(full_text, sections)
            print(f"\n\n✅ Analyse complète sauvegardée: {output_file}")
            print(f"📊 Taille du fichier: {output_file.stat().st_size} octets")

            return {
                'full_text': full_text,
                'sections': sections,
                'npcs': npcs,
                'locations': locations,
                'encounters': encounters,
                'output_file': output_file
            }

    @classmethod
    def _print_sections(cls, sections) -> None:
        """Print a numbered, truncated preview of every section.

        Args:
            sections: Mapping of section name to section text.
        """
        limit = cls.SECTION_PREVIEW_CHARS
        print(f"\n📚 SECTIONS ({len(sections)} trouvées)")
        print("-" * 80)
        for i, (section_name, content) in enumerate(sections.items(), 1):
            print(f"\n{i}. {section_name.upper()}")
            print(" " + "=" * 75)
            preview = content[:limit]
            if len(content) > limit:
                preview += "\n [...suite...]"
            # Indent continuation lines so the preview stays aligned.
            print(" " + preview.replace("\n", "\n "))
            print()

    @staticmethod
    def _find_keyword_excerpts(full_text, keywords, before=100, after=200):
        """Return ``(keyword, excerpt)`` for each keyword found in the text.

        The search is case-insensitive and only the first occurrence of a
        keyword is used. The excerpt spans ``before`` characters ahead of
        the match start and ``after`` characters from the match start,
        clamped to the bounds of the text. Keywords that do not occur are
        skipped; the input order of the keywords is preserved.
        """
        # Lowercase the document once instead of twice per keyword.
        lowered = full_text.lower()
        excerpts = []
        for keyword in keywords:
            idx = lowered.find(keyword.lower())
            if idx == -1:
                continue
            start = max(0, idx - before)
            end = min(len(full_text), idx + after)
            excerpts.append((keyword, full_text[start:end]))
        return excerpts

    def _save_analysis(self, full_text, sections) -> Path:
        """Write the full text and all sections to the analysis file.

        The ``analysis`` directory is created when missing.

        Returns:
            Path of the written file.
        """
        output_file = Path("analysis") / f"{self.scenario_name}_analysis.txt"
        output_file.parent.mkdir(exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(f"ANALYSE COMPLÈTE: {self.scenario_name}\n")
            f.write("=" * 80 + "\n\n")
            f.write("TEXTE COMPLET:\n")
            f.write("-" * 80 + "\n")
            f.write(full_text)
            f.write("\n\n")
            f.write("SECTIONS:\n")
            f.write("-" * 80 + "\n")
            for name, content in sections.items():
                f.write(f"\n### {name.upper()} ###\n")
                f.write(content)
                f.write("\n\n")
        return output_file
def _resolve_pdf_path(pdf_name, scenarios_dir=Path("scenarios")):
    """Build the path of a scenario PDF from a name given on the command line.

    The ``.pdf`` extension is appended when missing. The check ignores case
    so that a name such as ``Foo.PDF`` is not turned into ``Foo.PDF.pdf``.
    """
    if not pdf_name.lower().endswith('.pdf'):
        pdf_name += '.pdf'
    return scenarios_dir / pdf_name


def main():
    """Command-line entry point: analyze one PDF from the ``scenarios`` folder.

    Expects the PDF name (with or without ``.pdf``) as the first command-line
    argument. Exits with status 1 after printing usage help when the argument
    is missing, or after listing the available PDFs when the file is not
    found.
    """
    if len(sys.argv) < 2:
        print("Usage: python analyze_pdf_deep.py <nom_du_pdf>")
        print("\nExemples:")
        print(" python analyze_pdf_deep.py Cryptes-de-Kelemvor")
        print(" python analyze_pdf_deep.py Fort-Roanoke")
        print(" python analyze_pdf_deep.py Harceles-a-Monteloy")
        sys.exit(1)

    pdf_path = _resolve_pdf_path(sys.argv[1])

    if not pdf_path.exists():
        print(f"❌ PDF non trouvé: {pdf_path}")
        print("\nPDFs disponibles:")
        for p in sorted(Path("scenarios").glob("*.pdf")):
            # Files whose name contains "Liste" or "Tales" are hidden from
            # the suggestions (presumably not scenarios - TODO confirm).
            if "Liste" not in p.name and "Tales" not in p.name:
                print(f" - {p.stem}")
        sys.exit(1)

    analyzer = DetailedPDFAnalyzer(pdf_path)
    result = analyzer.analyze()

    print("\n" + "=" * 80)
    print("✅ ANALYSE TERMINÉE")
    print("=" * 80)
    print(f"\nConsultez le fichier: {result['output_file']}")
    print("\nVous pouvez maintenant utiliser ce contenu pour enrichir manuellement le scénario!")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()