-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfindText.py
More file actions
48 lines (44 loc) · 1.28 KB
/
findText.py
File metadata and controls
48 lines (44 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from analyseText import *
from setup import *
from DB import *
def get_random_work():
    """Fetch a randomly chosen work from the ``texts`` table.

    A random ``textID`` between 52 and 884 (inclusive) is drawn and looked
    up; if no row comes back, another ID is drawn, until a row is found.

    Returns:
        A ``(title, text, author)`` tuple taken from the first matching row
        (columns ``title``, ``textString``, ``authorName``).
    """
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration. Left unchanged here to preserve behaviour.
    # One connection is reused for every retry instead of opening (and
    # leaking) a new connection per attempt.
    mydb = SQL.connect(
        host="localhost",
        user="root",
        password="admin",
        database="corpus"
    )
    try:
        mycursor = mydb.cursor()
        try:
            textQuery = "SELECT title, textString, authorName FROM texts WHERE textID = %s"
            work = False
            # Some IDs in the range may have no row; keep drawing until one does.
            while not work:
                print("\nGetting random text from database.")
                text_id = random.randint(52, 884)
                mycursor.execute(textQuery, (text_id,))
                work = mycursor.fetchall()
        finally:
            mycursor.close()
    finally:
        # Always release the connection, even if the query raised.
        mydb.close()

    title, text, author = work[0]
    print(f"Collected {title} by {author}")
    return title, text, author
def get_random_passage(docobj, passageLength):
    """Build a passage of roughly ``passageLength`` words from random sentences.

    Starting at a random sentence, consecutive sentences are appended (each
    followed by a single space) for as long as the running word count plus
    the next sentence stays below ``passageLength * 1.1``. If the passage is
    still shorter than ``passageLength`` afterwards, a new random start is
    drawn and appending continues onto the same passage.

    Args:
        docobj: Mapping with a ``"tokens"`` sequence and a ``"sentences"``
            sequence of strings.
        passageLength: Minimum number of whitespace-separated tokens wanted.

    Returns:
        The passage string, or ``False`` when the work has fewer tokens than
        ``passageLength``, has fewer than two sentences, or contains no
        sentence that can still fit in the remaining word budget.
    """
    # TODO remove numbers here?
    if len(docobj["tokens"]) < passageLength:
        print("Work too short.")
        return False

    print("Getting random passage from text.")
    sentences = docobj['sentences']
    if len(sentences) < 2:
        print("Not enough sentences.")
        return False

    # Per-sentence counts are computed once instead of re-splitting the
    # growing passage on every step.
    token_counts = [len(s.split()) for s in sentences]
    # Stand-alone "." tokens do not count towards the running word budget.
    word_counts = [sum(1 for w in s.split() if w != ".") for s in sentences]
    # Shortest sentence that actually contributes a token (None if none do).
    shortest = min((n for n in token_counts if n), default=None)
    limit = passageLength * 1.1

    pieces = []
    total_tokens = 0
    words = 0
    while total_tokens < passageLength:
        # If even the shortest non-empty sentence no longer fits, no amount
        # of retrying can make progress; bail out rather than loop forever.
        if shortest is None or words + shortest >= limit:
            print("Could not build a passage of the requested length.")
            return False
        start = random.randint(0, len(sentences) - 1)
        for i in range(start, len(sentences)):
            if words + token_counts[i] >= limit:
                break
            pieces.append(sentences[i])
            total_tokens += token_counts[i]
            words += word_counts[i]

    passage = "".join(s + " " for s in pieces)
    print(f"Returning passage of {total_tokens} words.")
    return passage