7
7
8
8
9
9
def summary (text ):
10
-
11
10
# Load the summarization pipeline
12
11
summarizer = pipeline ("summarization" )
13
-
14
12
# Split the text into smaller chunks
15
13
max_tokens_per_chunk = 1024 # Initial value
16
14
max_words_in_summary = 2000000
17
-
18
15
# Calculate the maximum number of chunks needed
19
16
max_num_chunks = (max_words_in_summary // max_tokens_per_chunk ) + 1
20
-
21
17
# Split the text into chunks
22
18
chunks = [text [i :i + max_tokens_per_chunk ] for i in range (0 , len (text ), max_tokens_per_chunk )]
23
19
# for the exceptions
24
20
exceptions = "NULL"
25
-
26
21
# Generate summaries for each chunk
27
22
summaries = []
28
23
len_chunk = len (chunks )
29
24
print ("Note have been divided into chunks:" + str (len_chunk ))
30
25
for i , chunk in enumerate (chunks ):
31
26
# Reduce the chunk size dynamically if it exceeds the maximum sequence length
32
27
while len (chunk ) > max_tokens_per_chunk :
33
- max_tokens_per_chunk -= 50
34
-
28
+ max_tokens_per_chunk -= 50
35
29
try :
36
30
summary = summarizer (chunk , max_length = 200 , min_length = 100 , do_sample = False )
37
31
summaries .append (summary [0 ]['summary_text' ]+ "\n \n " )
@@ -41,10 +35,8 @@ def summary(text):
41
35
except Exception as e :
42
36
print (f"An error occurred while summarizing chunk { i } : { str (e )} " )
43
37
exceptions = "\n " .join (f"An error occurred while summarizing chunk { i } : { str (e )} " )
44
-
45
38
# Combine the summaries into a single summary
46
39
combined_summary = " " .join (summaries )
47
-
48
40
# Print and return the combined summary
49
41
print ("Combined Summary:" )
50
42
print (combined_summary )
@@ -55,7 +47,6 @@ def summary(text):
55
47
56
48
57
49
async def gen_summary (file ):
58
-
59
50
try :
60
51
with open ("dat.txt" , "wb" ) as buffer : # saving file
61
52
shutil .copyfileobj (file .file , buffer )
@@ -75,7 +66,6 @@ async def gen_summary(file):
75
66
# POST /get-summary: accepts one uploaded file and responds with whatever
# gen_summary produces for it.
@router_summariser.post("/get-summary")
async def get_summary(file: UploadFile = File(...)):
    # Hand the upload straight to gen_summary and relay its awaited result
    # unchanged as the response payload.
    return await gen_summary(file)
80
70
81
71
0 commit comments