import os
import asyncio

+ progress = None  # just for tracking progress

def summary(text):
-
    # Load the summarization pipeline
    summarizer = pipeline("summarization")
-
    # Split the text into smaller chunks
    max_tokens_per_chunk = 1024  # Initial value
    max_words_in_summary = 2000000
-
    # Calculate the maximum number of chunks needed
    max_num_chunks = (max_words_in_summary // max_tokens_per_chunk) + 1
-
    # Split the text into chunks
    chunks = [text[i:i + max_tokens_per_chunk] for i in range(0, len(text), max_tokens_per_chunk)]
    # for the exceptions
    exceptions = "NULL"
-
+     global progress
+     progress = 0
    # Generate summaries for each chunk
    summaries = []
    len_chunk = len(chunks)
    print("Note has been divided into chunks: " + str(len_chunk))
    for i, chunk in enumerate(chunks):
        # Reduce the chunk size dynamically if it exceeds the maximum sequence length
        while len(chunk) > max_tokens_per_chunk:
-             max_tokens_per_chunk -= 50
-
+             max_tokens_per_chunk -= 50
        try:
            summary = summarizer(chunk, max_length=200, min_length=100, do_sample=False)
            summaries.append(summary[0]['summary_text'] + "\n\n")
            print(summary[0]['summary_text'])
            print("\n\nSTATUS:" + str(i + 1) + "/" + str(len_chunk))
-             print("\n\nCOMPLETED:" + str((i + 1) / len_chunk * 100) + "%")
+             progress = (i + 1) / len_chunk * 100
+             print("\n\nCOMPLETED:" + str(progress) + "%")
        except Exception as e:
            print(f"An error occurred while summarizing chunk {i}: {str(e)}")
            exceptions = "\n".join(f"An error occurred while summarizing chunk {i}: {str(e)}")
-
    # Combine the summaries into a single summary
    combined_summary = " ".join(summaries)
-
    # Print and return the combined summary
    print("Combined Summary:")
    print(combined_summary)
@@ -55,7 +51,6 @@ def summary(text):


async def gen_summary(file):
-
    try:
        with open("dat.txt", "wb") as buffer:  # saving file
            shutil.copyfileobj(file.file, buffer)
@@ -75,8 +70,16 @@ async def gen_summary(file):
@router_summariser.post("/get-summary")
async def get_summary(file: UploadFile = File(...)):
    data = await gen_summary(file)
-
    return data

-
-
+ @router_summariser.get("/summary-gen-progress")  # route to track progress of summarization
+ def get_summary_progress():
+     global progress
+     if progress is None:
+         return {"status": "No summarisation process in progress"}
+     elif progress == 100:
+         return {"status": "Completed", "value": progress}
+     elif 0 <= progress < 100:
+         return {"status": progress}
+     else:
+         return {"status": "invalid data detected"}
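As a quick illustration for reviewers, below is a minimal client-side sketch of how the new /summary-gen-progress route could be polled while a file is being summarized via /get-summary. The base URL, the notes.txt file name, the polling interval, and the use of the requests library are assumptions for illustration only; nothing in this PR prescribes them.

import time
import threading
import requests

BASE_URL = "http://localhost:8000"  # assumed local dev server; adjust to the real deployment

def upload_and_summarize(path):
    # Start summarization by uploading the file to the /get-summary route from this PR
    with open(path, "rb") as f:
        return requests.post(f"{BASE_URL}/get-summary", files={"file": f})

def poll_progress(worker, interval=2.0):
    # Poll /summary-gen-progress while the upload/summarization request is still running
    while worker.is_alive():
        status = requests.get(f"{BASE_URL}/summary-gen-progress").json()
        print(status)
        time.sleep(interval)

worker = threading.Thread(target=upload_and_summarize, args=("notes.txt",), daemon=True)
worker.start()
poll_progress(worker)
worker.join()

One design note: because progress is a single module-level value, two overlapping /get-summary requests would overwrite each other's progress, so this sketch assumes one summarization runs at a time.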