Skip to content

Commit 9f45e56

Browse files
committed
test
1 parent 7084af0 commit 9f45e56

19 files changed

+590
-254
lines changed
0 Bytes
Binary file not shown.
2.91 KB
Binary file not shown.
2.92 KB
Binary file not shown.

Backend/cluster_processor.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import re
2+
3+
def remove_extra_whitespace(input_file, output_file):
    """Clean a question file and write the result to a new file.

    Three passes are applied to the whole text, in order:

    1. Dash runs at the start of a line (with surrounding whitespace) are
       removed.
    2. Upper-case Roman-numeral markers at the start of a line (``I.``,
       ``II.``, ...) are replaced with consecutive decimal markers
       (``1.``, ``2.``, ...), numbered in order of appearance.
    3. Runs of spaces are collapsed to a single space.

    Args:
        input_file: Path of the text file to read.
        output_file: Path the cleaned text is written to (overwritten).
    """
    with open(input_file, 'r') as file:
        content = file.read()

    # Remove unwanted characters before numbering. ``^`` with MULTILINE is
    # used instead of a ``(?<=\n)`` lookbehind so the first line of the file
    # (which has no preceding newline) is cleaned as well.
    content = re.sub(r'^\s*-+\s*', '', content, flags=re.MULTILINE)

    # Remove Roman numbering and add decimal numbering. The counter must be
    # advanced inside the replacement callback: re.sub calls it once per
    # match, so incrementing after re.sub returns would number every item 1.
    decimal_number = 0

    def next_decimal(match):
        nonlocal decimal_number
        decimal_number += 1
        return str(decimal_number) + '.'

    content = re.sub(r'^([IVXLCDM]+\.)(?=\s)', next_decimal, content,
                     flags=re.MULTILINE)

    # Remove extra white spaces (only spaces; newlines and tabs are kept).
    content = re.sub(' +', ' ', content)

    with open(output_file, 'w') as file:
        file.write(content)
20+
21+
22+
def number_questions(input_filename, output_filename):
    """Renumber question lines sequentially within each module.

    The input is read fully before the output is opened. Each line is then
    handled as follows:

    * A line starting with ``Module <n>:`` starts a new module: it is written
      unchanged and the question counter restarts.
    * A line starting with a question marker (``1.``, ``(a)``, ``(i)`` or
      ``a.``) has that marker replaced with the next question number followed
      by a period. Any occurrence of the current module's original label
      (e.g. ``Module 7``) in that line is rewritten to the module's ordinal
      position in the file (e.g. ``Module 2``).
    * Every other line is copied through unchanged.

    Args:
        input_filename: Path of the text file to read.
        output_filename: Path the renumbered text is written to (overwritten).
    """
    # Compiled once, outside the loop. The original character classes
    # ``[(a-z)]`` / ``[(i)]`` matched a single character such as "(" followed
    # by a period, so parenthesised markers like "(a)" were never recognised.
    module_header = re.compile(r'^(Module \d+):')
    question_marker = re.compile(r'^(?:\d+\.|\([a-z]+\)|[a-z]\.)')

    section_count = 0
    question_count = 0
    current_section = ""

    with open(input_filename, 'r') as input_file:
        lines = input_file.readlines()

    with open(output_filename, 'w') as output_file:
        for line in lines:
            header = module_header.match(line)
            if header:
                # New module: restart question numbering.
                section_count += 1
                question_count = 0
                current_section = header.group(1)
                output_file.write(line)
            elif question_marker.match(line):
                question_count += 1
                # Swap the leading marker for the running question number.
                modified_line = question_marker.sub(
                    str(question_count) + '.', line, count=1)
                # Replace the section number if necessary. Guarded because
                # str.replace("") inserts the replacement between every
                # character, which garbled questions preceding any module.
                if current_section:
                    modified_line = modified_line.replace(
                        current_section, 'Module ' + str(section_count))
                output_file.write(modified_line)
            else:
                output_file.write(line)
48+
49+
50+
# Usage example
import os

# Raw string: "\G" and "\c" are not valid escape sequences, so the plain
# literal only worked by accident (and triggers an invalid-escape warning).
# The resulting path value is unchanged.
input_file = r'Local_Storage\Generated_Files\cluster_questions.txt'  # Replace with your input file path
temp_file = 'temp_output.txt'  # Replace with a temporary output file path
output_file = 'final_output.txt'  # Replace with your final output file path

# Step 1: Remove extra whitespace
remove_extra_whitespace(input_file, temp_file)

try:
    # Step 2: Number the questions
    number_questions(temp_file, output_file)
finally:
    # Step 3: Clean up the temporary file, even if numbering failed
    os.remove(temp_file)

Backend/process_cluster_1.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import re
2+
3+
def remove_extra_whitespace(input_file, output_file):
    """Clean a question file and write the result to a new file.

    Three passes are applied to the whole text, in order:

    1. Dash runs at the start of a line (with surrounding whitespace) are
       removed.
    2. Upper-case Roman-numeral markers at the start of a line (``I.``,
       ``II.``, ...) are replaced with consecutive decimal markers
       (``1.``, ``2.``, ...), numbered in order of appearance.
    3. Runs of spaces are collapsed to a single space.

    Args:
        input_file: Path of the text file to read.
        output_file: Path the cleaned text is written to (overwritten).
    """
    with open(input_file, 'r') as file:
        content = file.read()

    # Remove unwanted characters before numbering. ``^`` with MULTILINE is
    # used instead of a ``(?<=\n)`` lookbehind so the first line of the file
    # (which has no preceding newline) is cleaned as well.
    content = re.sub(r'^\s*-+\s*', '', content, flags=re.MULTILINE)

    # Remove Roman numbering and add decimal numbering. The counter must be
    # advanced inside the replacement callback: re.sub calls it once per
    # match, so incrementing after re.sub returns would number every item 1.
    decimal_number = 0

    def next_decimal(match):
        nonlocal decimal_number
        decimal_number += 1
        return str(decimal_number) + '.'

    content = re.sub(r'^([IVXLCDM]+\.)(?=\s)', next_decimal, content,
                     flags=re.MULTILINE)

    # Remove extra white spaces (only spaces; newlines and tabs are kept).
    content = re.sub(' +', ' ', content)

    with open(output_file, 'w') as file:
        file.write(content)
20+
21+
# Usage example
# Raw string: "\G" and "\c" are not valid escape sequences, so the plain
# literal only worked by accident (and triggers an invalid-escape warning).
# The resulting path value is unchanged.
input_file = r'Local_Storage\Generated_Files\cluster_questions.txt'  # Replace with your input file path
output_file = 'output.txt'  # Replace with your output file path
remove_extra_whitespace(input_file, output_file)

Backend/process_cluster_2.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import re
2+
3+
def number_questions(input_filename, output_filename):
    """Renumber question lines sequentially within each module.

    The input is read fully before the output is opened. Each line is then
    handled as follows:

    * A line starting with ``Module <n>:`` starts a new module: it is written
      unchanged and the question counter restarts.
    * A line starting with a question marker (``1.``, ``(a)``, ``(i)`` or
      ``a.``) has that marker replaced with the next question number followed
      by a period. Any occurrence of the current module's original label
      (e.g. ``Module 7``) in that line is rewritten to the module's ordinal
      position in the file (e.g. ``Module 2``).
    * Every other line is copied through unchanged.

    Args:
        input_filename: Path of the text file to read.
        output_filename: Path the renumbered text is written to (overwritten).
    """
    # Compiled once, outside the loop. The original character classes
    # ``[(a-z)]`` / ``[(i)]`` matched a single character such as "(" followed
    # by a period, so parenthesised markers like "(a)" were never recognised.
    module_header = re.compile(r'^(Module \d+):')
    question_marker = re.compile(r'^(?:\d+\.|\([a-z]+\)|[a-z]\.)')

    section_count = 0
    question_count = 0
    current_section = ""

    with open(input_filename, 'r') as input_file:
        lines = input_file.readlines()

    with open(output_filename, 'w') as output_file:
        for line in lines:
            header = module_header.match(line)
            if header:
                # New module: restart question numbering.
                section_count += 1
                question_count = 0
                current_section = header.group(1)
                output_file.write(line)
            elif question_marker.match(line):
                question_count += 1
                # Swap the leading marker for the running question number.
                modified_line = question_marker.sub(
                    str(question_count) + '.', line, count=1)
                # Replace the section number if necessary. Guarded because
                # str.replace("") inserts the replacement between every
                # character, which garbled questions preceding any module.
                if current_section:
                    modified_line = modified_line.replace(
                        current_section, 'Module ' + str(section_count))
                output_file.write(modified_line)
            else:
                output_file.write(line)
29+
30+
# Script entry point: renumber the questions found in the input file and
# write them to the output file. Both paths are relative to the current
# working directory; edit them to point at the real files.
# NOTE(review): "output.txt" is presumably the file produced by
# process_cluster_1.py -- confirm.
input_filename, output_filename = "output.txt", "numbered_questions.txt"
number_questions(input_filename=input_filename, output_filename=output_filename)

Backend/test.py

Lines changed: 0 additions & 35 deletions
This file was deleted.

Backend/test2.py

Lines changed: 0 additions & 54 deletions
This file was deleted.

Backend/test3.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

Backend/test_proc.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from pprint import pprint
2+
3+
4+
def process_file(file_path):
    """Group the lines of a question paper by PART and subsection.

    Lines are stripped and blank lines ignored. A line starting with
    ``PART`` opens a new section; a line starting with a Roman-numeral
    marker (``I.`` through ``IX.``) opens a new subsection; any other line is
    collected as a question of the current subsection. A line starting with
    ``***`` closes the current subsection (the marker line itself is kept as
    the last collected line, as before).

    A subsection is also stored when it is ended by the next subsection
    header, the next ``PART`` line, or the end of the file, so unterminated
    subsections are no longer lost. Subsections that appear before any
    ``PART`` line are still discarded.

    The grouped data is pretty-printed to stdout and a formatted copy is
    written to ``file_path + '_processed.txt'``. Errors while saving are
    reported on stdout rather than raised.

    Args:
        file_path: Path of the text file to process.
    """
    subsection_markers = ("I.", "II.", "III.", "IV.", "V.", "VI.", "VII.", "VIII.", "IX.")

    # Read the file
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Process the lines
    processed_data = {}
    current_section = None
    current_subsection = None
    current_questions = []

    def store_current_subsection():
        # Record the subsection being collected, if it belongs to a section.
        if current_section and current_subsection:
            processed_data[current_section].append(
                {'subsection': current_subsection, 'questions': current_questions})

    for line in lines:
        line = line.strip()

        # Skip empty lines
        if not line:
            continue

        # Check if it's a section
        if line.startswith("PART"):
            # Keep a subsection that was still open under the previous section.
            store_current_subsection()
            current_section = line
            processed_data[current_section] = []
            current_subsection = None
            current_questions = []
            continue

        # Check if it's a subsection
        if line.startswith(subsection_markers):
            # Keep the previous subsection if it was never closed with "***".
            store_current_subsection()
            current_subsection = line
            current_questions = []
            continue

        # Add question to the current subsection's questions
        if current_subsection:
            current_questions.append(line)

        # Check if it's the end of a subsection
        if line.startswith("***"):
            store_current_subsection()
            current_subsection = None
            current_questions = []

    # Keep a subsection that runs to the end of the file.
    store_current_subsection()

    # Print debug information
    print("Processed Data:")
    pprint(processed_data)

    # Format the extracted information
    parts = []
    for section, subsections in processed_data.items():
        parts.append(section + '\n\n')
        for subsection_data in subsections:
            parts.append(subsection_data['subsection'] + '\n')
            parts.append('\n'.join(subsection_data['questions']) + '\n\n')
    formatted_data = ''.join(parts)

    # Save the processed data
    output_file_path = file_path + '_processed.txt'
    try:
        with open(output_file_path, 'w') as output_file:
            output_file.write(formatted_data)
        print(f"Processing complete. Processed data saved to: {output_file_path}")
    except Exception as e:
        # Best-effort save: report the failure instead of raising.
        print("Error occurred while saving processed data:")
        print(str(e))
67+
68+
69+
# Usage: Provide the file path as a parameter to the process_file function
# Raw string: "\p" and "\q" are not valid escape sequences, so the plain
# literal only worked by accident (and triggers an invalid-escape warning).
# The resulting path value is unchanged.
process_file(r'Local_Storage\pyqs_text\qp.txt')

0 commit comments

Comments
 (0)