@@ -70,64 +70,3 @@ def create_text_chunks(self, axtree, chunk_size=200, overlap=50):
7070 return text_splitter .split_text (axtree )
7171 else :
7272 return get_chunks_from_tokenizer (axtree , self .chunk_size , self .overlap )
73-
@staticmethod
def extract_bid(line):
    """
    Pull the first bracketed alphanumeric identifier out of a line.

    The line is expected to look like '[bid] textarea ...', but the
    pattern is searched anywhere in the string, not only at the start.

    Parameters:
        line (str): Text that may contain a bid wrapped in square brackets.

    Returns:
        str: The characters between the brackets of the first match, or
        None when the line has no '[<letters/digits>]' group.
    """
    # Only ASCII letters and digits count as a bid; '[]' or '[a-1]' do not match.
    found = re.search(r"\[([a-zA-Z0-9]+)\]", line)
    return found.group(1) if found else None
89-
@classmethod
def get_elements_around(cls, tree, element_id, n):
    """
    Get n elements around the given element_id from the AXTree while preserving its indentation structure.

    Only lines that contain both '[' and ']' are treated as elements; the
    window of n neighbours is counted over those lines, so lines without
    a bracketed ID are never returned. The ID of a line is the text between
    its first '[' and the next ']' (or the next '[', whichever comes first).

    :param tree: String representing the AXTree with indentations.
    :param element_id: The element ID to center around (can include alphanumeric IDs like 'a203').
    :param n: The number of elements to include before and after.
    :return: String of the AXTree elements around the given element ID, preserving indentation.
    :raises ValueError: If no element line carries the given element_id.
    """
    # Single pass: keep (bracketed id, original line) for every element line.
    # The "[" membership test guarantees split("[", 2) has at least two parts,
    # so no exception handling is needed around the parse.
    elements = [
        (line.split("[", 2)[1].split("]", 1)[0], line)
        for line in tree.splitlines()
        if "[" in line and "]" in line
    ]

    # Position of the first element whose ID matches exactly.
    target_idx = next(
        (pos for pos, (eid, _) in enumerate(elements) if eid == element_id), None
    )
    if target_idx is None:
        raise ValueError(f"Element ID {element_id} not found in the tree.")

    # Clamp the window to the available elements on both sides.
    start_idx = max(0, target_idx - n)
    end_idx = min(len(elements), target_idx + n + 1)

    # Join with a bare newline: any extra separator character would shift
    # the indentation of every line after the first.
    return "\n".join(line for _, line in elements[start_idx:end_idx])
0 commit comments