@@ -26,28 +26,28 @@ def get_user_details_by_username(self, username: str) -> dict[str, Any]:
2626class BasePreprocessor :
2727 """Base class for text preprocessing operations."""
2828
29- def __init__ (
30- self , base_url : str = "" , confluence_client : ConfluenceClient | None = None
31- ) -> None :
29+ def __init__ (self , base_url : str = "" ) -> None :
3230 """
3331 Initialize the base text preprocessor.
3432
3533 Args:
3634 base_url: Base URL for API server
37- confluence_client: Optional Confluence client for user lookups
3835 """
3936 self .base_url = base_url .rstrip ("/" ) if base_url else ""
40- self .confluence_client = confluence_client
4137
4238 def process_html_content (
43- self , html_content : str , space_key : str = ""
39+ self ,
40+ html_content : str ,
41+ space_key : str = "" ,
42+ confluence_client : ConfluenceClient | None = None ,
4443 ) -> tuple [str , str ]:
4544 """
4645 Process HTML content to replace user refs and page links.
4746
4847 Args:
4948 html_content: The HTML content to process
5049 space_key: Optional space key for context
50+ confluence_client: Optional Confluence client for user lookups
5151
5252 Returns:
5353 Tuple of (processed_html, processed_markdown)
@@ -57,8 +57,8 @@ def process_html_content(
5757 soup = BeautifulSoup (html_content , "html.parser" )
5858
5959 # Process user mentions
60- self ._process_user_mentions_in_soup (soup )
61- self ._process_user_profile_macros_in_soup (soup )
60+ self ._process_user_mentions_in_soup (soup , confluence_client )
61+ self ._process_user_profile_macros_in_soup (soup , confluence_client )
6262
6363 # Convert to string and markdown
6464 processed_html = str (soup )
@@ -70,12 +70,15 @@ def process_html_content(
7070 logger .error (f"Error in process_html_content: { str (e )} " )
7171 raise
7272
73- def _process_user_mentions_in_soup (self , soup : BeautifulSoup ) -> None :
73+ def _process_user_mentions_in_soup (
74+ self , soup : BeautifulSoup , confluence_client : ConfluenceClient | None = None
75+ ) -> None :
7476 """
7577 Process user mentions in BeautifulSoup object.
7678
7779 Args:
7880 soup: BeautifulSoup object containing HTML
81+ confluence_client: Optional Confluence client for user lookups
7982 """
8083 # Find all ac:link elements that might contain user mentions
8184 user_mentions = soup .find_all ("ac:link" )
@@ -86,7 +89,9 @@ def _process_user_mentions_in_soup(self, soup: BeautifulSoup) -> None:
8689 # Case 1: Direct user reference without link-body
8790 account_id = user_ref .get ("ri:account-id" )
8891 if isinstance (account_id , str ):
89- self ._replace_user_mention (user_element , account_id )
92+ self ._replace_user_mention (
93+ user_element , account_id , confluence_client
94+ )
9095 continue
9196
9297 # Case 2: User reference with link-body containing @
@@ -96,16 +101,21 @@ def _process_user_mentions_in_soup(self, soup: BeautifulSoup) -> None:
96101 if user_ref and user_ref .get ("ri:account-id" ):
97102 account_id = user_ref .get ("ri:account-id" )
98103 if isinstance (account_id , str ):
99- self ._replace_user_mention (user_element , account_id )
104+ self ._replace_user_mention (
105+ user_element , account_id , confluence_client
106+ )
100107
101- def _process_user_profile_macros_in_soup (self , soup : BeautifulSoup ) -> None :
108+ def _process_user_profile_macros_in_soup (
109+ self , soup : BeautifulSoup , confluence_client : ConfluenceClient | None = None
110+ ) -> None :
102111 """
103112 Process Confluence User Profile macros in BeautifulSoup object.
104113 Replaces <ac:structured-macro ac:name="profile">...</ac:structured-macro>
105114 with the user's display name, typically formatted as @DisplayName.
106115
107116 Args:
108117 soup: BeautifulSoup object containing HTML
118+ confluence_client: Optional Confluence client for user lookups
109119 """
110120 profile_macros = soup .find_all (
111121 "ac:structured-macro" , attrs = {"ac:name" : "profile" }
@@ -134,26 +144,24 @@ def _process_user_profile_macros_in_soup(self, soup: BeautifulSoup) -> None:
134144 user_identifier_for_log = account_id or userkey
135145 display_name = None
136146
137- if self . confluence_client and user_identifier_for_log :
147+ if confluence_client and user_identifier_for_log :
138148 try :
139149 if account_id and isinstance (account_id , str ):
140- user_details = (
141- self .confluence_client .get_user_details_by_accountid (
142- account_id
143- )
150+ user_details = confluence_client .get_user_details_by_accountid (
151+ account_id
144152 )
145153 display_name = user_details .get ("displayName" )
146154 elif userkey and isinstance (userkey , str ):
147155 # For Confluence Server/DC, userkey might be the username
148- user_details = (
149- self . confluence_client . get_user_details_by_username ( userkey )
156+ user_details = confluence_client . get_user_details_by_username (
157+ userkey
150158 )
151159 display_name = user_details .get ("displayName" )
152160 except Exception as e :
153161 logger .warning (
154162 f"Error fetching user details for profile macro (user: { user_identifier_for_log } ): { e } "
155163 )
156- elif not self . confluence_client :
164+ elif not confluence_client :
157165 logger .warning (
158166 "Confluence client not available for User Profile Macro processing."
159167 )
@@ -171,18 +179,24 @@ def _process_user_profile_macros_in_soup(self, soup: BeautifulSoup) -> None:
171179 macro_element .replace_with (fallback_text )
172180 logger .debug (f"Using fallback for user profile macro: { fallback_text } " )
173181
174- def _replace_user_mention (self , user_element : Tag , account_id : str ) -> None :
182+ def _replace_user_mention (
183+ self ,
184+ user_element : Tag ,
185+ account_id : str ,
186+ confluence_client : ConfluenceClient | None = None ,
187+ ) -> None :
175188 """
176189 Replace a user mention with the user's display name.
177190
178191 Args:
179192 user_element: The HTML element containing the user mention
180193 account_id: The user's account ID
194+ confluence_client: Optional Confluence client for user lookups
181195 """
182196 try :
183197 # Only attempt to get user details if we have a valid confluence client
184- if self . confluence_client is not None :
185- user_details = self . confluence_client .get_user_details_by_accountid (
198+ if confluence_client is not None :
199+ user_details = confluence_client .get_user_details_by_accountid (
186200 account_id
187201 )
188202 display_name = user_details .get ("displayName" , "" )
0 commit comments