Skip to content

Commit 7714850

Browse files
authored
fix: eliminate circular reference in Confluence preprocessor (sooperset#574)
* fix: eliminate circular reference in Confluence preprocessor Remove circular reference between ConfluenceClient and ConfluencePreprocessor that was preventing proper garbage collection and causing memory leaks. The preprocessor no longer stores a reference to the client. Instead, the client is passed as a parameter to methods that need it for user lookups. This follows the same pattern used by JiraPreprocessor. * fix: update test assertions to include confluence client parameter
1 parent 2de1f12 commit 7714850

File tree

9 files changed

+91
-59
lines changed

9 files changed

+91
-59
lines changed

src/mcp_atlassian/confluence/client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,7 @@ def __init__(self, config: ConfluenceConfig | None = None) -> None:
121121
# Import here to avoid circular imports
122122
from ..preprocessing.confluence import ConfluencePreprocessor
123123

124-
self.preprocessor = ConfluencePreprocessor(
125-
base_url=self.config.url, confluence_client=self.confluence
126-
)
124+
self.preprocessor = ConfluencePreprocessor(base_url=self.config.url)
127125

128126
# Test authentication during initialization (in debug mode only)
129127
if logger.isEnabledFor(logging.DEBUG):
@@ -186,4 +184,6 @@ def _process_html_content(
186184
Returns:
187185
Tuple of (processed_html, processed_markdown)
188186
"""
189-
return self.preprocessor.process_html_content(html_content, space_key)
187+
return self.preprocessor.process_html_content(
188+
html_content, space_key, self.confluence
189+
)

src/mcp_atlassian/confluence/comments.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ def get_page_comments(
4343
# Get the content based on format
4444
body = comment_data["body"]["view"]["value"]
4545
processed_html, processed_markdown = (
46-
self.preprocessor.process_html_content(body, space_key=space_key)
46+
self.preprocessor.process_html_content(
47+
body, space_key=space_key, confluence_client=self.confluence
48+
)
4749
)
4850

4951
# Create a copy of the comment data to modify
@@ -117,6 +119,7 @@ def add_comment(self, page_id: str, content: str) -> ConfluenceComment | None:
117119
processed_html, processed_markdown = self.preprocessor.process_html_content(
118120
response.get("body", {}).get("view", {}).get("value", ""),
119121
space_key=space_key,
122+
confluence_client=self.confluence,
120123
)
121124

122125
# Modify the response to include processed content

src/mcp_atlassian/confluence/pages.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def get_page_content(
5454
space_key = page.get("space", {}).get("key", "")
5555
content = page["body"]["storage"]["value"]
5656
processed_html, processed_markdown = self.preprocessor.process_html_content(
57-
content, space_key=space_key
57+
content, space_key=space_key, confluence_client=self.confluence
5858
)
5959

6060
# Use the appropriate content format based on the convert_to_markdown flag
@@ -169,7 +169,7 @@ def get_page_by_title(
169169

170170
content = page["body"]["storage"]["value"]
171171
processed_html, processed_markdown = self.preprocessor.process_html_content(
172-
content, space_key=space_key
172+
content, space_key=space_key, confluence_client=self.confluence
173173
)
174174

175175
# Use the appropriate content format based on the convert_to_markdown flag
@@ -230,7 +230,7 @@ def get_space_pages(
230230
for page in pages:
231231
content = page["body"]["storage"]["value"]
232232
processed_html, processed_markdown = self.preprocessor.process_html_content(
233-
content, space_key=space_key
233+
content, space_key=space_key, confluence_client=self.confluence
234234
)
235235

236236
# Use the appropriate content format based on the convert_to_markdown flag
@@ -477,7 +477,9 @@ def get_page_children(
477477
content = page.get("body", {}).get("storage", {}).get("value", "")
478478
if content:
479479
_, processed_markdown = self.preprocessor.process_html_content(
480-
content, space_key=space_key
480+
content,
481+
space_key=space_key,
482+
confluence_client=self.confluence,
481483
)
482484
content_override = processed_markdown
483485

src/mcp_atlassian/confluence/search.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@ def search(
8282
# Process the excerpt as HTML content
8383
space_key = page.space.key if page.space else ""
8484
_, processed_markdown = self.preprocessor.process_html_content(
85-
excerpt, space_key=space_key
85+
excerpt,
86+
space_key=space_key,
87+
confluence_client=self.confluence,
8688
)
8789
# Create a new page with processed content
8890
page.content = processed_markdown

src/mcp_atlassian/preprocessing/base.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -26,28 +26,28 @@ def get_user_details_by_username(self, username: str) -> dict[str, Any]:
2626
class BasePreprocessor:
2727
"""Base class for text preprocessing operations."""
2828

29-
def __init__(
30-
self, base_url: str = "", confluence_client: ConfluenceClient | None = None
31-
) -> None:
29+
def __init__(self, base_url: str = "") -> None:
3230
"""
3331
Initialize the base text preprocessor.
3432
3533
Args:
3634
base_url: Base URL for API server
37-
confluence_client: Optional Confluence client for user lookups
3835
"""
3936
self.base_url = base_url.rstrip("/") if base_url else ""
40-
self.confluence_client = confluence_client
4137

4238
def process_html_content(
43-
self, html_content: str, space_key: str = ""
39+
self,
40+
html_content: str,
41+
space_key: str = "",
42+
confluence_client: ConfluenceClient | None = None,
4443
) -> tuple[str, str]:
4544
"""
4645
Process HTML content to replace user refs and page links.
4746
4847
Args:
4948
html_content: The HTML content to process
5049
space_key: Optional space key for context
50+
confluence_client: Optional Confluence client for user lookups
5151
5252
Returns:
5353
Tuple of (processed_html, processed_markdown)
@@ -57,8 +57,8 @@ def process_html_content(
5757
soup = BeautifulSoup(html_content, "html.parser")
5858

5959
# Process user mentions
60-
self._process_user_mentions_in_soup(soup)
61-
self._process_user_profile_macros_in_soup(soup)
60+
self._process_user_mentions_in_soup(soup, confluence_client)
61+
self._process_user_profile_macros_in_soup(soup, confluence_client)
6262

6363
# Convert to string and markdown
6464
processed_html = str(soup)
@@ -70,12 +70,15 @@ def process_html_content(
7070
logger.error(f"Error in process_html_content: {str(e)}")
7171
raise
7272

73-
def _process_user_mentions_in_soup(self, soup: BeautifulSoup) -> None:
73+
def _process_user_mentions_in_soup(
74+
self, soup: BeautifulSoup, confluence_client: ConfluenceClient | None = None
75+
) -> None:
7476
"""
7577
Process user mentions in BeautifulSoup object.
7678
7779
Args:
7880
soup: BeautifulSoup object containing HTML
81+
confluence_client: Optional Confluence client for user lookups
7982
"""
8083
# Find all ac:link elements that might contain user mentions
8184
user_mentions = soup.find_all("ac:link")
@@ -86,7 +89,9 @@ def _process_user_mentions_in_soup(self, soup: BeautifulSoup) -> None:
8689
# Case 1: Direct user reference without link-body
8790
account_id = user_ref.get("ri:account-id")
8891
if isinstance(account_id, str):
89-
self._replace_user_mention(user_element, account_id)
92+
self._replace_user_mention(
93+
user_element, account_id, confluence_client
94+
)
9095
continue
9196

9297
# Case 2: User reference with link-body containing @
@@ -96,16 +101,21 @@ def _process_user_mentions_in_soup(self, soup: BeautifulSoup) -> None:
96101
if user_ref and user_ref.get("ri:account-id"):
97102
account_id = user_ref.get("ri:account-id")
98103
if isinstance(account_id, str):
99-
self._replace_user_mention(user_element, account_id)
104+
self._replace_user_mention(
105+
user_element, account_id, confluence_client
106+
)
100107

101-
def _process_user_profile_macros_in_soup(self, soup: BeautifulSoup) -> None:
108+
def _process_user_profile_macros_in_soup(
109+
self, soup: BeautifulSoup, confluence_client: ConfluenceClient | None = None
110+
) -> None:
102111
"""
103112
Process Confluence User Profile macros in BeautifulSoup object.
104113
Replaces <ac:structured-macro ac:name="profile">...</ac:structured-macro>
105114
with the user's display name, typically formatted as @DisplayName.
106115
107116
Args:
108117
soup: BeautifulSoup object containing HTML
118+
confluence_client: Optional Confluence client for user lookups
109119
"""
110120
profile_macros = soup.find_all(
111121
"ac:structured-macro", attrs={"ac:name": "profile"}
@@ -134,26 +144,24 @@ def _process_user_profile_macros_in_soup(self, soup: BeautifulSoup) -> None:
134144
user_identifier_for_log = account_id or userkey
135145
display_name = None
136146

137-
if self.confluence_client and user_identifier_for_log:
147+
if confluence_client and user_identifier_for_log:
138148
try:
139149
if account_id and isinstance(account_id, str):
140-
user_details = (
141-
self.confluence_client.get_user_details_by_accountid(
142-
account_id
143-
)
150+
user_details = confluence_client.get_user_details_by_accountid(
151+
account_id
144152
)
145153
display_name = user_details.get("displayName")
146154
elif userkey and isinstance(userkey, str):
147155
# For Confluence Server/DC, userkey might be the username
148-
user_details = (
149-
self.confluence_client.get_user_details_by_username(userkey)
156+
user_details = confluence_client.get_user_details_by_username(
157+
userkey
150158
)
151159
display_name = user_details.get("displayName")
152160
except Exception as e:
153161
logger.warning(
154162
f"Error fetching user details for profile macro (user: {user_identifier_for_log}): {e}"
155163
)
156-
elif not self.confluence_client:
164+
elif not confluence_client:
157165
logger.warning(
158166
"Confluence client not available for User Profile Macro processing."
159167
)
@@ -171,18 +179,24 @@ def _process_user_profile_macros_in_soup(self, soup: BeautifulSoup) -> None:
171179
macro_element.replace_with(fallback_text)
172180
logger.debug(f"Using fallback for user profile macro: {fallback_text}")
173181

174-
def _replace_user_mention(self, user_element: Tag, account_id: str) -> None:
182+
def _replace_user_mention(
183+
self,
184+
user_element: Tag,
185+
account_id: str,
186+
confluence_client: ConfluenceClient | None = None,
187+
) -> None:
175188
"""
176189
Replace a user mention with the user's display name.
177190
178191
Args:
179192
user_element: The HTML element containing the user mention
180193
account_id: The user's account ID
194+
confluence_client: Optional Confluence client for user lookups
181195
"""
182196
try:
183197
# Only attempt to get user details if we have a valid confluence client
184-
if self.confluence_client is not None:
185-
user_details = self.confluence_client.get_user_details_by_accountid(
198+
if confluence_client is not None:
199+
user_details = confluence_client.get_user_details_by_accountid(
186200
account_id
187201
)
188202
display_name = user_details.get("displayName", "")

src/mcp_atlassian/preprocessing/confluence.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import shutil
55
import tempfile
66
from pathlib import Path
7-
from typing import Any
87

98
from md2conf.converter import (
109
ConfluenceConverterOptions,
@@ -22,15 +21,14 @@
2221
class ConfluencePreprocessor(BasePreprocessor):
2322
"""Handles text preprocessing for Confluence content."""
2423

25-
def __init__(self, base_url: str, **kwargs: Any) -> None:
24+
def __init__(self, base_url: str) -> None:
2625
"""
2726
Initialize the Confluence text preprocessor.
2827
2928
Args:
3029
base_url: Base URL for Confluence API
31-
**kwargs: Additional arguments for the base class
3230
"""
33-
super().__init__(base_url=base_url, **kwargs)
31+
super().__init__(base_url=base_url)
3432

3533
def markdown_to_confluence_storage(
3634
self, markdown_content: str, *, enable_heading_anchors: bool = False

0 commit comments

Comments
 (0)