Skip to content

Commit b1698ad

Browse files
author
gkowalc
committed
added new get_space_export method
1 parent feb3288 commit b1698ad

File tree

1 file changed

+70
-88
lines changed

1 file changed

+70
-88
lines changed

atlassian/confluence.py

Lines changed: 70 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -2657,120 +2657,101 @@ def get_page_as_word(self, page_id):
26572657
url = "exportword?pageId={pageId}".format(pageId=page_id)
26582658
return self.get(url, headers=headers, not_json_response=True)
26592659

2660-
def export_space_pdf(self, url):
2661-
try:
2662-
running_task = True
2663-
headers = self.form_token_headers
2664-
log.info("Initiate PDF export from Confluence Cloud")
2665-
response = self.session.post(url, headers=headers)
2666-
print(response.text)
2667-
response_string = response.decode(encoding="utf-8", errors="ignore")
2668-
task_id = response_string.split('name="ajs-taskId" content="')[1].split('">')[0]
2669-
poll_url = "/services/api/v1/task/{0}/progress".format(task_id)
2670-
while running_task:
2671-
log.info("Check if export task has completed.")
2672-
progress_response = self.get(poll_url)
2673-
print(progress_response)
2674-
percentage_complete = int(progress_response.get("progress", 0))
2675-
task_state = progress_response.get("state")
2676-
if task_state == "FAILED":
2677-
log.error("PDF conversion not successful.")
2678-
return None
2679-
elif percentage_complete == 100:
2680-
running_task = False
2681-
log.info("Task completed - {task_state}".format(task_state=task_state))
2682-
log.debug("Extract task results to download PDF.")
2683-
task_result_url = progress_response.get("result")
2684-
else:
2685-
log.info(
2686-
"{percentage_complete}% - {task_state}".format(
2687-
percentage_complete=percentage_complete, task_state=task_state
2688-
)
2689-
)
2690-
time.sleep(3)
2691-
log.debug("Task successfully done, querying the task result for the download url")
2692-
# task result url starts with /wiki, remove it.
2693-
task_content = self.get(task_result_url[5:], not_json_response=True)
2694-
download_url = task_content.decode(encoding="utf-8", errors="strict")
2695-
log.debug("Successfully got the download url")
2696-
return download_url
2697-
except IndexError as e:
2698-
log.error(e)
2699-
return None
2700-
27012660
def get_space_export(self, space_key: str, export_type: str) -> str:
2702-
def get_atl_request(url):
2703-
# this is only applicable to html/csv/xml export
2704-
# getting atl_token used for XSRF protection
2705-
response = self.get(url, advanced_mode=True)
2706-
parsed_html = BeautifulSoup(response.text, "html.parser")
2707-
atl_token = parsed_html.find("input", {"name": "atl_token"}).get("value")
2708-
return atl_token
2661+
"""
2662+
Export a Confluence space to a file of the specified type.
2663+
(!) This method was developed for Confluence Cloud and may not work with Confluence on-prem.
2664+
(!) This is an experimental method that does not trigger an officially supported REST endpoint. It may break if Atlassian changes the space export front-end logic.
2665+
2666+
:param space_key: The key of the space to export.
2667+
:param export_type: The type of export to perform. Valid values are: 'html', 'csv', 'xml', 'pdf'.
2668+
:return: The URL to download the exported file.
2669+
"""
2670+
2671+
def get_atl_request(url: str):
2672+
# Nested function that fetches the atl_token used for XSRF protection. This is only applicable to html/csv/xml space exports.
2673+
try:
2674+
response = self.get(url, advanced_mode=True)
2675+
parsed_html = BeautifulSoup(response.text, "html.parser")
2676+
atl_token = parsed_html.find("input", {"name": "atl_token"}).get("value")
2677+
return atl_token
2678+
except Exception as e:
2679+
raise ApiError("Problems with getting the atl_token for get_space_export method :", reason=e)
2680+
2681+
# Checks if the space_key parameter is valid and if the api_token has relevant permissions to the space
2682+
self.get_space(space_key=space_key, expand="permissions")
2683+
27092684
try:
2710-
running_task = True
2711-
headers = self.form_token_headers
2712-
print("Initiate " + str(export_type) + " export from Confluence space " + str(space_key))
2713-
log.info("Initiate " + str(export_type) + " export from Confluence space " + str(space_key))
2714-
form_data = {}
2715-
url = ''
2685+
log.info(
2686+
"Initiated experimental get_space_export method for export type: "
2687+
+ export_type
2688+
+ " from Confluence space: "
2689+
+ space_key
2690+
)
27162691
if export_type == "csv":
27172692
form_data = {
27182693
"atl_token": get_atl_request(f"spaces/exportspacecsv.action?key={space_key}"),
27192694
"exportType": "TYPE_CSV",
27202695
"contentOption": "all",
27212696
"includeComments": "true",
2722-
"confirm": "Export"
2697+
"confirm": "Export",
27232698
}
27242699
elif export_type == "html":
27252700
form_data = {
27262701
"atl_token": get_atl_request(f"spaces/exportspacehtml.action?key={space_key}"),
27272702
"exportType": "TYPE_HTML",
27282703
"contentOption": "visibleOnly",
2729-
"includeComments": True,
2730-
"confirm": "Export"
2704+
"includeComments": "true",
2705+
"confirm": "Export",
27312706
}
27322707
elif export_type == "xml":
27332708
form_data = {
27342709
"atl_token": get_atl_request(f"spaces/exportspacexml.action?key={space_key}"),
27352710
"exportType": "TYPE_XML",
27362711
"contentOption": "all",
27372712
"includeComments": "true",
2738-
"confirm": "Export" }
2739-
elif export_type == "pdf":
2740-
form_data = {
2741-
# "atl_token": get_atl_request(f"spaces/flyingpdf/flyingpdf.action?key={space_key}"),
2742-
"synchronous": "false",
2743-
"contentOption": "visibleOnly",
2744-
"confirm": "Export"
2713+
"confirm": "Export",
27452714
}
2715+
elif export_type == "pdf":
2716+
url = "spaces/flyingpdf/doflyingpdf.action?key=" + space_key
2717+
log.info("Initiate PDF space export from space " + str(space_key))
2718+
return self.get_pdf_download_url_for_confluence_cloud(url)
27462719
else:
2747-
raise ValueError("Invalid export type")
2748-
url = f"/spaces/exportspace.action?key={space_key}"
2749-
# bypass self.confluence_client.post method because it serializes form data as JSON which is wrong
2750-
if export_type == "pdf":
2751-
url = self.url_joiner(url=self.url,
2752-
path=f"spaces/flyingpdf/doflyingpdf.action?key={space_key}")
2753-
elif export_type == "csv" or export_type == "html" or export_type == "xml":
2754-
url = self.url_joiner(url=self.url, path=f"spaces/doexportspace.action?key={space_key}")
2755-
2756-
# Sending a request that trigger the export
2757-
response = self.session.post(url, headers=self.form_token_headers,
2758-
data=form_data)
2720+
raise ValueError("Invalid export_type parameter value. Valid values are: 'html/csv/xml/pdf'")
2721+
url = self.url_joiner(url=self.url, path=f"spaces/doexportspace.action?key={space_key}")
2722+
2723+
# Sending a POST request that triggers the space export.
2724+
response = self.session.post(url, headers=self.form_token_headers, data=form_data)
27592725
parsed_html = BeautifulSoup(response.text, "html.parser")
2760-
# Getting the poll URL to get the export progress status
2761-
poll_url = parsed_html.find("meta", {"name": "ajs-pollURI"}).get("content")
2726+
# Getting the poll URL to get the export progress status
2727+
try:
2728+
poll_url = parsed_html.find("meta", {"name": "ajs-pollURI"}).get("content")
2729+
except Exception as e:
2730+
raise ApiError("Problems with getting the poll_url for get_space_export method :", reason=e)
2731+
running_task = True
27622732
while running_task:
2763-
progress_response = self.get(poll_url)
2764-
if progress_response['complete']:
2765-
parsed_html = BeautifulSoup(progress_response['message'], "html.parser")
2766-
download_url = parsed_html.find("a", {"class": "space-export-download-path"}).get("href")
2767-
return self.url.replace('/wiki', '') + download_url
2768-
time.sleep(15)
2769-
return
2770-
except Exception as e:
2771-
print(e)
2772-
return None
2733+
try:
2734+
progress_response = self.get(poll_url)
2735+
if progress_response["complete"]:
2736+
parsed_html = BeautifulSoup(progress_response["message"], "html.parser")
2737+
download_url = parsed_html.find("a", {"class": "space-export-download-path"}).get("href")
2738+
if self.url in download_url:
2739+
return download_url
2740+
else:
2741+
combined_url = self.url + download_url
2742+
# Ensure only one /wiki is included in the path
2743+
if combined_url.count("/wiki") > 1:
2744+
combined_url = combined_url.replace("/wiki/wiki", "/wiki")
2745+
return combined_url
2746+
time.sleep(15)
2747+
except Exception as e:
2748+
raise ApiError(
2749+
"Encountered error during space export status check from space " + space_key, reason=e
2750+
)
27732751

2752+
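return "None"  # Fallback if the while loop exits without returning; note this is the string "None", not the None object (running_task is never set to False above, so the loop only exits via return or raise)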
2753+
except Exception as e:
2754+
raise ApiError("Encountered error during space export from space " + space_key, reason=e)
27742755

27752756
def export_page(self, page_id):
27762757
"""
@@ -3020,6 +3001,7 @@ def get_pdf_download_url_for_confluence_cloud(self, url):
30203001
and provides a link to download the PDF once the process completes.
30213002
This functions polls the long-running task page and returns the
30223003
download url of the PDF.
3004+
This method is also used by get_space_export() for space -> PDF export.
30233005
:param url: URL to initiate PDF export
30243006
:return: Download url for PDF file
30253007
"""

0 commit comments

Comments
 (0)