@@ -2657,120 +2657,101 @@ def get_page_as_word(self, page_id):
2657
2657
url = "exportword?pageId={pageId}" .format (pageId = page_id )
2658
2658
return self .get (url , headers = headers , not_json_response = True )
2659
2659
2660
def export_space_pdf(self, url):
    """
    Start a PDF export on Confluence Cloud and wait for its download URL.

    Posts to ``url`` to start the export, reads the task id from the
    ``ajs-taskId`` meta tag of the returned page, then polls the task
    progress endpoint every 3 seconds until the task reports ``FAILED``
    or reaches 100%.

    :param url: URL to initiate PDF export
    :return: Download url for the PDF file, or None when the task id cannot
             be found in the response, the task fails, or the finished task
             carries no result URL
    """
    log.info("Initiate PDF export from Confluence Cloud")
    response = self.session.post(url, headers=self.form_token_headers)

    # Read the page through ``.text``: the response object is not a bytes
    # value, so calling ``.decode()`` on it fails before the task id is parsed.
    try:
        task_id = response.text.split('name="ajs-taskId" content="')[1].split('">')[0]
    except IndexError as e:
        # The marker is absent, so no export task was started.
        log.error(e)
        return None

    poll_url = "/services/api/v1/task/{0}/progress".format(task_id)
    while True:
        log.info("Check if export task has completed.")
        progress_response = self.get(poll_url)
        log.debug(progress_response)
        percentage_complete = int(progress_response.get("progress", 0))
        task_state = progress_response.get("state")
        if task_state == "FAILED":
            log.error("PDF conversion not successful.")
            return None
        if percentage_complete == 100:
            log.info("Task completed - {task_state}".format(task_state=task_state))
            log.debug("Extract task results to download PDF.")
            task_result_url = progress_response.get("result")
            break
        log.info(
            "{percentage_complete}% - {task_state}".format(
                percentage_complete=percentage_complete, task_state=task_state
            )
        )
        # Only wait when another poll is actually needed.
        time.sleep(3)

    if not task_result_url:
        log.error("PDF export task finished without a result url.")
        return None

    log.debug("Task successfully done, querying the task result for the download url")
    # The task result url normally starts with /wiki, which self.get() adds
    # again; strip it only when it is really there.
    if task_result_url.startswith("/wiki"):
        task_result_url = task_result_url[len("/wiki"):]
    task_content = self.get(task_result_url, not_json_response=True)
    download_url = task_content.decode(encoding="utf-8", errors="strict")
    log.debug("Successfully got the download url")
    return download_url
2700
-
2701
2660
def get_space_export(self, space_key: str, export_type: str) -> str:
    """
    Export a Confluence space to a file of the specified type.
    (!) This method was developed for Confluence Cloud and may not work with Confluence on-prem.
    (!) This is an experimental method that does not trigger an officially supported REST endpoint.
        It may break if Atlassian changes the space export front-end logic.

    :param space_key: The key of the space to export.
    :param export_type: The type of export to perform. Valid values are: 'html', 'csv', 'xml', 'pdf'.
    :return: The URL to download the exported file.
    :raises ApiError: If export_type is not one of the valid values, or if any
                      step of the export fails. The underlying error is
                      passed along as ``reason``.
    """
    # Per-type settings for the form based exports:
    # export_type -> (page that carries the atl_token, exportType, contentOption)
    form_exports = {
        "csv": ("spaces/exportspacecsv.action", "TYPE_CSV", "all"),
        "html": ("spaces/exportspacehtml.action", "TYPE_HTML", "visibleOnly"),
        "xml": ("spaces/exportspacexml.action", "TYPE_XML", "all"),
    }

    # Reject a bad export_type before any request is sent. It is still
    # reported as ApiError (wrapping a ValueError) so callers see one error type.
    if export_type not in (*form_exports, "pdf"):
        raise ApiError(
            "Encountered error during space export from space " + str(space_key),
            reason=ValueError("Invalid export_type parameter value. Valid values are: 'html/csv/xml/pdf'"),
        )

    def get_atl_request(url: str) -> str:
        # Nested function that fetches the atl_token used for XSRF protection.
        # Only applicable to html/csv/xml space exports.
        try:
            response = self.get(url, advanced_mode=True)
            parsed_html = BeautifulSoup(response.text, "html.parser")
            return parsed_html.find("input", {"name": "atl_token"}).get("value")
        except Exception as e:
            raise ApiError(
                "Problems with getting the atl_token for get_space_export method :", reason=e
            ) from e

    # Checks that space_key is valid and that the api_token has the relevant
    # permissions on the space; errors from get_space() propagate unchanged.
    self.get_space(space_key=space_key, expand="permissions")

    log.info(
        "Initiated experimental get_space_export method for export type: %s from Confluence space: %s",
        export_type,
        space_key,
    )

    try:
        if export_type == "pdf":
            # PDF export runs as a long-running task handled by a dedicated method.
            url = "spaces/flyingpdf/doflyingpdf.action?key=" + space_key
            log.info("Initiate PDF space export from space " + str(space_key))
            return self.get_pdf_download_url_for_confluence_cloud(url)

        token_page, export_kind, content_option = form_exports[export_type]
        form_data = {
            "atl_token": get_atl_request(f"{token_page}?key={space_key}"),
            "exportType": export_kind,
            "contentOption": content_option,
            "includeComments": "true",
            "confirm": "Export",
        }
        url = self.url_joiner(url=self.url, path=f"spaces/doexportspace.action?key={space_key}")

        # Sending a POST request that triggers the space export. The session is
        # used directly so that form_data is sent as form fields.
        response = self.session.post(url, headers=self.form_token_headers, data=form_data)
        parsed_html = BeautifulSoup(response.text, "html.parser")

        # Getting the poll URL to get the export progress status
        try:
            poll_url = parsed_html.find("meta", {"name": "ajs-pollURI"}).get("content")
        except Exception as e:
            raise ApiError(
                "Problems with getting the poll_url for get_space_export method :", reason=e
            ) from e

        # Poll until the export reports completion; the loop only ends by
        # returning the download URL or by raising.
        while True:
            try:
                progress_response = self.get(poll_url)
                if progress_response["complete"]:
                    parsed_html = BeautifulSoup(progress_response["message"], "html.parser")
                    download_url = parsed_html.find("a", {"class": "space-export-download-path"}).get("href")
                    if self.url in download_url:
                        return download_url
                    # self.url and the relative download path may both carry
                    # the /wiki prefix; ensure it appears only once.
                    return (self.url + download_url).replace("/wiki/wiki", "/wiki")
            except Exception as e:
                raise ApiError(
                    "Encountered error during space export status check from space " + space_key, reason=e
                ) from e
            time.sleep(15)
    except ApiError:
        # Already carries a specific message; do not wrap it a second time.
        raise
    except Exception as e:
        raise ApiError("Encountered error during space export from space " + space_key, reason=e) from e
2774
2755
2775
2756
def export_page (self , page_id ):
2776
2757
"""
@@ -3020,6 +3001,7 @@ def get_pdf_download_url_for_confluence_cloud(self, url):
3020
3001
and provides a link to download the PDF once the process completes.
3021
3002
This functions polls the long-running task page and returns the
3022
3003
download url of the PDF.
3004
+ This method is used in get_space_export() method for space-> PDF export.
3023
3005
:param url: URL to initiate PDF export
3024
3006
:return: Download url for PDF file
3025
3007
"""
0 commit comments