Skip to content

Commit 6a085e3

Browse files
authored
[Confluence] new method added confluence_get_tables_from_page +requirements.txt (#1281)
Co-authored-by: gkowalc <gkowalc>
1 parent 3e1ef05 commit 6a085e3

File tree

5 files changed

+64
-0
lines changed

5 files changed

+64
-0
lines changed

atlassian/confluence.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from requests import HTTPError
88
import requests
99
from deprecated import deprecated
10+
from bs4 import BeautifulSoup
1011
from atlassian import utils
1112
from .errors import ApiError, ApiNotFoundError, ApiPermissionError, ApiValueError, ApiConflictError, ApiNotAcceptable
1213
from .rest_client import AtlassianRestAPI
@@ -356,6 +357,46 @@ def get_page_by_id(self, page_id, expand=None, status=None, version=None):
356357

357358
return response
358359

360+
def get_tables_from_page(self, page_id):
    """
    Fetches html tables added to confluence page

    The page body is requested in storage format (``expand="body.storage"``)
    and parsed with BeautifulSoup using the ``lxml`` parser. Every ``<table>``
    becomes a list of rows and every row a list of cell texts, with the
    ``<th>`` cells of a row listed before its ``<td>`` cells.

    :param page_id: integer confluence page_id
    :return: one of
        - a JSON string with page_id, number_of_tables_in_page and
          tables_content (list of tables -> list of rows -> list of cell
          texts) when the page contains at least one table;
        - the dict ``{"No tables found for page: ": page_id}`` when the page
          has content but no table;
        - the set ``{"Page content is empty"}`` when the page body is empty;
        - None when an unexpected (non-HTTP) error was caught and logged.
    :raises ApiError: when the request fails with HTTP 404
    :raises HTTPError: any other HTTP failure is re-raised unchanged
    """
    try:
        page_content = self.get_page_by_id(page_id, expand="body.storage")["body"]["storage"]["value"]

        if not page_content:
            # NOTE(review): this is a set literal, not a dict; it is kept as-is
            # so existing callers see the same value.
            return {"Page content is empty"}

        # Calling a soup/tag object is shorthand for find_all().
        tables_raw = [
            [[cell.text for cell in row("th") + row("td")] for row in table("tr")]
            for table in BeautifulSoup(page_content, features="lxml")("table")
        ]
        if not tables_raw:
            return {
                "No tables found for page: ": page_id,
            }

        return json.dumps(
            {
                "page_id": page_id,
                "number_of_tables_in_page": len(tables_raw),
                "tables_content": tables_raw,
            }
        )
    except HTTPError as e:
        if e.response.status_code == 404:
            # Raise ApiError as the documented reason is ambiguous
            # The page id is passed as a lazy %-argument; without the
            # placeholder the logging module fails to format the record.
            log.error("Couldn't retrieve tables from page %s", page_id)
            raise ApiError(
                "There is no content with the given pageid, pageid params is not an integer "
                "or the calling user does not have permission to view the page",
                reason=e,
            )
        # Any other HTTP status must not be swallowed into an implicit None.
        raise
    except Exception as e:
        # Best-effort: unexpected parsing/lookup errors are logged, and the
        # method then returns None.
        log.error("Error occurred: %s", e)
359400
def get_page_labels(self, page_id, prefix=None, start=None, limit=None):
360401
"""
361402
Returns the list of labels on a piece of Content.

docs/confluence.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ Page actions
152152
# Add comment into page
153153
confluence.add_comment(page_id, text)
154154
155+
# Fetch tables from Confluence page
156+
confluence.get_tables_from_page(page_id)
157+
155158
Template actions
156159
----------------
157160

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""Example: fetch the HTML tables of a Confluence page with get_tables_from_page."""
import logging

from atlassian import Confluence

# Configure logging before the client is created and used.
logging.basicConfig(level=logging.INFO)

confluence = Confluence(
    url="<instance_url>",
    username="<user_email>",
    password="api_key",
)

# page_id is the id of the page you want to get the tables from.
page_id = 393464

result = confluence.get_tables_from_page(page_id)
print(result)
# Let's say the page has two tables, each with 3 columns, a header row and 2 data rows.
# The method should return the following output: {"page_id": 393464, "number_of_tables_in_page": 2, "tables_content": [[["header1", "header2", "header3"], ["h1r1", "h2r1", "h3r1"], ["h1r2", "h2r2", "h3r2"]], [["table2 header1", "table2 header2", "table2 header3"], ["h1r1", "h2r1", "h3r1"], ["h1r2", "h2r2", "h3r2"]]]}
# tables_content is a list of lists of lists. Each nested list represents a table. Each nested list inside a table represents a row.

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@ six
44
oauthlib
55
requests_oauthlib
66
requests-kerberos==0.14.0
7+
bs4
8+
lxml

tox.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ deps =
1111
pytest-cov
1212
coverage
1313
requests
14+
bs4
1415
commands =
1516
coverage erase
1617
pytest -v --cov=atlassian --cov-branch --cov-report=xml

0 commit comments

Comments
 (0)