diff --git a/CHANGELOG.md b/CHANGELOG.md
index af33bf5cf..c9fd765d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- import: Support for nested arrays in odcs v3 importer
- lint: ODCS schema is now checked before converting
+- Markdown exporter now generates wider tables (one column per attribute) instead of using HTML line breaks to create multiline table cells (#832)
+- Markdown exporter adds a newline before each bullet point to improve compatibility with some readers, such as markdown-to-confluence (#832)
### Fixed
diff --git a/datacontract/export/markdown_converter.py b/datacontract/export/markdown_converter.py
index 9ddebfd9c..da0202eba 100644
--- a/datacontract/export/markdown_converter.py
+++ b/datacontract/export/markdown_converter.py
@@ -87,16 +87,55 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_
def servers_to_markdown(servers: Dict[str, Server]) -> str:
+ """
+ Generate Markdown representation for all server models inside the given dictionary.
+
+ Args:
+ servers (Dict): The dictionary with the following structure:
+ server_name (str): The name of the server.
+ server (Server): The server model.
+
+ The table will not contain parameters of server models that are left as `None` or unset.
+ Parameters that are set on at least one server model are included as table columns;
+ servers that do not set a given parameter get a blank cell in that column.
+
+ Returns:
+ str: The Markdown representation of a table that describes all servers in the dictionary.
+ """
if not servers:
return ""
- markdown_parts = [
- "| Name | Type | Attributes |",
- "| ---- | ---- | ---------- |",
- ]
+ attributes = []
+ for server in tuple(servers.keys()):
+ parameters = servers[server].model_dump(exclude_unset=True)
+ parameters.pop("type")
+ for parameter in tuple(parameters.keys()):
+ if parameters[parameter] is not None:
+ if parameter not in attributes:
+ attributes.append(parameter)
+ attributes = tuple(attributes)
+ dashed_attributes = tuple(["-" * len(attr) for attr in attributes])
+ markdown_parts = ["| Server | Type |", "| ------ | ---- |"]
+ there_are_attributes = len(attributes) > 0
+ if there_are_attributes:
+ markdown_parts[0] += f" {' | '.join(attributes)} |"
+ markdown_parts[1] += f" {' | '.join(dashed_attributes)} |"
for server_name, server in servers.items():
- markdown_parts.append(
- f"| {server_name} | {server.type or ''} | {obj_attributes_to_markdown(server, {'type'}, True)} |"
- )
+ attributes_for_this_server = server.model_dump(exclude_unset=True)
+ attributes_for_this_server.pop("type")
+ add_these_attributes = []
+ for attribute in tuple(attributes_for_this_server.keys()):
+ if attributes_for_this_server[attribute] is None:
+ attributes_for_this_server.pop(attribute)
+ for attribute in attributes:
+ if attribute not in attributes_for_this_server:
+ add_these_attributes.append("")
+ else:
+ add_these_attributes.append(str(attributes_for_this_server[attribute]))
+ add_these_attributes = " | ".join(add_these_attributes)
+ append_this = f"| {server_name} | {server.type or ''} |"
+ if there_are_attributes:
+ append_this += f" {add_these_attributes} |"
+ markdown_parts.append(append_this)
return "\n".join(markdown_parts)
@@ -115,16 +154,37 @@ def model_to_markdown(model_name: str, model: Model) -> str:
Returns:
str: The Markdown representation of the model.
"""
- parts = [
- f"### {model_name}",
- f"*{description_to_markdown(model.description)}*",
- "",
- "| Field | Type | Attributes |",
- "| ----- | ---- | ---------- |",
- ]
+ attributes = []
+ for field in tuple(model.fields.keys()):
+ parameters = model.fields[field].model_dump(exclude_unset=True)
+ for parameter in tuple(parameters.keys()):
+ if parameters[parameter] is not None:
+ if parameter not in attributes:
+ attributes.append(parameter)
+ attributes = tuple(attributes)
+ dashed_attributes = tuple(["-" * len(attr) for attr in attributes])
+
+ parts = [f"### {model_name}", f"*{description_to_markdown(model.description)}*", ""]
+ there_are_attributes = len(attributes) > 0
+ assert there_are_attributes
+ parts.append(f"| {' | '.join(attributes)} |")
+ parts.append(f"| {' | '.join(dashed_attributes)} |")
# Append generated field rows
- parts.append(fields_to_markdown(model.fields))
+ for field in tuple(model.fields.keys()):
+ append_this = []
+ parameters = model.fields[field].model_dump(exclude_unset=True)
+ for parameter in tuple(parameters.keys()):
+ if parameters[parameter] is None:
+ parameters.pop(parameter)
+ for attribute in attributes:
+ if attribute in parameters:
+ append_this.append(str(parameters[attribute]))
+ else:
+ append_this.append("")
+ append_this = f"| {' | '.join(append_this)} |"
+ parts.append(append_this)
+
return "\n".join(parts)
diff --git a/tests/fixtures/markdown/export/expected.md b/tests/fixtures/markdown/export/expected.md
index 15e83af1b..b0b01f8ab 100644
--- a/tests/fixtures/markdown/export/expected.md
+++ b/tests/fixtures/markdown/export/expected.md
@@ -7,10 +7,10 @@
- **contact:** {'name': 'John Doe (Data Product Owner)', 'url': 'https://teams.microsoft.com/l/channel/example/checkout'}
## Servers
-| Name | Type | Attributes |
-| ---- | ---- | ---------- |
-| production | s3 | *One folder per model. One file per day.*
• **environment:** prod
• **format:** json
• **delimiter:** new_line
• **location:** s3://datacontract-example-orders-latest/v2/{model}/*.json
• **roles:** [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] |
-| development | s3 | *One folder per model. One file per day.*
• **environment:** dev
• **format:** json
• **delimiter:** new_line
• **location:** s3://datacontract-example-orders-latest/v2/{model}/*.json
• **roles:** [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] |
+| Server | Type | description | environment | format | delimiter | location | roles |
+| ------ | ---- | ----------- | ----------- | ------ | --------- | -------- | ----- |
+| production | s3 | One folder per model. One file per day. | prod | json | new_line | s3://datacontract-example-orders-latest/v2/{model}/*.json | [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] |
+| development | s3 | One folder per model. One file per day. | dev | json | new_line | s3://datacontract-example-orders-latest/v2/{model}/*.json | [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] |
## Terms
*No description.*
@@ -50,22 +50,22 @@ This can help improve customer satisfaction and increase sales.
### orders
*One record per order. Includes cancelled and deleted orders.*
-| Field | Type | Attributes |
-| ----- | ---- | ---------- |
-| order_id | None | *No description.*
• **ref:** #/definitions/order_id
• `required`
• `primaryKey`
• `unique` |
-| order_timestamp | timestamp | *The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful.*
• `required`
• **tags:** ['business-timestamp']
• **examples:** ['2024-09-09T08:30:00Z'] |
-| order_total | long | *Total amount the smallest monetary unit (e.g., cents).*
• `required`
• **examples:** [9999] |
-| customer_id | text | *Unique identifier for the customer.*
• **minLength:** 10
• **maxLength:** 20 |
-| customer_email_address | text | *The email address, as entered by the customer.*
• **format:** email
• `required`
• `pii`
• **classification:** sensitive
• **quality:** [{'type': 'text', 'description': 'The email address is not verified and may be invalid.'}]
• **lineage:** {'inputFields': [{'namespace': 'com.example.service.checkout', 'name': 'checkout_db.orders', 'field': 'email_address'}]} |
-| processed_timestamp | timestamp | *The timestamp when the record was processed by the data platform.*
• `required`
• **config:** {'jsonType': 'string', 'jsonFormat': 'date-time'} |
+| ref | title | type | format | required | primaryKey | unique | description | pii | classification | tags | examples | minLength | maxLength | quality | lineage | config |
+| --- | ----- | ---- | ------ | -------- | ---------- | ------ | ----------- | --- | -------------- | ---- | -------- | --------- | --------- | ------- | ------- | ------ |
+| #/definitions/order_id | Order ID | text | uuid | True | True | True | An internal ID that identifies an order in the online shop. | True | restricted | ['orders'] | ['243c25e5-a081-43a9-aeab-6d5d5b6cb5e2'] | | | | | |
+| | | timestamp | | True | | | The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful. | | | ['business-timestamp'] | ['2024-09-09T08:30:00Z'] | | | | | |
+| | | long | | True | | | Total amount the smallest monetary unit (e.g., cents). | | | | [9999] | | | | | |
+| | | text | | | | | Unique identifier for the customer. | | | | | 10 | 20 | | | |
+| | | text | email | True | | | The email address, as entered by the customer. | True | sensitive | | | | | [{'type': 'text', 'description': 'The email address is not verified and may be invalid.'}] | {'inputFields': [{'namespace': 'com.example.service.checkout', 'name': 'checkout_db.orders', 'field': 'email_address'}]} | |
+| | | timestamp | | True | | | The timestamp when the record was processed by the data platform. | | | | | | | | | {'jsonType': 'string', 'jsonFormat': 'date-time'} |
### line_items
*A single article that is part of an order.*
-| Field | Type | Attributes |
-| ----- | ---- | ---------- |
-| line_item_id | text | *Primary key of the lines_item_id table*
• `required` |
-| order_id | None | *No description.*
• **ref:** #/definitions/order_id
• **references:** orders.order_id |
-| sku | None | *The purchased article number*
• **ref:** #/definitions/sku |
+| type | required | description | ref | title | format | references | pii | classification | tags | examples | pattern | links |
+| ---- | -------- | ----------- | --- | ----- | ------ | ---------- | --- | -------------- | ---- | -------- | ------- | ----- |
+| text | True | Primary key of the lines_item_id table | | | | | | | | | | |
+| text | | An internal ID that identifies an order in the online shop. | #/definitions/order_id | Order ID | uuid | orders.order_id | True | restricted | ['orders'] | ['243c25e5-a081-43a9-aeab-6d5d5b6cb5e2'] | | |
+| text | | The purchased article number | #/definitions/sku | Stock Keeping Unit | | | | | ['inventory'] | ['96385074'] | ^[A-Za-z0-9]{8,14}$ | {'wikipedia': 'https://en.wikipedia.org/wiki/Stock_keeping_unit'} |
## Definitions
| Name | Type | Domain | Attributes |