diff --git a/CHANGELOG.md b/CHANGELOG.md index af33bf5cf..c9fd765d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - import: Support for nested arrays in odcs v3 importer - lint: ODCS schema is now checked before converting +- Markdown exporter generates wider tables (one column per attribute) instead of inserting HTML line breaks to create multiline table cells (#832) +- Markdown exporter adds a newline at the beginning of each bullet point, to improve compatibility with some readers, like markdown-to-confluence (#832) ### Fixed diff --git a/datacontract/export/markdown_converter.py b/datacontract/export/markdown_converter.py index 9ddebfd9c..da0202eba 100644 --- a/datacontract/export/markdown_converter.py +++ b/datacontract/export/markdown_converter.py @@ -87,16 +87,55 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_ def servers_to_markdown(servers: Dict[str, Server]) -> str: + """ + Generate Markdown representation for all server models inside the given dictionary. + + Args: + servers (Dict): The dictionary with the following structure: + server_name (str): The name of the server. + server (Server): The server model. + + The table will not contain parameters of server models that are left as `None` or unset. + Parameters set on at least one server model are included in the table as columns; + servers that do not have a given parameter get a blank cell in that column. + + Returns: + str: The Markdown representation of a table that describes all servers from the dictionary. 
+ """ if not servers: return "" - markdown_parts = [ - "| Name | Type | Attributes |", - "| ---- | ---- | ---------- |", - ] + attributes = [] + for server in tuple(servers.keys()): + parameters = servers[server].model_dump(exclude_unset=True) + parameters.pop("type") + for parameter in tuple(parameters.keys()): + if parameters[parameter] is not None: + if parameter not in attributes: + attributes.append(parameter) + attributes = tuple(attributes) + dashed_attributes = tuple(["-" * len(attr) for attr in attributes]) + markdown_parts = ["| Server | Type |", "| ------ | ---- |"] + there_are_attributes = len(attributes) > 0 + if there_are_attributes: + markdown_parts[0] += f" {' | '.join(attributes)} |" + markdown_parts[1] += f" {' | '.join(dashed_attributes)} |" for server_name, server in servers.items(): - markdown_parts.append( - f"| {server_name} | {server.type or ''} | {obj_attributes_to_markdown(server, {'type'}, True)} |" - ) + attributes_for_this_server = server.model_dump(exclude_unset=True) + attributes_for_this_server.pop("type") + add_these_attributes = [] + for attribute in tuple(attributes_for_this_server.keys()): + if attributes_for_this_server[attribute] is None: + attributes_for_this_server.pop(attribute) + for attribute in attributes: + if attribute not in attributes_for_this_server: + add_these_attributes.append("") + else: + add_these_attributes.append(str(attributes_for_this_server[attribute])) + add_these_attributes = " | ".join(add_these_attributes) + append_this = f"| {server_name} | {server.type or ''} |" + if there_are_attributes: + append_this += f" {add_these_attributes} |" + markdown_parts.append(append_this) return "\n".join(markdown_parts) @@ -115,16 +154,37 @@ def model_to_markdown(model_name: str, model: Model) -> str: Returns: str: The Markdown representation of the model. 
""" - parts = [ - f"### {model_name}", - f"*{description_to_markdown(model.description)}*", - "", - "| Field | Type | Attributes |", - "| ----- | ---- | ---------- |", - ] + attributes = [] + for field in tuple(model.fields.keys()): + parameters = model.fields[field].model_dump(exclude_unset=True) + for parameter in tuple(parameters.keys()): + if parameters[parameter] is not None: + if parameter not in attributes: + attributes.append(parameter) + attributes = tuple(attributes) + dashed_attributes = tuple(["-" * len(attr) for attr in attributes]) + + parts = [f"### {model_name}", f"*{description_to_markdown(model.description)}*", ""] + there_are_attributes = len(attributes) > 0 + assert there_are_attributes + parts.append(f"| {' | '.join(attributes)} |") + parts.append(f"| {' | '.join(dashed_attributes)} |") # Append generated field rows - parts.append(fields_to_markdown(model.fields)) + for field in tuple(model.fields.keys()): + append_this = [] + parameters = model.fields[field].model_dump(exclude_unset=True) + for parameter in tuple(parameters.keys()): + if parameters[parameter] is None: + parameters.pop(parameter) + for attribute in attributes: + if attribute in parameters: + append_this.append(str(parameters[attribute])) + else: + append_this.append("") + append_this = f"| {' | '.join(append_this)} |" + parts.append(append_this) + return "\n".join(parts) diff --git a/tests/fixtures/markdown/export/expected.md b/tests/fixtures/markdown/export/expected.md index 15e83af1b..b0b01f8ab 100644 --- a/tests/fixtures/markdown/export/expected.md +++ b/tests/fixtures/markdown/export/expected.md @@ -7,10 +7,10 @@ - **contact:** {'name': 'John Doe (Data Product Owner)', 'url': 'https://teams.microsoft.com/l/channel/example/checkout'} ## Servers -| Name | Type | Attributes | -| ---- | ---- | ---------- | -| production | s3 | *One folder per model. One file per day.*
• **environment:** prod
• **format:** json
• **delimiter:** new_line
• **location:** s3://datacontract-example-orders-latest/v2/{model}/*.json
• **roles:** [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] | -| development | s3 | *One folder per model. One file per day.*
• **environment:** dev
• **format:** json
• **delimiter:** new_line
• **location:** s3://datacontract-example-orders-latest/v2/{model}/*.json
• **roles:** [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] | +| Server | Type | description | environment | format | delimiter | location | roles | +| ------ | ---- | ----------- | ----------- | ------ | --------- | -------- | ----- | +| production | s3 | One folder per model. One file per day. | prod | json | new_line | s3://datacontract-example-orders-latest/v2/{model}/*.json | [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] | +| development | s3 | One folder per model. One file per day. | dev | json | new_line | s3://datacontract-example-orders-latest/v2/{model}/*.json | [{'name': 'analyst_us', 'description': 'Access to the data for US region'}, {'name': 'analyst_cn', 'description': 'Access to the data for China region'}] | ## Terms *No description.* @@ -50,22 +50,22 @@ This can help improve customer satisfaction and increase sales. ### orders *One record per order. Includes cancelled and deleted orders.* -| Field | Type | Attributes | -| ----- | ---- | ---------- | -| order_id | None | *No description.*
• **ref:** #/definitions/order_id
• `required`
• `primaryKey`
• `unique` | -| order_timestamp | timestamp | *The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful.*
• `required`
• **tags:** ['business-timestamp']
• **examples:** ['2024-09-09T08:30:00Z'] | -| order_total | long | *Total amount the smallest monetary unit (e.g., cents).*
• `required`
• **examples:** [9999] | -| customer_id | text | *Unique identifier for the customer.*
• **minLength:** 10
• **maxLength:** 20 | -| customer_email_address | text | *The email address, as entered by the customer.*
• **format:** email
• `required`
• `pii`
• **classification:** sensitive
• **quality:** [{'type': 'text', 'description': 'The email address is not verified and may be invalid.'}]
• **lineage:** {'inputFields': [{'namespace': 'com.example.service.checkout', 'name': 'checkout_db.orders', 'field': 'email_address'}]} | -| processed_timestamp | timestamp | *The timestamp when the record was processed by the data platform.*
• `required`
• **config:** {'jsonType': 'string', 'jsonFormat': 'date-time'} | +| ref | title | type | format | required | primaryKey | unique | description | pii | classification | tags | examples | minLength | maxLength | quality | lineage | config | +| --- | ----- | ---- | ------ | -------- | ---------- | ------ | ----------- | --- | -------------- | ---- | -------- | --------- | --------- | ------- | ------- | ------ | +| #/definitions/order_id | Order ID | text | uuid | True | True | True | An internal ID that identifies an order in the online shop. | True | restricted | ['orders'] | ['243c25e5-a081-43a9-aeab-6d5d5b6cb5e2'] | | | | | | +| | | timestamp | | True | | | The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful. | | | ['business-timestamp'] | ['2024-09-09T08:30:00Z'] | | | | | | +| | | long | | True | | | Total amount the smallest monetary unit (e.g., cents). | | | | [9999] | | | | | | +| | | text | | | | | Unique identifier for the customer. | | | | | 10 | 20 | | | | +| | | text | email | True | | | The email address, as entered by the customer. | True | sensitive | | | | | [{'type': 'text', 'description': 'The email address is not verified and may be invalid.'}] | {'inputFields': [{'namespace': 'com.example.service.checkout', 'name': 'checkout_db.orders', 'field': 'email_address'}]} | | +| | | timestamp | | True | | | The timestamp when the record was processed by the data platform. | | | | | | | | | {'jsonType': 'string', 'jsonFormat': 'date-time'} | ### line_items *A single article that is part of an order.* -| Field | Type | Attributes | -| ----- | ---- | ---------- | -| line_item_id | text | *Primary key of the lines_item_id table*
• `required` | -| order_id | None | *No description.*
• **ref:** #/definitions/order_id
• **references:** orders.order_id | -| sku | None | *The purchased article number*
• **ref:** #/definitions/sku | +| type | required | description | ref | title | format | references | pii | classification | tags | examples | pattern | links | +| ---- | -------- | ----------- | --- | ----- | ------ | ---------- | --- | -------------- | ---- | -------- | ------- | ----- | +| text | True | Primary key of the lines_item_id table | | | | | | | | | | | +| text | | An internal ID that identifies an order in the online shop. | #/definitions/order_id | Order ID | uuid | orders.order_id | True | restricted | ['orders'] | ['243c25e5-a081-43a9-aeab-6d5d5b6cb5e2'] | | | +| text | | The purchased article number | #/definitions/sku | Stock Keeping Unit | | | | | ['inventory'] | ['96385074'] | ^[A-Za-z0-9]{8,14}$ | {'wikipedia': 'https://en.wikipedia.org/wiki/Stock_keeping_unit'} | ## Definitions | Name | Type | Domain | Attributes |