Skip to content
Merged
Show file tree
Hide file tree
Changes from 77 commits
Commits
Show all changes
82 commits
Select commit Hold shift + click to select a range
576ae66
feat:spec-dataframe-utility
Feb 21, 2025
35406ef
fix:QuerySpecsOptionalResponseModel
Feb 24, 2025
50a4e00
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 24, 2025
9a0c7a8
fix:PullResponseModelBranch
Feb 24, 2025
52dfaee
fix:PR Comments
Feb 24, 2025
4fcd9c8
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 24, 2025
1362c3d
test:IntegrationTests
Feb 24, 2025
58e5bd8
feat: add projection and update response models
Shri2109 Feb 24, 2025
abba747
fix:PRComments
Feb 25, 2025
b6df853
refactor: update query products response model
Shri2109 Feb 25, 2025
fee23c0
feat:NewResponseModel
Feb 25, 2025
4ec24f7
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 25, 2025
8a53dd2
fix:PRComments
Feb 25, 2025
f94c805
refactor: include a common products response
Shri2109 Feb 25, 2025
256b80f
fix:DeleteUnwantedResponseModel
Feb 25, 2025
c10dd6e
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 25, 2025
c84db16
refactor: include unqiue models for product request and response
Shri2109 Feb 25, 2025
06118e0
refactor: change from ProductRequest to Product
Shri2109 Feb 26, 2025
b619beb
fix:PRComments
Feb 26, 2025
38a69f0
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 26, 2025
de85d37
fix:PRComments
Feb 26, 2025
7872a98
Merge branch 'users/sam-rishi/fix-projection' of https://github.com/s…
Feb 26, 2025
10714ad
fix:PRComments
Feb 26, 2025
a48b2a8
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 26, 2025
b1ec916
fix:PRCommnets
Feb 26, 2025
3cf7a28
refactor: add separate projection and order-by enums and add update r…
Shri2109 Feb 26, 2025
fb9653a
refactor: update request models and product enums
Shri2109 Feb 26, 2025
f0eefca
Merge pull request #3 from shri2k2/users/sam-rishi/fix-spec-response-…
sam-rishi Feb 27, 2025
8ddd06a
fix:PRComments
Feb 27, 2025
ac3e92f
fix:PRCommentsexit
Feb 27, 2025
cfb7835
refactor: update specification enum name
Shri2109 Feb 27, 2025
e35715b
feat: add retry mechanism for both specs and product clients
Shri2109 Feb 28, 2025
8d3d411
fix:PRComments
Feb 28, 2025
5ad070b
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Feb 28, 2025
dff0ac1
fix:MergeChanges
Feb 28, 2025
edd39ed
fix:PRComments
Mar 1, 2025
dd17ab0
fix:PRComments
Mar 1, 2025
8286a63
fix:PRComments
Mar 1, 2025
1e73458
test: reformat spec column projection test case
Shri2109 Mar 3, 2025
070d68f
refactor: change from ProductResponse to Product
Shri2109 Mar 3, 2025
475a06c
fix:PRComments
Mar 3, 2025
454b2bc
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
4f88b24
fix:PRCOmments
Mar 3, 2025
8f83083
fix:PRComments
Mar 3, 2025
9b9bcdc
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
023e3a7
fix:PRComments
Mar 3, 2025
18de35d
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
d4c6507
fix:SpecsResponseMerge
Mar 3, 2025
d7edfdf
fix:PRComments
Mar 3, 2025
874a38a
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
27f6967
fix:PRComments
Mar 3, 2025
1fab252
fix:PRComments
Mar 3, 2025
55bed9b
fix:PRComments
Mar 3, 2025
9606ab8
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
8e2cce9
fix:MergeSpecsModel
Mar 3, 2025
110c2da
refactor:SpecsModelsAndProductDocstring
Mar 4, 2025
c22cc97
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 4, 2025
32d94a7
fix:PRComments
Mar 4, 2025
9410c88
refactor:ConditionTestcase
Mar 4, 2025
fc4b24a
fix:PRComment
Mar 4, 2025
19d2a3d
Merge branch 'master' of https://github.com/shri2k2/nisystemlink-clie…
Mar 5, 2025
4ee7ea9
refactor:DataframeUtilitiesWithoutBatchQuery
Mar 6, 2025
b5de173
refactor:UnitTestClassName
Mar 6, 2025
79666a5
refactor:DataframeUtilityAndUnitTests
RSam-NI Mar 6, 2025
8b0d443
refactor:ExpectedsDataframeHelperMethod
Mar 7, 2025
b68266a
fix:PRComments
Mar 7, 2025
b11ec4a
refactor:CallableConditionFormatArgument
Mar 7, 2025
2961d83
refactor:docstring
Mar 7, 2025
3fdcc65
fix:PRComments
Mar 7, 2025
bf83d70
fix:DebugStatements
Mar 7, 2025
68acd58
fix:PRComments
Mar 10, 2025
719c965
test:serialize_conditions_to_string
Mar 10, 2025
ab76142
refactor:ConditionFormatting
Mar 11, 2025
fc8cabd
fix:PoetryLockFile
Mar 11, 2025
7e0a9da
refactor:DocstringForPublicMethod
Mar 11, 2025
f89ae64
refactor:ExposeConditionFormatMethod
Mar 11, 2025
9e1c940
fix:PRComments
Mar 11, 2025
b39df67
refactor:AddCondtionPerRow
Mar 12, 2025
85f4c14
fix:PRComments
Mar 13, 2025
1bdd4e6
fix:pyproject.toml
Mar 13, 2025
f79f0ff
refactor:StringLiteralSyntax
Mar 13, 2025
9322228
fix:StringLiteral
Mar 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions nisystemlink/clients/spec/utilities/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from ._dataframe_utilities import (
convert_specs_to_dataframe,
summarize_conditions_as_a_string,
)

# flake8: noqa
6 changes: 6 additions & 0 deletions nisystemlink/clients/spec/utilities/_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class DataFrameHeaders:
    """Column-header constants used when laying out the specs dataframe."""

    # Prefix that identifies a dataframe column as a condition column.
    CONDITION_COLUMN_HEADER_PREFIX = "condition_"

    # Prefix that identifies a dataframe column as a spec property column.
    PROPERTY_COLUMN_HEADER_PREFIX = "properties."

    # Exact header of the keywords column.
    KEYWORDS_COLUMN_HEADER = "keywords"
330 changes: 330 additions & 0 deletions nisystemlink/clients/spec/utilities/_dataframe_utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,330 @@
from typing import Any, Callable, Dict, List, Optional, Union

import pandas as pd
from nisystemlink.clients.spec.models._condition import (
Condition,
NumericConditionValue,
StringConditionValue,
)
from nisystemlink.clients.spec.models._specification import (
Specification,
SpecificationLimit,
SpecificationType,
)
from nisystemlink.clients.spec.utilities._constants import DataFrameHeaders


def summarize_conditions_as_a_string(
    conditions: List[Condition],
) -> List[Dict[str, str]]:
    """Summarize every condition of a spec as one human-readable string.

    Args:
        conditions: List of all conditions in a spec.

    Returns:
        A list containing exactly one dictionary. Each key is the column
        header generated for a condition, i.e.
        "condition_<conditionName>(<conditionUnit>)", where the unit part is
        only present for numeric conditions that have a unit. Each value is
        the condition's serialized values joined with ", ": numeric ranges
        are rendered inside '[]' with their fields separated by "; ",
        followed by the discrete values. Conditions whose name or value is
        falsy are skipped.
    """
    summary: Dict[str, str] = {}

    for condition in conditions:
        # A condition needs both a name and a value to produce a column.
        if not (condition.name and condition.value):
            continue

        header = __generate_condition_column_header(condition)
        summary[header] = ", ".join(__serialize_condition_value(condition))

    # Always a single row: all conditions of the spec share one dictionary.
    return [summary]


def convert_specs_to_dataframe(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add an optional parameter set_spec_id_product_id_workspace_as_index which can be useful for further mapping with other df?

@suriyaprashath-ni, @santhoshramaraj?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all three combined only make a unique combination

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We didn't discuss it in the meeting this morning, but what you do with the conditions mapping function would impact what you'd want the index to be. It's also not clear to me with this parameter whether that would mean you're creating a MultiIndex index or a new column that is the serialized combination of these columns. I would expect a MultiIndex would be more useful. Either way, doing this would impact your data access in ways that probably would not help usability. Workspace and product IDs are not very commonly known, compared with their name and part number equivalents.

I'm inclined to leave this unindexed or indexed by id if it is available in the projection (not specId) and let the caller reindex in whatever way makes sense for them.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unindexed is fine if the multi-index might not be that useful. Indexing by the Id of the spec might also not be that useful, as it is not generally used that much in other scenarios. For example, for mapping a spec to a step, SpecId is generally used by users instead of id.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm no pandas expert but as far as I can tell you do not need to join by index, you can join (merge technically) by any column. Presumably it is faster if you join by index, and it looks like the index is maintained across the join (as a MultiIndex) which can have some value.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, MultiIndex is an advanced topic in pandas. It is uncommon for users to encounter it initially. It is best to leave it unindexed and let the user decide what columns to merge by.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to create an example for this, similar to how we create examples for client APIs, as we are exposing a callback function. We can use the same code as our default condition format for the example too.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Examples for the other data frames would be useful, too, but it doesn't need to be in the same PR as the functionality if that is easier for your team.

specs: List[Specification],
condition_format: Optional[
Callable[[List[Condition]], List[Dict[str, Any]]]
] = None,
) -> pd.DataFrame:
"""Creates a Pandas DataFrame for the specs.

Args:
specs: List of specs.
condition_format: A callback function which takes in a list of condition of a spec and returns
a list of dictionary of condition and its values. The dictionary keys
should be the condition name and the values should be the condition
value in any format you need. Dataframe rows will be constructed based on
these list of dictionaries. Each dictionary in the list indicates a row.
If there is more than one dictionary in the list, it will be considered as a new
row and other spec column data will be duplicated. Keys will be used as the dataframe
column header and values will be used as the row cells for the
respective column header.
If not passed or None is passed, condition column header will be condition name and
corresponding row value will be condition value.
For all the condition columns to be grouped together in the dataframe,
the dictionary key should have the prefix "condition_".
If condition value is needed as a string summary of condition data, the public method
`summarize_conditions_as_a_string` can be provided as this callback function.

Returns:
A Pandas DataFrame with the each spec fields having a separate column.
Following fields are split into sub-columns.
- conditions: format of the condition columns are decided by the `condition_format`
argument of this function.
- Properties: All the unique properties across all specs will be split into separate columns.
For example, properties.property1, properties.property2, etc.
"""
condition_format = condition_format or __default_condition_formatting

specs_dict = [
__convert_spec_to_dict(spec=spec, condition=condition)
for spec in specs
for condition in (
condition_format(spec.conditions) if spec.conditions else [{}]
)
]

specs_dataframe = pd.json_normalize(specs_dict)
specs_dataframe = __format_specs_columns(specs_dataframe=specs_dataframe)
specs_dataframe.dropna(axis="columns", how="all", inplace=True)

return specs_dataframe


def __convert_spec_to_dict(
    spec: Specification, condition: Dict[str, Any]
) -> Dict[str, Any]:
    """Flatten a spec and one condition row into a single dictionary.

    Args:
        spec: Spec object.
        condition: Condition row, as a dictionary, that is merged into the
            output spec dictionary.

    Returns:
        Dictionary containing the spec's attributes (except "type", "limit"
        and "conditions", which are handled separately), followed by the
        serialized type and limit when they are set, followed by the entries
        of the provided condition dictionary. Later entries override earlier
        ones that share a key.
    """
    separately_handled_fields = ("type", "limit", "conditions")

    spec_dict: Dict[str, Any] = {
        field_name: field_value
        for field_name, field_value in vars(spec).items()
        if field_name not in separately_handled_fields
    }

    if spec.type:
        spec_dict.update(__serialize_type(spec.type))
    if spec.limit:
        spec_dict.update(__serialize_limits(spec.limit))

    # Condition entries go last so they win on key collisions.
    spec_dict.update(condition.items())

    return spec_dict

def __default_condition_formatting(
    conditions: List[Condition],
) -> List[Dict[str, Any]]:
    """Map each condition name to its raw condition value.

    Args:
        conditions: List of all conditions in a spec.

    Returns:
        A list containing exactly one dictionary whose keys are the condition
        names and whose values are the corresponding condition value objects,
        unmodified. Conditions whose name or value is falsy are skipped.
    """
    values_by_name: Dict[str, Any] = {}

    for condition in conditions:
        if condition.name and condition.value:
            values_by_name[condition.name] = condition.value

    return [values_by_name]


def __serialize_limits(limit: SpecificationLimit) -> Dict[str, str]:
    """Flatten a spec limit into "limit.<attribute>" entries.

    Args:
        limit: Limit of a spec.

    Returns:
        Dictionary with one entry per attribute of the limit object. Each key
        is the attribute name prefixed with "limit." and each value is the
        attribute's value, unmodified.
    """
    serialized_limit = {}

    for attribute_name, attribute_value in vars(limit).items():
        serialized_limit[f"limit.{attribute_name}"] = attribute_value

    return serialized_limit


def __serialize_type(type: SpecificationType) -> Dict[str, str]:
    """Serialize a spec type into its name.

    Args:
        type: Type of a spec.

    Returns:
        Single-entry dictionary mapping "type" to the name of the given type.
    """
    type_name = type.name
    return {"type": type_name}


def __format_specs_columns(specs_dataframe: pd.DataFrame) -> pd.DataFrame:
    """Reorder spec columns so related columns sit next to each other.

    The resulting column order is: standard columns, condition columns,
    the keywords column (if present) and finally the property columns.
    Within each group the original relative order is kept.

    Args:
        specs_dataframe: Dataframe of specs.

    Returns:
        Dataframe of specs with its columns reordered.
    """
    standard_headers: List[str] = []
    condition_headers: List[str] = []
    keywords_headers: List[str] = []
    properties_headers: List[str] = []

    # Classify every header once, using the same predicates as the rest of
    # the module so the grouping cannot drift from the header constants.
    for header in specs_dataframe.columns.to_list():
        if __is_condition_header(header=header):
            condition_headers.append(header)
        elif __is_property_header(header=header):
            properties_headers.append(header)
        elif __is_keywords_header(header=header):
            # The keywords column is emitted at most once.
            if not keywords_headers:
                keywords_headers.append(header)
        else:
            standard_headers.append(header)

    formatted_column_headers = (
        standard_headers + condition_headers + keywords_headers + properties_headers
    )

    return specs_dataframe.reindex(columns=formatted_column_headers, copy=False)


def __is_standard_column_header(header: str) -> bool:
    """Check that a column header is neither a condition, a property nor keywords.

    Args:
        header: Column header of the specs dataframe.

    Returns:
        True if none of the condition, property and keywords header checks
        match the header. Otherwise False.
    """
    special_header_checks = (
        __is_condition_header,
        __is_property_header,
        __is_keywords_header,
    )

    return not any(is_special(header=header) for is_special in special_header_checks)


def __is_condition_header(header: str) -> bool:
    """Check if a column header belongs to a condition column.

    Args:
        header: Column header of the specs dataframe.

    Returns:
        True if the header starts with the condition column prefix
        ('condition_'). Otherwise False.
    """
    condition_prefix = DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX
    return header.startswith(condition_prefix)


def __is_property_header(header: str) -> bool:
    """Check if a column header belongs to a property column.

    Args:
        header: Column header of the specs dataframe.

    Returns:
        True if the header starts with the property column prefix
        ('properties.'). Otherwise False.
    """
    property_prefix = DataFrameHeaders.PROPERTY_COLUMN_HEADER_PREFIX
    return header.startswith(property_prefix)


def __is_keywords_header(header: str) -> bool:
    """Check if a column header is the keywords column.

    Args:
        header: Column header of the specs dataframe.

    Returns:
        True if the header is exactly the keywords column header
        ('keywords'). Otherwise False.
    """
    keywords_header = DataFrameHeaders.KEYWORDS_COLUMN_HEADER
    return header == keywords_header


def __generate_condition_column_header(condition: Condition) -> str:
    """Generate the dataframe column header for a condition.

    Args:
        condition: Condition object for generating the column header.

    Returns:
        The condition column prefix followed by the condition name (empty if
        the condition has no name) and, for numeric condition values that
        have a unit, the unit in parentheses.
    """
    name = condition.name or ""
    value = condition.value

    # Only numeric condition values are inspected for a unit.
    unit = (
        f"({value.unit})"
        if isinstance(value, NumericConditionValue) and value.unit
        else ""
    )

    # Use the shared prefix constant so generated headers always match what
    # the condition-header check looks for.
    return f"{DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX}{name}{unit}"


def __serialize_condition_value(condition: Condition) -> List[str]:
    """Collect the serialized ranges and discrete values of a condition.

    Args:
        condition: Condition for getting values.

    Returns:
        Empty list if the condition has no value. Otherwise the serialized
        ranges (numeric condition values only) followed by the serialized
        discrete values.
    """
    value = condition.value
    if not value:
        return []

    # Ranges are only serialized for numeric condition values.
    serialized_ranges = (
        __serialize_numeric_condition_range(value=value)
        if isinstance(value, NumericConditionValue)
        else []
    )
    serialized_discrete_values = __serialize_condition_discrete_values(value=value)

    return [*serialized_ranges, *serialized_discrete_values]


def __serialize_numeric_condition_range(value: NumericConditionValue) -> List[str]:
    """Serialize the ranges of a numeric condition value.

    Args:
        value: A condition's value with NumericConditionValue type.

    Returns:
        One string per range, in the format
        `[<field>: <value>; <field>: <value>; ...]`, built from the attributes
        of the range object (for example `[min: 1; max: 5; step: 1]`).
        Attributes that are None are omitted. Returns an empty list if the
        value has no ranges.
    """
    if not value.range:
        return []

    serialized_ranges = []
    for condition_range in value.range:
        # Build the joined fields outside the f-string: a replacement field
        # spanning several lines inside a single-quoted f-string only parses
        # on Python 3.12+ (PEP 701).
        range_fields = "; ".join(
            f"{range_key}: {range_value}"
            for range_key, range_value in vars(condition_range).items()
            if range_value is not None
        )
        serialized_ranges.append(f"[{range_fields}]")

    return serialized_ranges


def __serialize_condition_discrete_values(
    value: Union[NumericConditionValue, StringConditionValue]
) -> List[str]:
    """Serialize the discrete values of a condition value.

    Args:
        value: A condition's value with either NumericConditionValue type or
            StringConditionValue type.

    Returns:
        The discrete values of the given value, each converted with `str`.
        Empty list if the value has no discrete values.
    """
    discrete_values = value.discrete or []
    return list(map(str, discrete_values))
Loading