Skip to content
Merged
Show file tree
Hide file tree
Changes from 72 commits
Commits
Show all changes
82 commits
Select commit Hold shift + click to select a range
576ae66
feat:spec-dataframe-utility
Feb 21, 2025
35406ef
fix:QuerySpecsOptionalResponseModel
Feb 24, 2025
50a4e00
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 24, 2025
9a0c7a8
fix:PullResponseModelBranch
Feb 24, 2025
52dfaee
fix:PR Comments
Feb 24, 2025
4fcd9c8
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 24, 2025
1362c3d
test:IntegrationTests
Feb 24, 2025
58e5bd8
feat: add projection and update response models
Shri2109 Feb 24, 2025
abba747
fix:PRComments
Feb 25, 2025
b6df853
refactor: update query products response model
Shri2109 Feb 25, 2025
fee23c0
feat:NewResponseModel
Feb 25, 2025
4ec24f7
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 25, 2025
8a53dd2
fix:PRComments
Feb 25, 2025
f94c805
refactor: include a common products response
Shri2109 Feb 25, 2025
256b80f
fix:DeleteUnwantedResponseModel
Feb 25, 2025
c10dd6e
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 25, 2025
c84db16
refactor: include unqiue models for product request and response
Shri2109 Feb 25, 2025
06118e0
refactor: change from ProductRequest to Product
Shri2109 Feb 26, 2025
b619beb
fix:PRComments
Feb 26, 2025
38a69f0
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 26, 2025
de85d37
fix:PRComments
Feb 26, 2025
7872a98
Merge branch 'users/sam-rishi/fix-projection' of https://github.com/s…
Feb 26, 2025
10714ad
fix:PRComments
Feb 26, 2025
a48b2a8
Merge branch 'users/sam-rishi/fix-spec-response-model' of https://git…
Feb 26, 2025
b1ec916
fix:PRCommnets
Feb 26, 2025
3cf7a28
refactor: add separate projection and order-by enums and add update r…
Shri2109 Feb 26, 2025
fb9653a
refactor: update request models and product enums
Shri2109 Feb 26, 2025
f0eefca
Merge pull request #3 from shri2k2/users/sam-rishi/fix-spec-response-…
sam-rishi Feb 27, 2025
8ddd06a
fix:PRComments
Feb 27, 2025
ac3e92f
fix:PRCommentsexit
Feb 27, 2025
cfb7835
refactor: update specification enum name
Shri2109 Feb 27, 2025
e35715b
feat: add retry mechanism for both specs and product clients
Shri2109 Feb 28, 2025
8d3d411
fix:PRComments
Feb 28, 2025
5ad070b
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Feb 28, 2025
dff0ac1
fix:MergeChanges
Feb 28, 2025
edd39ed
fix:PRComments
Mar 1, 2025
dd17ab0
fix:PRComments
Mar 1, 2025
8286a63
fix:PRComments
Mar 1, 2025
1e73458
test: reformat spec column projection test case
Shri2109 Mar 3, 2025
070d68f
refactor: change from ProductResponse to Product
Shri2109 Mar 3, 2025
475a06c
fix:PRComments
Mar 3, 2025
454b2bc
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
4f88b24
fix:PRCOmments
Mar 3, 2025
8f83083
fix:PRComments
Mar 3, 2025
9b9bcdc
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
023e3a7
fix:PRComments
Mar 3, 2025
18de35d
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
d4c6507
fix:SpecsResponseMerge
Mar 3, 2025
d7edfdf
fix:PRComments
Mar 3, 2025
874a38a
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
27f6967
fix:PRComments
Mar 3, 2025
1fab252
fix:PRComments
Mar 3, 2025
55bed9b
fix:PRComments
Mar 3, 2025
9606ab8
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 3, 2025
8e2cce9
fix:MergeSpecsModel
Mar 3, 2025
110c2da
refactor:SpecsModelsAndProductDocstring
Mar 4, 2025
c22cc97
Merge branch 'users/shriram/fix-projection' of https://github.com/shr…
Mar 4, 2025
32d94a7
fix:PRComments
Mar 4, 2025
9410c88
refactor:ConditionTestcase
Mar 4, 2025
fc4b24a
fix:PRComment
Mar 4, 2025
19d2a3d
Merge branch 'master' of https://github.com/shri2k2/nisystemlink-clie…
Mar 5, 2025
4ee7ea9
refactor:DataframeUtilitiesWithoutBatchQuery
Mar 6, 2025
b5de173
refactor:UnitTestClassName
Mar 6, 2025
79666a5
refactor:DataframeUtilityAndUnitTests
RSam-NI Mar 6, 2025
8b0d443
refactor:ExpectedsDataframeHelperMethod
Mar 7, 2025
b68266a
fix:PRComments
Mar 7, 2025
b11ec4a
refactor:CallableConditionFormatArgument
Mar 7, 2025
2961d83
refactor:docstring
Mar 7, 2025
3fdcc65
fix:PRComments
Mar 7, 2025
bf83d70
fix:DebugStatements
Mar 7, 2025
68acd58
fix:PRComments
Mar 10, 2025
719c965
test:serialize_conditions_to_string
Mar 10, 2025
ab76142
refactor:ConditionFormatting
Mar 11, 2025
fc8cabd
fix:PoetryLockFile
Mar 11, 2025
7e0a9da
refactor:DocstringForPublicMethod
Mar 11, 2025
f89ae64
refactor:ExposeConditionFormatMethod
Mar 11, 2025
9e1c940
fix:PRComments
Mar 11, 2025
b39df67
refactor:AddCondtionPerRow
Mar 12, 2025
85f4c14
fix:PRComments
Mar 13, 2025
1bdd4e6
fix:pyproject.toml
Mar 13, 2025
f79f0ff
refactor:StringLiteralSyntax
Mar 13, 2025
9322228
fix:StringLiteral
Mar 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions nisystemlink/clients/spec/utilities/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from ._dataframe_utilities import convert_specs_to_dataframe

# flake8: noqa
6 changes: 6 additions & 0 deletions nisystemlink/clients/spec/utilities/_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class DataFrameHeaders:
    """Column-header constants used when laying out the specs dataframe."""

    # Prefix that every condition column header starts with.
    CONDITION_COLUMN_HEADER_PREFIX = "condition_"

    # Prefix that every property column header starts with.
    PROPERTY_COLUMN_HEADER_PREFIX = "properties."

    # Exact header of the keywords column.
    KEYWORDS_COLUMN_HEADER = "keywords"
288 changes: 288 additions & 0 deletions nisystemlink/clients/spec/utilities/_dataframe_utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
from typing import Callable, Dict, List, Optional, Union

import pandas as pd
from nisystemlink.clients.spec.models._condition import (
Condition,
NumericConditionValue,
StringConditionValue,
)
from nisystemlink.clients.spec.models._specification import (
Specification,
SpecificationLimit,
SpecificationType,
)
from nisystemlink.clients.spec.utilities._constants import DataFrameHeaders


def serialize_conditions_to_string(conditions: List[Condition]) -> Dict[str, str]:
    """Serialize the conditions of a spec into column header / cell value pairs.

    Args:
        conditions: List of all conditions in a spec.

    Returns:
        Conditions as a dictionary. Each key is the column header
        "condition_<conditionName>(<conditionUnit>)"; the unit part is only
        present for numeric conditions that define a unit.
        For a numeric condition the value is
        "[min: num; max: num; step: num], num, num", where every '[]' group is
        one range of the condition and the remaining numbers are its discrete
        values.
        For a string condition the value is "str, str, str", i.e. its discrete
        values.
        Conditions without a name or without a value are left out.
    """
    serialized: Dict[str, str] = {}

    for condition in conditions:
        # Only named conditions that carry a value produce a column.
        if not (condition.name and condition.value):
            continue

        column_header = __generate_condition_column_header(condition)
        serialized[column_header] = ", ".join(__get_condition_values(condition))

    return serialized


def convert_specs_to_dataframe(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add an optional parameter set_spec_id_product_id_workspace_as_index which can be useful for further mapping with other df?

@suriyaprashath-ni, @santhoshramaraj?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all three combined only make a unique combination

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We didn't discuss it in the meeting this morning, but what you do with the conditions mapping function would impact what you'd want the index to be. It's also not clear to me from this parameter whether you would be creating a MultiIndex or a new column that is the serialized combination of these columns. I would expect a MultiIndex would be more useful. Either way, doing this would impact your data access in ways that probably would not help usability. Workspace and product IDs are not very commonly known, compared with their name and part number equivalents.

I'm inclined to leave this unindexed or indexed by id if it is available in the projection (not specId) and let the caller reindex in whatever way makes sense for them.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unindexed is fine if the multi-index might not be that useful. Indexing by the spec's id might also not be that useful, as it is not generally used much in other scenarios. For example, for mapping a spec to a step, users generally use SpecId instead of id.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm no pandas expert but as far as I can tell you do not need to join by index, you can join (merge technically) by any column. Presumably it is faster if you join by index, and it looks like the index is maintained across the join (as a MultiIndex) which can have some value.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, MultiIndex is an advanced topic in pandas. It is uncommon for users to encounter it initially. It is best to leave it unindexed and let the user decide what columns to merge by.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to create an example for this, similar to how we create examples for the client APIs, since we are exposing a callback function. We can use the same code as our default condition format in the example too.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Examples for the other data frames would be useful, too, but it doesn't need to be in the same PR as the functionality if that is easier for your team.

specs: List[Specification],
condition_format: Optional[
Callable[[List[Condition]], Dict]
] = serialize_conditions_to_string,
) -> pd.DataFrame:
"""Creates a Pandas DataFrame for the specs.

Args:
specs: List of specs.
condition_format: Function which takes in a list of condition objects and returns
a dictionary of condition and its values. The dictionary keys
should be the condition name and the values should be the condition
value in any format you need. Keys will be used as the dataframe
column header and values will be used as the row cells for the
respective column header. If not passed, default condition format will be used
and if None is passed, conditions won't be included in the dataframe.
By default, for all the condition columns to be grouped
together in the dataframe, the dictionary key should have the prefix "condition_".
This is an optional parameter. By default column header will be
"condition_<conditionName>(<conditionUnit>)".
The column value will be "[min: num; max: num, step: num], num, num"
where data within the '[]' is numeric condition range and other num
values are numeric condition discrete values.
The column value will be "str, str, str" - where str values are the
condition discrete values for a string condition. If the condition doesn't
have values, it will not be added to the dataframe.

Returns:
A Pandas DataFrame with the each spec fields having a separate column.
Following fields are split into sub-columns.
- conditions: format of the condition columns are decided by the `condition_format`
argument of this function.
- Properties: All the unique properties across all specs will be split into separate columns.
For example, properties.property1, properties.property2, etc.
"""
specs_dict = [
{
**{
key: value
for key, value in vars(spec).items()
if key not in ["type", "limit", "conditions"]
},
**(__serialize_type(spec.type) if spec.type else {}),
**(__serialize_limits(spec.limit) if spec.limit else {}),
**(
condition_format(spec.conditions)
if condition_format and spec.conditions
else {}
),
}
for spec in specs
]

specs_dataframe = pd.json_normalize(specs_dict)
specs_dataframe = __format_specs_columns(specs_dataframe=specs_dataframe)
specs_dataframe.dropna(axis="columns", how="all", inplace=True)

return specs_dataframe


def __serialize_limits(limit: SpecificationLimit) -> Dict[str, str]:
    """Flatten a spec limit into ``limit.<field>`` entries.

    Args:
        limit: Limit of a spec.

    Returns:
        A dictionary with one "limit.<attribute>" key for every attribute
        returned by ``vars(limit)``, mapped to that attribute's value as-is.
    """
    serialized_limit: Dict[str, str] = {}

    for field_name, field_value in vars(limit).items():
        serialized_limit[f"limit.{field_name}"] = field_value

    return serialized_limit


def __serialize_type(type: SpecificationType) -> Dict[str, str]:
    """Serialize a spec type into its name.

    Args:
        type: Type of a spec.

    Returns:
        A single-entry dictionary mapping "type" to the name of the given type.
    """
    type_name = type.name

    return {"type": type_name}


def __format_specs_columns(specs_dataframe: pd.DataFrame) -> pd.DataFrame:
    """Format specs column to group conditions and keep properties and keywords at the end.

    The resulting column order is: standard columns, condition columns, the
    keywords column (when present) and finally the property columns. Within each
    group the original relative order is kept.

    Args:
        specs_dataframe: Dataframe of specs.

    Returns:
        Formatted dataframe of specs.
    """
    column_headers = specs_dataframe.columns.to_list()
    # Use the shared constant so this stays in sync with __is_keywords_header;
    # a diverging literal would silently drop the keywords column on reindex.
    keywords_header = DataFrameHeaders.KEYWORDS_COLUMN_HEADER

    standard_column_headers = [
        header for header in column_headers if __is_standard_column_header(header)
    ]
    condition_headers = [
        header for header in column_headers if __is_condition_header(header=header)
    ]
    keywords_headers = [keywords_header] if keywords_header in column_headers else []
    properties_headers = [
        header for header in column_headers if __is_property_header(header=header)
    ]

    formatted_column_headers = (
        standard_column_headers
        + condition_headers
        + keywords_headers
        + properties_headers
    )

    return specs_dataframe.reindex(columns=formatted_column_headers)


def __is_standard_column_header(header: str) -> bool:
    """Tell whether a column header belongs to none of the special column groups.

    Args:
        header: Column header for specs dataframe.

    Returns:
        True if the header is neither a condition header, a property header nor
        the keywords header. Else returns False.
    """
    special_header_checks = (
        __is_condition_header,
        __is_property_header,
        __is_keywords_header,
    )

    return not any(is_special(header=header) for is_special in special_header_checks)


def __is_condition_header(header: str) -> bool:
    """Check if column header is a condition column header.

    Args:
        header: Column header for specs dataframe.

    Returns:
        True if header starts with the condition column prefix. Else returns False.
    """
    return header.startswith(DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX)


def __is_property_header(header: str) -> bool:
    """Check if column header is a property column header.

    Args:
        header: Column header for specs dataframe.

    Returns:
        True if header starts with the property column prefix. Else returns False.
    """
    return header.startswith(DataFrameHeaders.PROPERTY_COLUMN_HEADER_PREFIX)


def __is_keywords_header(header: str) -> bool:
    """Check if column header is the keywords column header.

    Args:
        header: Column header for specs dataframe.

    Returns:
        True if header equals the keywords column header. Else returns False.
    """
    return header == DataFrameHeaders.KEYWORDS_COLUMN_HEADER


def __generate_condition_column_header(condition: Condition) -> str:
    """Generate column header for a condition.

    Args:
        condition: Condition object for generating column header.

    Returns:
        The column header for the given condition: the condition column prefix
        followed by the condition name (empty if the name is not set) and, for a
        numeric condition value with a unit, the unit wrapped in parentheses.
    """
    name = condition.name or ""
    value = condition.value
    # Only numeric condition values are checked for a unit.
    unit = (
        f"({value.unit})"
        if isinstance(value, NumericConditionValue) and value.unit
        else ""
    )

    # Build the header from the shared prefix constant so it always matches the
    # prefix that __is_condition_header tests for when grouping columns.
    return f"{DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX}{name}{unit}"


def __get_condition_values(condition: Condition) -> List[str]:
    """Collect the serialized ranges and discrete values of a condition.

    Args:
        condition: Condition for getting values.

    Returns:
        The serialized values of the condition: for a numeric condition value
        its ranges come first, followed by the discrete values. An empty list is
        returned when the condition has no value.
    """
    condition_value = condition.value
    if not condition_value:
        return []

    # Ranges only apply to numeric condition values.
    serialized_ranges = (
        __serialize_numeric_condition_range(value=condition_value)
        if isinstance(condition_value, NumericConditionValue)
        else []
    )
    serialized_discrete_values = __serialize_condition_discrete_values(
        value=condition_value
    )

    return [*serialized_ranges, *serialized_discrete_values]


def __serialize_numeric_condition_range(value: NumericConditionValue) -> List[str]:
    """Serialize ranges of a numeric condition value.

    Args:
        value: A condition's value with NumericConditionValue type.

    Returns:
        The list of ranges of the given condition where each range will be in
        string format `[min: <value>; max: <value>; step: <value>]`. Fields of a
        range that are None are left out of its string.
    """
    if not value.range:
        return []

    serialized_ranges = []
    for condition_range in value.range:
        # Join the fields outside of the f-string: a replacement field spanning
        # several lines inside a single-quoted f-string is only accepted by
        # Python 3.12+ (PEP 701) and is a SyntaxError on older interpreters.
        range_fields = "; ".join(
            f"{range_key}: {range_value}"
            for range_key, range_value in vars(condition_range).items()
            if range_value is not None
        )
        serialized_ranges.append(f"[{range_fields}]")

    return serialized_ranges


def __serialize_condition_discrete_values(
    value: Union[NumericConditionValue, StringConditionValue]
) -> List[str]:
    """Convert the discrete values of a condition value to strings.

    Args:
        value: A condition's value with either NumericConditionValue type or
            StringConditionValue type.

    Returns:
        The discrete values of the given value, each converted with ``str``. An
        empty list is returned when there are no discrete values.
    """
    discrete_values = value.discrete or []

    return list(map(str, discrete_values))
Loading