Skip to content

Commit ce2172d

Browse files
correct scope for new function in the optimization context
1 parent 67a9e79 commit ce2172d

File tree

10 files changed

+1556
-9
lines changed

10 files changed

+1556
-9
lines changed
Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,2 @@
11
# Default base URL for the API endpoint (note the trailing slash).
DEFAULT_API_URL = "https://api.galileo.ai/"
22
# Default base URL for the web app (note the trailing slash).
# NOTE(review): presumably the console URL counterpart of DEFAULT_API_URL — confirm against callers.
DEFAULT_APP_URL = "https://app.galileo.ai/"
3-
4-
5-
# function_names: GalileoApiClient.get_console_url
6-
# module_abs_path : /home/mohammed/Work/galileo-python/src/galileo/api_client.py
7-
# preexisting_objects: {('GalileoApiClient', ()), ('_set_destination', ()), ('get_console_url', (FunctionParent(name='GalileoApiClient', type='ClassDef'),))}
8-
# project_root_path: /home/mohammed/Work/galileo-python/src
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
from __future__ import annotations
2+
3+
import base64
4+
import json
5+
import zlib
6+
from copy import deepcopy
7+
from typing import Any, Iterable
8+
from utils import Point
9+
10+
from coordinates import PixelSpace
11+
from elements import (
12+
TYPE_TO_TEXT_ELEMENT_MAP,
13+
CheckBox,
14+
Element,
15+
ElementMetadata,
16+
)
17+
18+
# ================================================================================================
19+
# SERIALIZATION/DESERIALIZATION (SERDE) RELATED FUNCTIONS
20+
# ================================================================================================
21+
# These serde functions will likely relocate to `unstructured.documents.elements` since they are
22+
# so closely related to elements and this staging "brick" is deprecated.
23+
# ================================================================================================
24+
25+
# == DESERIALIZERS ===============================
26+
27+
28+
def elements_from_base64_gzipped_json(b64_encoded_elements: str) -> list[Element]:
    """Rebuild element objects from a Base64-encoded, compressed JSON string.

    This is used when deserializing `ElementMetadata.orig_elements` from its compressed form in
    JSON and dict forms, and perhaps for other purposes.

    Args:
        b64_encoded_elements: Base64 text wrapping compressed, UTF-8 encoded JSON that holds a
            list of element-dicts.

    Returns:
        The elements produced by `elements_from_dicts()` for the decoded element-dicts.
    """
    # -- Base64 str -> compressed bytes. Despite "gzipped" in the name, the bytes are inflated
    # -- with `zlib.decompress()` below, not with the `gzip` module.
    compressed_bytes = base64.b64decode(b64_encoded_elements)
    # -- compressed bytes -> JSON text --
    json_text = zlib.decompress(compressed_bytes).decode("utf-8")
    # -- JSON text -> element-dicts -> elements --
    return elements_from_dicts(json.loads(json_text))
44+
45+
46+
def elements_from_dicts(element_dicts: Iterable[dict[str, Any]]) -> list[Element]:
    """Convert element-dicts to element objects.

    Each dict is dispatched on its "type" value: types found in `TYPE_TO_TEXT_ELEMENT_MAP` become
    the mapped text-element class (built from "text"), and "CheckBox" becomes a `CheckBox` (built
    from "checked"). Dicts with any other type, or with no type, produce no element.

    Args:
        element_dicts: Element-dicts to convert; "element_id" and "metadata" are optional keys.

    Returns:
        The constructed elements, in input order.
    """
    restored: list[Element] = []

    for element_dict in element_dicts:
        element_id: str | None = element_dict.get("element_id", None)

        # -- metadata is built before the type check, exactly as for recognized types --
        raw_metadata = element_dict.get("metadata")
        if raw_metadata is None:
            metadata = ElementMetadata()
        else:
            metadata = ElementMetadata.from_dict(raw_metadata)

        type_name = element_dict.get("type")
        if type_name in TYPE_TO_TEXT_ELEMENT_MAP:
            text_element_cls = TYPE_TO_TEXT_ELEMENT_MAP[type_name]
            restored.append(
                text_element_cls(
                    text=element_dict["text"], element_id=element_id, metadata=metadata
                )
            )
        elif type_name == "CheckBox":
            restored.append(
                CheckBox(
                    checked=element_dict["checked"], element_id=element_id, metadata=metadata
                )
            )

    return restored
67+
68+
def elements_to_base64_gzipped_json(elements: Iterable[Element]) -> str:
    """Serialize `elements` to a Base64-encoded, compressed JSON string.

    This is used when serializing `ElementMetadata.orig_elements` to make it as compact as
    possible when transported as JSON, for example in an HTTP response. This compressed form is
    also present when elements are in dict form ("element_dicts"). This function is not coupled
    to that purpose however and could have other uses.

    Args:
        elements: The elements to serialize.

    Returns:
        Base64 text wrapping the compressed, UTF-8 encoded, key-sorted JSON of the element-dicts.
    """
    # -- reduce floating-point precision of metadata values for a more compact str value --
    compact_elements = _fix_metadata_field_precision(elements)
    # -- elements -> element-dicts -> JSON text (sorted keys) --
    json_text = json.dumps(elements_to_dicts(compact_elements), sort_keys=True)
    # -- JSON text -> compressed bytes. Despite "gzipped" in the name, compression is done
    # -- with `zlib.compress()`, not with the `gzip` module.
    compressed_bytes = zlib.compress(json_text.encode("utf-8"))
    # -- Base64-encode so the bytes can travel as a JSON string value --
    return base64.b64encode(compressed_bytes).decode("utf-8")
88+
89+
90+
def elements_to_dicts(elements: Iterable[Element]) -> list[dict[str, Any]]:
    """Convert document elements to element-dicts.

    Args:
        elements: Objects exposing a `to_dict()` method.

    Returns:
        One dict per element, in input order, each being that element's `to_dict()` result.
    """
    return [element.to_dict() for element in elements]
93+
94+
95+
def _fix_metadata_field_precision(elements: Iterable[Element]) -> list[Element]:
    """Return deep copies of `elements` with selected metadata floats rounded.

    Coordinate points are rounded to 1 decimal place when the coordinate system is a
    `PixelSpace` and to 2 decimal places otherwise. A truthy `detection_class_prob` is rounded
    to 5 decimal places. The input elements are not modified.

    Args:
        elements: The elements whose metadata precision should be reduced.

    Returns:
        A new list of copied elements, in input order.
    """
    adjusted: list[Element] = []

    for source_element in elements:
        # -- work on a copy so the caller's elements are never mutated --
        copied = deepcopy(source_element)
        metadata = copied.metadata

        coordinates = metadata.coordinates
        if coordinates:
            ndigits = 1 if isinstance(coordinates.system, PixelSpace) else 2
            assert coordinates.points is not None
            coordinates.points = tuple(
                (round(x, ndigits), round(y, ndigits)) for x, y in coordinates.points
            )

        if metadata.detection_class_prob:
            metadata.detection_class_prob = round(metadata.detection_class_prob, 5)

        adjusted.append(copied)

    return adjusted
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
from __future__ import annotations
2+
3+
from enum import Enum
4+
from typing import Any, Dict, Sequence, Tuple, Union
5+
6+
7+
class Orientation(Enum):
    """Axis orientation of a coordinate system.

    Each member's value is an `(x, y)` pair of signs that is passed, one component per axis, to
    `convert_coordinate()` as `t_orientation`.
    """

    # -- origin in top left, y increases in the down direction --
    SCREEN = (1, -1)
    # -- origin in bottom left, y increases in the upward direction --
    CARTESIAN = (1, 1)
10+
11+
12+
def convert_coordinate(old_t, old_t_max, new_t_max, t_orientation):
    """Convert a coordinate into another system along one axis using a linear transformation.

    Args:
        old_t: The coordinate value in the old system.
        old_t_max: The extent of the axis in the old system (used as a divisor).
        new_t_max: The extent of the axis in the new system.
        t_orientation: `1` keeps the axis direction; `-1` mirrors it, so `old_t` maps to
            `(1 - old_t / old_t_max) * new_t_max`.

    Returns:
        The coordinate value expressed in the new system.
    """
    fraction = old_t / old_t_max
    # -- contributes only when the axis is flipped (`t_orientation == -1`) --
    mirrored_term = (1 - fraction) * (1 - t_orientation) / 2
    # -- contributes only when the axis is kept (`t_orientation == 1`) --
    direct_term = fraction * (1 + t_orientation) / 2
    return (mirrored_term + direct_term) * new_t_max
18+
19+
20+
class CoordinateSystem:
    """A finite coordinate plane with given width and height.

    Subclasses assign the class attribute `orientation`; this base class only annotates it, so
    the conversion methods require a subclass (or an instance) that provides it. Because
    `__eq__` is defined without `__hash__`, instances are unhashable.
    """

    orientation: Orientation

    def __init__(self, width: Union[int, float], height: Union[int, float]):
        """Store the extent of the plane along each axis."""
        self.width = width
        self.height = height

    def __eq__(self, other: object) -> bool:
        """Return True when `other` has the same class name, size and orientation."""
        if not isinstance(other, CoordinateSystem):
            return False
        return (
            self.__class__.__name__ == other.__class__.__name__
            and self.width == other.width
            and self.height == other.height
            # -- `orientation` is only annotated on the base class, so a plain attribute read
            # -- would raise AttributeError when comparing two bare `CoordinateSystem` objects.
            and getattr(self, "orientation", None) == getattr(other, "orientation", None)
        )

    def convert_from_relative(
        self,
        x: Union[float, int],
        y: Union[float, int],
    ) -> Tuple[Union[float, int], Union[float, int]]:
        """Convert to this coordinate system from a relative coordinate system."""
        x_orientation, y_orientation = self.orientation.value
        new_x = convert_coordinate(x, 1, self.width, x_orientation)
        new_y = convert_coordinate(y, 1, self.height, y_orientation)
        return new_x, new_y

    def convert_to_relative(
        self,
        x: Union[float, int],
        y: Union[float, int],
    ) -> Tuple[Union[float, int], Union[float, int]]:
        """Convert from this coordinate system to a relative coordinate system."""
        x_orientation, y_orientation = self.orientation.value
        new_x = convert_coordinate(x, self.width, 1, x_orientation)
        new_y = convert_coordinate(y, self.height, 1, y_orientation)
        return new_x, new_y

    def convert_coordinates_to_new_system(
        self,
        new_system: CoordinateSystem,
        x: Union[float, int],
        y: Union[float, int],
    ) -> Tuple[Union[float, int], Union[float, int]]:
        """Convert from this coordinate system to another given coordinate system."""
        # -- go through the relative (0 to 1) representation as the common intermediate --
        rel_x, rel_y = self.convert_to_relative(x, y)
        return new_system.convert_from_relative(rel_x, rel_y)

    def convert_multiple_coordinates_to_new_system(
        self,
        new_system: CoordinateSystem,
        coordinates: Sequence[Tuple[Union[float, int], Union[float, int]]],
    ) -> Tuple[Tuple[Union[float, int], Union[float, int]], ...]:
        """Convert (x, y) coordinates from current system to another coordinate system."""
        return tuple(
            self.convert_coordinates_to_new_system(new_system=new_system, x=x, y=y)
            for x, y in coordinates
        )
83+
84+
85+
class RelativeCoordinateSystem(CoordinateSystem):
    """Relative coordinate system where x and y are on a scale from 0 to 1.

    The plane is always 1 wide and 1 high, so the constructor takes no arguments.
    """

    orientation = Orientation.CARTESIAN

    def __init__(self):
        super().__init__(width=1, height=1)
93+
94+
95+
class PixelSpace(CoordinateSystem):
    """Coordinate system representing a pixel space, such as an image.

    The origin is at the top left.
    """

    orientation = Orientation.SCREEN
100+
101+
102+
class PointSpace(CoordinateSystem):
    """Coordinate system representing a point space, such as a pdf.

    The origin is at the bottom left.
    """

    orientation = Orientation.CARTESIAN
107+
108+
109+
# -- Lookup from a coordinate-system class name to the class itself. Keys are spelled out as
# -- literals rather than derived from `__name__`, so they stay fixed if a class is renamed.
TYPE_TO_COORDINATE_SYSTEM_MAP: Dict[str, Any] = {
    "PixelSpace": PixelSpace,
    "PointSpace": PointSpace,
    "CoordinateSystem": CoordinateSystem,
}

0 commit comments

Comments
 (0)