Skip to content

Commit 6dd6a67

Browse files
committed
Add better size estimation for non-ASCII characters
1 parent 76e4b47 commit 6dd6a67

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/logs/aws_batch_log_record_processor.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -175,12 +175,12 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: di
175175
if next_val is None:
176176
continue
177177

178-
if isinstance(next_val, (str, bytes)):
178+
if isinstance(next_val, bytes):
179179
size += len(next_val)
180180
continue
181181

182-
if isinstance(next_val, (float, int, bool)):
183-
size += len(str(next_val))
182+
if isinstance(next_val, (str, float, int, bool)):
183+
size += AwsCloudWatchOtlpBatchLogRecordProcessor._estimate_utf8_size(str(next_val))
184184
continue
185185

186186
# next_val must be Sequence["AnyValue"] or Mapping[str, "AnyValue"]
@@ -210,3 +210,17 @@ def _estimate_log_size(self, log: LogData, depth: int = 3) -> int: # pylint: di
210210
queue = new_queue
211211

212212
return size
213+
214+
@staticmethod
215+
def _estimate_utf8_size(s: str):
216+
ascii_count = 0
217+
non_ascii_count = 0
218+
219+
for char in s:
220+
if ord(char) < 128:
221+
ascii_count += 1
222+
else:
223+
non_ascii_count += 1
224+
225+
# Estimate: ASCII chars (1 byte) + upper bound of non-ASCII chars 4 bytes
226+
return ascii_count + (non_ascii_count * 4)

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/logs/test_aws_batch_log_record_processor.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -116,8 +116,8 @@ def test_process_log_data_nested_structure_size_exceeds_max_log_size(self):
116116

117117
def test_process_log_data_primitive(self):
118118

119-
primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None]
120-
expected_sizes = [4, 4, 1, 3, 4, 5, 0]
119+
primitives: List[AnyValue] = ["test", b"test", 1, 1.2, True, False, None, "深入 Python", "café"]
120+
expected_sizes = [4, 4, 1, 3, 4, 5, 0, 2 * 4 + len(" Python"), 1 * 4 + len("caf")]
121121

122122
for index, primitive in enumerate(primitives):
123123
log = self.generate_test_log_data(log_body=primitive, count=1)

0 commit comments

Comments
 (0)