Skip to content
This repository was archived by the owner on Sep 17, 2025. It is now read-only.

Commit 593c443

Browse files
colincadams authored and c24t committed
Allow underscores in label keys (#448)
Sanitize label keys in the stackdriver exporter by replacing sequences of invalid chars with underscores.
1 parent 84166d9 commit 593c443

File tree

2 files changed

+49
-17
lines changed

2 files changed

+49
-17
lines changed

opencensus/stats/exporters/stackdriver_exporter.py

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
import os
1818
import platform
1919
import re
20+
import string
2021

2122
from datetime import datetime
2223
from google.api_core.gapic_v1 import client_info
@@ -440,13 +441,13 @@ def new_label_descriptors(defaults, keys):
440441
label_descriptors = []
441442
for key, lbl in defaults.items():
442443
label = {}
443-
label["key"] = remove_non_alphanumeric(key)
444+
label["key"] = sanitize_label(key)
444445
label["description"] = lbl
445446
label_descriptors.append(label)
446447

447448
for tag_key in keys:
448449
label = {}
449-
label["key"] = remove_non_alphanumeric(tag_key)
450+
label["key"] = sanitize_label(tag_key)
450451
label_descriptors.append(label)
451452
label_descriptors.append({"key": OPENCENSUS_TASK,
452453
"description": OPENCENSUS_TASK_DESCRIPTION})
@@ -461,11 +462,22 @@ def set_metric_labels(series, view, tag_values):
461462

462463
for key, value in zip(view.columns, tag_values):
463464
if value is not None:
464-
series.metric.labels[remove_non_alphanumeric(key)] = value
465+
series.metric.labels[sanitize_label(key)] = value
465466
series.metric.labels[OPENCENSUS_TASK] = get_task_value()
466467

467468

468-
def remove_non_alphanumeric(text):
469-
""" Remove characters not accepted in labels key
469+
def sanitize_label(text):
470+
"""Remove characters not accepted in labels key
471+
472+
This replaces any non-word characters (alphanumeric or underscore), with
473+
an underscore. It also ensures that the first character is a letter by
474+
prepending with 'key' if necessary, and trims the text to 100 characters.
470475
"""
471-
return str(re.sub('[^0-9a-zA-Z ]+', '', text)).replace(" ", "")
476+
if not text:
477+
return text
478+
text = re.sub('\\W+', '_', text)
479+
if text[0] in string.digits:
480+
text = "key_" + text
481+
elif text[0] == '_':
482+
text = "key" + text
483+
return text[:100]

tests/unit/stats/exporter/test_stackdriver_stats.py

Lines changed: 31 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -37,10 +37,10 @@
3737
FRONTEND_KEY_INT = tag_key_module.TagKey("my.org/keys/frontend-INT")
3838
FRONTEND_KEY_STR = tag_key_module.TagKey("my.org/keys/frontend-STR")
3939

40-
FRONTEND_KEY_CLEAN = "myorgkeysfrontend"
41-
FRONTEND_KEY_FLOAT_CLEAN = "myorgkeysfrontendFLOAT"
42-
FRONTEND_KEY_INT_CLEAN = "myorgkeysfrontendINT"
43-
FRONTEND_KEY_STR_CLEAN = "myorgkeysfrontendSTR"
40+
FRONTEND_KEY_CLEAN = "my_org_keys_frontend"
41+
FRONTEND_KEY_FLOAT_CLEAN = "my_org_keys_frontend_FLOAT"
42+
FRONTEND_KEY_INT_CLEAN = "my_org_keys_frontend_INT"
43+
FRONTEND_KEY_STR_CLEAN = "my_org_keys_frontend_STR"
4444

4545
VIDEO_SIZE_MEASURE = measure_module.MeasureInt(
4646
"my.org/measure/video_size_test2", "size of processed videos", "By")
@@ -142,16 +142,36 @@ def test_client_info_user_agent(self):
142142
self.assertIn(stackdriver.get_user_agent_slug(),
143143
exporter.client.client_info.to_user_agent())
144144

145-
def test_remove_invalid_chars(self):
146-
invalid_chars = "@#$"
147-
valid_chars = "abc"
148-
149-
result = stackdriver.remove_non_alphanumeric(invalid_chars)
145+
def test_sanitize(self):
146+
# empty
147+
result = stackdriver.sanitize_label("")
150148
self.assertEqual(result, "")
151149

152-
result = stackdriver.remove_non_alphanumeric(valid_chars)
150+
# all invalid
151+
result = stackdriver.sanitize_label("/*^#$")
152+
self.assertEqual(result, "key_")
153+
154+
# all valid
155+
result = stackdriver.sanitize_label("abc")
153156
self.assertEqual(result, "abc")
154157

158+
# mixed
159+
result = stackdriver.sanitize_label("a.b/c")
160+
self.assertEqual(result, "a_b_c")
161+
162+
# starts with '_'
163+
result = stackdriver.sanitize_label("_abc")
164+
self.assertEqual(result, "key_abc")
165+
166+
# starts with digit
167+
result = stackdriver.sanitize_label("0abc")
168+
self.assertEqual(result, "key_0abc")
169+
170+
# too long
171+
result = stackdriver.sanitize_label("0123456789" * 10)
172+
self.assertEqual(len(result), 100)
173+
self.assertEqual(result, "key_" + "0123456789" * 9 + "012345")
174+
155175
def test_singleton_with_params(self):
156176
default_labels = {'key1': 'value1'}
157177
patch_client = mock.patch(
@@ -884,7 +904,7 @@ def test_create_timeseries_from_distribution(self):
884904
[time_series] = time_series_list
885905

886906
self.assertCorrectLabels(time_series.metric.labels,
887-
{'tagkey': 'tag_value'},
907+
{'tag_key': 'tag_value'},
888908
include_opencensus=True)
889909
self.assertEqual(len(time_series.points), 1)
890910
[point] = time_series.points

0 commit comments

Comments
 (0)