Skip to content

Commit 11d4362

Browse files
authored
Merge pull request #5174 from grondo/f576
support emoji encoding for Flux jobids
2 parents c0e977c + 631e983 commit 11d4362

File tree

13 files changed

+478
-4
lines changed

13 files changed

+478
-4
lines changed

doc/man1/flux-jobs.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,9 @@ The field names that can be specified are:
274274
**id.words**
275275
job ID in mnemonic encoding
276276

277+
**id.emoji**
278+
job ID in emoji encoding
279+
277280
**userid**
278281
job submitter's userid
279282

src/bindings/python/flux/job/JobID.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class JobID(int):
4545
- dotted hex (dothex) (xxxx.xxxx.xxxx.xxxx)
4646
- kvs dir (dotted hex with `job.` prefix)
4747
- RFC19 F58: (Base58 encoding with prefix `ƒ` or `f`)
48+
- basemoji (emoji encoding)
4849
4950
A JobID object also has properties for encoding a JOBID into each
5051
of the above representations, e.g. jobid.f58, jobid.words, jobid.dothex...
@@ -92,6 +93,11 @@ def words(self):
9293
"""Return words (mnemonic) representation of a JobID"""
9394
return self.encode("words")
9495

96+
@property
def emoji(self):
    """Return the emoji encoding of this JobID.

    The value is whatever ``self.encode`` produces for the ``"emoji"``
    encoding type.
    """
    encoding = "emoji"
    return self.encode(encoding)
100+
95101
@property
96102
def kvs(self):
97103
"""Return KVS directory path of a JobID"""

src/bindings/python/flux/job/info.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,7 @@ def job_fields_to_attrs(fields):
605605
"id.dec": (),
606606
"id.hex": (),
607607
"id.f58": (),
608+
"id.emoji": (),
608609
"id.kvs": (),
609610
"id.words": (),
610611
"id.dothex": (),
@@ -697,6 +698,7 @@ class JobInfoFormat(flux.util.OutputFormat):
697698
"id.dec": "JOBID",
698699
"id.hex": "JOBID",
699700
"id.f58": "JOBID",
701+
"id.emoji": "JOBID",
700702
"id.kvs": "JOBID",
701703
"id.words": "JOBID",
702704
"id.dothex": "JOBID",

src/common/libjob/id.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ int flux_job_id_encode (flux_jobid_t id,
8888
t = FLUID_STRING_MNEMONIC;
8989
else if (strcasecmp (type, "f58") == 0)
9090
t = FLUID_STRING_F58;
91+
else if (strcasecmp (type, "emoji") == 0)
92+
t = FLUID_STRING_EMOJI;
9193
else {
9294
/* Return EPROTO for invalid type to differentiate from
9395
* other invalid arguments.

src/common/libjob/test/job.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ struct jobid_parse_test jobid_parse_tests[] = {
369369
{ "dothex", 0, "0000.0000.0000.0000" },
370370
{ "kvs", 0, "job.0000.0000.0000.0000" },
371371
{ "words", 0, "academy-academy-academy--academy-academy-academy" },
372+
{ "emoji", 0, "😃" },
372373
#if ASSUME_BROKEN_LOCALE
373374
{ "f58", 0, "f1" },
374375
#else
@@ -380,6 +381,7 @@ struct jobid_parse_test jobid_parse_tests[] = {
380381
{ "dothex", 1, "0000.0000.0000.0001" },
381382
{ "kvs", 1, "job.0000.0000.0000.0001" },
382383
{ "words", 1, "acrobat-academy-academy--academy-academy-academy" },
384+
{ "emoji", 1, "😄" },
383385
#if ASSUME_BROKEN_LOCALE
384386
{ "f58", 1, "f2" },
385387
#else
@@ -391,6 +393,7 @@ struct jobid_parse_test jobid_parse_tests[] = {
391393
{ "dothex", 65535, "0000.0000.0000.ffff" },
392394
{ "kvs", 65535, "job.0000.0000.0000.ffff" },
393395
{ "words", 65535, "nevada-archive-academy--academy-academy-academy" },
396+
{ "emoji", 65535, "💁📚" },
394397
#if ASSUME_BROKEN_LOCALE
395398
{ "f58", 65535, "fLUv" },
396399
#else
@@ -402,6 +405,7 @@ struct jobid_parse_test jobid_parse_tests[] = {
402405
{ "dothex", 6787342413402046, "0018.1d0d.4d85.0fbe" },
403406
{ "kvs", 6787342413402046, "job.0018.1d0d.4d85.0fbe" },
404407
{ "words", 6787342413402046, "cake-plume-nepal--neuron-pencil-academy" },
408+
{ "emoji", 6787342413402046, "👴😱🔚🎮🕙🚩" },
405409
#if ASSUME_BROKEN_LOCALE
406410
{ "f58", 6787342413402046, "fuzzybunny" },
407411
#else

src/common/libutil/Makefile.am

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,9 @@ libutil_la_SOURCES = \
9898
slice.c \
9999
slice.h \
100100
strstrip.c \
101-
strstrip.h
101+
strstrip.h \
102+
basemoji.h \
103+
basemoji.c
102104

103105
EXTRA_DIST = veb_mach.c
104106

@@ -132,7 +134,8 @@ TESTS = test_sha1.t \
132134
test_strstrip.t \
133135
test_slice.t \
134136
test_timestamp.t \
135-
test_environment.t
137+
test_environment.t \
138+
test_basemoji.t
136139

137140
test_ldadd = \
138141
$(top_builddir)/src/common/libutil/libutil.la \
@@ -283,3 +286,7 @@ test_timestamp_t_LDADD = $(test_ldadd)
283286
test_environment_t_SOURCES = test/environment.c
284287
test_environment_t_CPPFLAGS = $(test_cppflags)
285288
test_environment_t_LDADD = $(test_ldadd)
289+
290+
test_basemoji_t_SOURCES = test/basemoji.c
291+
test_basemoji_t_CPPFLAGS = $(test_cppflags)
292+
test_basemoji_t_LDADD = $(test_ldadd)

src/common/libutil/basemoji.c

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
/************************************************************\
2+
* Copyright 2023 Lawrence Livermore National Security, LLC
3+
* (c.f. AUTHORS, NOTICE.LLNS, COPYING)
4+
*
5+
* This file is part of the Flux resource manager framework.
6+
* For details, see https://github.com/flux-framework.
7+
*
8+
* SPDX-License-Identifier: LGPL-3.0
9+
\************************************************************/
10+
11+
/* basemoji.c - an emoji encoding for unsigned 64 bit integers
12+
*/
13+
14+
#if HAVE_CONFIG_H
15+
#include "config.h"
16+
#endif
17+
18+
#include <stdio.h>
19+
#include <stdlib.h>
20+
#include <stdint.h>
21+
#include <string.h>
22+
#include <errno.h>
23+
#include <stdbool.h>
24+
25+
#include "ccan/array_size/array_size.h"
26+
#include "basemoji.h"
27+
28+
/* Minimum length of a b576 string is 1 emoji, or 4 bytes */
29+
#define BASEMOJI_MINLEN 4
30+
31+
/* Maximum number of emoji "digits" in a basemoji string is
32+
*
33+
* ceil (ln (2^64-1)/ln (576)) = 7
34+
*
35+
* 4 bytes per emoji, so 4*7 = 28 bytes.
36+
*/
37+
#define BASEMOJI_MAXLEN 28
38+
39+
/* The following is a selection of 576 emoji in CLDR[1] collation order[2]
40+
* taken from the version 2010 Unicode emoji set[3]. Note: Selected code
41+
* points are all represented in 4 bytes, which is assumed in the
42+
* implementation in this module. Additionally, every character in this
43+
* selected set has a common first two bytes of F0 9F in UTF-8 encoding,
44+
* which aids in detection of a valid basemoji string.
45+
*
46+
* 1. https://cldr.unicode.org
47+
* 2. https://unicode.org/emoji/charts-12.1/emoji-ordering.txt
48+
* 3. https://unicode.org/emoji/charts/emoji-versions.html
49+
*
50+
*/
51+
const char *emojis[] = {
52+
"😃", "😄", "😁", "😆", "😅", "😂", "😉", "😊", "😍", "😘", "😚", "😋",
53+
"😜", "😝", "😏", "😒", "😌", "😔", "😪", "😷", "😵", "😲", "😳", "😨",
54+
"😰", "😥", "😢", "😭", "😱", "😖", "😣", "😞", "😓", "😩", "😫", "😤",
55+
"😡", "😠", "👿", "💀", "💩", "👹", "👺", "👻", "👽", "👾", "😺", "😸",
56+
"😹", "😻", "😼", "😽", "🙀", "😿", "😾", "🙈", "🙉", "🙊", "💌", "💘",
57+
"💝", "💖", "💗", "💓", "💞", "💕", "💟", "💔", "💛", "💚", "💙", "💜",
58+
"💋", "💯", "💢", "💥", "💫", "💦", "💨", "💬", "💤", "👋", "👌", "👈",
59+
"👉", "👆", "👇", "👍", "👎", "👊", "👏", "🙌", "👐", "🙏", "💅", "💪",
60+
"👂", "👃", "👀", "👅", "👄", "👶", "👦", "👧", "👱", "👨", "👩", "👴",
61+
"👵", "🙍", "🙎", "🙅", "🙆", "💁", "🙋", "🙇", "👮", "💂", "👷", "👸",
62+
"👳", "👲", "👰", "👼", "🎅", "💆", "💇", "🚶", "🏃", "💃", "👯", "🏂",
63+
"🏄", "🏊", "🛀", "👫", "💏", "💑", "👪", "👤", "👣", "🐵", "🐒", "🐶",
64+
"🐩", "🐺", "🐱", "🐯", "🐴", "🐎", "🐮", "🐷", "🐗", "🐽", "🐑", "🐫",
65+
"🐘", "🐭", "🐹", "🐰", "🐻", "🐨", "🐼", "🐾", "🐔", "🐣", "🐤", "🐥",
66+
"🐦", "🐧", "🐸", "🐢", "🐍", "🐲", "🐳", "🐬", "🐟", "🐠", "🐡", "🐙",
67+
"🐚", "🐌", "🐛", "🐜", "🐝", "🐞", "💐", "🌸", "💮", "🌹", "🌺", "🌻",
68+
"🌼", "🌷", "🌱", "🌴", "🌵", "🌾", "🌿", "🍀", "🍁", "🍂", "🍃", "🍄",
69+
"🍇", "🍈", "🍉", "🍊", "🍌", "🍍", "🍎", "🍏", "🍑", "🍒", "🍓", "🍅",
70+
"🍆", "🌽", "🌰", "🍞", "🍖", "🍗", "🍔", "🍟", "🍕", "🍳", "🍲", "🍱",
71+
"🍘", "🍙", "🍚", "🍛", "🍜", "🍝", "🍠", "🍢", "🍣", "🍤", "🍥", "🍡",
72+
"🍦", "🍧", "🍨", "🍩", "🍪", "🎂", "🍰", "🍫", "🍬", "🍭", "🍮", "🍯",
73+
"🍵", "🍶", "🍷", "🍸", "🍹", "🍺", "🍻", "🍴", "🔪", "🌏", "🗾", "🌋",
74+
"🗻", "🏠", "🏡", "🏢", "🏣", "🏥", "🏦", "🏨", "🏩", "🏪", "🏫", "🏬",
75+
"🏭", "🏯", "🏰", "💒", "🗼", "🗽", "🌁", "🌃", "🌄", "🌅", "🌆", "🌇",
76+
"🌉", "🎠", "🎡", "🎢", "💈", "🎪", "🚃", "🚄", "🚅", "🚇", "🚉", "🚌",
77+
"🚑", "🚒", "🚓", "🚕", "🚗", "🚙", "🚚", "🚲", "🚏", "🚨", "🚥", "🚧",
78+
"🚤", "🚢", "💺", "🚀", "🕛", "🕐", "🕑", "🕒", "🕓", "🕔", "🕕", "🕖",
79+
"🕗", "🕘", "🕙", "🕚", "🌑", "🌓", "🌔", "🌕", "🌙", "🌛", "🌟", "🌠",
80+
"🌌", "🌀", "🌈", "🌂", "🔥", "💧", "🌊", "🎃", "🎄", "🎆", "🎇", "🎈",
81+
"🎉", "🎊", "🎋", "🎍", "🎎", "🎏", "🎐", "🎑", "🎀", "🎁", "🎫", "🏆",
82+
"🏀", "🏈", "🎾", "🎳", "🎣", "🎽", "🎿", "🎯", "🔫", "🎱", "🔮", "🎮",
83+
"🎰", "🎲", "🃏", "🀄", "🎴", "🎭", "🎨", "👓", "👔", "👕", "👖", "👗",
84+
"👘", "👙", "👚", "👛", "👜", "👝", "🎒", "👞", "👟", "👠", "👡", "👢",
85+
"👑", "👒", "🎩", "🎓", "💄", "💍", "💎", "🔊", "📢", "📣", "🔔", "🎼",
86+
"🎵", "🎶", "🎤", "🎧", "📻", "🎷", "🎸", "🎹", "🎺", "🎻", "📱", "📲",
87+
"📞", "📟", "📠", "🔋", "🔌", "💻", "💽", "💾", "💿", "📀", "🎥", "🎬",
88+
"📺", "📷", "📹", "📼", "🔍", "🔎", "💡", "🔦", "🏮", "📔", "📕", "📖",
89+
"📗", "📘", "📙", "📚", "📓", "📒", "📃", "📜", "📄", "📰", "📑", "🔖",
90+
"💰", "💴", "💵", "💸", "💳", "💹", "📧", "📨", "📩", "📤", "📥", "📦",
91+
"📫", "📪", "📮", "📝", "💼", "📁", "📂", "📅", "📆", "📇", "📈", "📉",
92+
"📊", "📋", "📌", "📍", "📎", "📏", "📐", "🔒", "🔓", "🔏", "🔐", "🔑",
93+
"🔨", "💣", "🔧", "🔩", "🔗", "📡", "💉", "💊", "🚪", "🚽", "🚬", "🗿",
94+
"🏧", "🚹", "🚺", "🚻", "🚼", "🚾", "🚫", "🚭", "🔞", "🔃", "🔙", "🔚",
95+
"🔛", "🔜", "🔝", "🔯", "🔼", "🔽", "🎦", "📶", "📳", "📴", "💱", "💲",
96+
"🔱", "📛", "🔰", "🔟", "🔠", "🔡", "🔢", "🔣", "🔤", "🆎", "🆑", "🆒",
97+
"🆓", "🆔", "🆕", "🆖", "🆗", "🆘", "🆙", "🆚", "🈁", "🈶", "🈯", "🉐",
98+
"🈹", "🈚", "🈲", "🉑", "🈸", "🈴", "🈳", "🈺", "🈵", "🔴", "🔵", "🔶",
99+
"🔷", "🔸", "🔹", "🔺", "🔻", "💠", "🔘", "🔳", "🔲", "🏁", "🚩", "🎌",
100+
};
101+
102+
bool is_basemoji_string (const char *s)
103+
{
104+
int len = strlen (s);
105+
106+
/* This code assumes length of emoji array is 576
107+
* Generate error at build time if this becomes untrue:
108+
*/
109+
BUILD_ASSERT(ARRAY_SIZE(emojis) == 576);
110+
111+
/* Check for expected length of a basemoji string, and if the
112+
* first two bytes match the expected UTF-8 encoding.
113+
* This doesn't guarantee that `s` is a valid basemoji string,
114+
* but this will catch most obvious cases and other invalid strings
115+
* are left to be detected in decode.
116+
*/
117+
if (len >= BASEMOJI_MINLEN
118+
&& len <= BASEMOJI_MAXLEN
119+
&& len % 4 == 0
120+
&& (uint8_t)s[0] == 0xf0
121+
&& (uint8_t)s[1] == 0x9f)
122+
return true;
123+
return false;
124+
}
125+
126+
/* Encode id into buf in reverse (i.e. higher order bytes are encoded
127+
* and placed first into 'buf' since we're doing progressive division.)
128+
*/
129+
static int emoji_revenc (char *buf, int buflen, uint64_t id)
130+
{
131+
int index = 0;
132+
memset (buf, 0, buflen);
133+
if (id == 0) {
134+
memcpy (buf, emojis[0], 4);
135+
return 4;
136+
}
137+
while (id > 0) {
138+
int rem = id % 576;
139+
memcpy (buf+index, emojis[rem], 4);
140+
index += 4;
141+
id = id / 576;
142+
}
143+
return index;
144+
}
145+
146+
int uint64_basemoji_encode (uint64_t id, char *buf, int buflen)
147+
{
148+
int count;
149+
int n;
150+
char reverse[BASEMOJI_MAXLEN+1];
151+
152+
if (buf == NULL || buflen <= 0) {
153+
errno = EINVAL;
154+
return -1;
155+
}
156+
157+
/* Encode bytes to emoji (in reverse), which also gives us a count
158+
* of the total bytes required for this encoding.
159+
*/
160+
if ((count = emoji_revenc (reverse, sizeof (reverse), id)) < 0) {
161+
errno = EINVAL;
162+
return -1;
163+
}
164+
165+
/* Check for overflow of provided buffer:
166+
* Need space for count bytes for emoji + NUL
167+
*/
168+
if (count + 1 > buflen) {
169+
errno = EOVERFLOW;
170+
return -1;
171+
}
172+
173+
memset (buf, 0, buflen);
174+
n = 0;
175+
176+
/* Copy 4-byte emojis back in order so that most significant bits are
177+
* on the left:
178+
*/
179+
for (int i = count - 4; i >= 0; i-=4) {
180+
memcpy (buf+n, reverse+i, 4);
181+
n+=4;
182+
}
183+
return 0;
184+
}
185+
186+
187+
static int basemoji_lookup (const char *c, int *result)
188+
{
189+
for (int i = 0; i < 576; i++) {
190+
if (memcmp (c, emojis[i], 4) == 0) {
191+
*result = i;
192+
return 0;
193+
}
194+
}
195+
errno = EINVAL;
196+
return -1;
197+
}
198+
199+
/* Decode the basemoji string 'str' and store the value in '*idp'.
 *
 * Returns 0 on success.  Returns -1 with errno set to EINVAL if 'str' or
 * 'idp' is NULL, 'str' fails the is_basemoji_string() check, any 4-byte
 * group is not found by basemoji_lookup(), or the encoded value does not
 * fit in an unsigned 64 bit integer.  '*idp' is written only on success.
 */
int uint64_basemoji_decode (const char *str, uint64_t *idp)
{
    uint64_t id = 0;
    size_t len;

    if (str == NULL
        || idp == NULL
        || !is_basemoji_string (str)) {
        errno = EINVAL;
        return -1;
    }

    /* The most significant digit is leftmost, so accumulate left to right
     * (id = id * 576 + digit).  All emoji are 4 bytes, so step by 4.
     */
    len = strlen (str);
    for (size_t i = 0; i + 4 <= len; i += 4) {
        int c;
        if (basemoji_lookup (str + i, &c) < 0) {
            errno = EINVAL;
            return -1;
        }
        /* Seven base-576 digits can express values above UINT64_MAX.
         * Reject those instead of letting the unsigned arithmetic wrap,
         * which would make an out-of-range string alias a valid id.
         */
        if (id > (UINT64_MAX - (uint64_t)c) / 576) {
            errno = EINVAL;
            return -1;
        }
        id = id * 576 + (uint64_t)c;
    }
    *idp = id;
    return 0;
}

src/common/libutil/basemoji.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/************************************************************\
2+
* Copyright 2023 Lawrence Livermore National Security, LLC
3+
* (c.f. AUTHORS, NOTICE.LLNS, COPYING)
4+
*
5+
* This file is part of the Flux resource manager framework.
6+
* For details, see https://github.com/flux-framework.
7+
*
8+
* SPDX-License-Identifier: LGPL-3.0
9+
\************************************************************/
10+
11+
#ifndef _UTIL_BASEMOJI_H
12+
#define _UTIL_BASEMOJI_H
13+
14+
#include <stdint.h>
15+
#include <stdbool.h>
16+
17+
/* basemoji - an implementation of the RFC 19 FLUID emoji encoding
18+
*/
19+
20+
/* Convert a 64 bit unsigned integer to basemoji, placing the result
21+
* in buffer 'buf' of size 'buflen'.
22+
*
23+
* Returns 0 on success, -1 on failure with errno set:
24+
* EINVAL: Invalid arguments
25+
* EOVERFLOW: buffer too small for encoded string
26+
*/
27+
int uint64_basemoji_encode (uint64_t id, char *buf, int buflen);
28+
29+
/* Decode a string in basemoji to an unsigned 64 bit integer.
30+
*
31+
* Returns 0 on success, -1 on failure with errno set:
32+
* EINVAL: Invalid arguments
33+
*/
34+
int uint64_basemoji_decode (const char *str, uint64_t *idp);
35+
36+
/* Return true if 's' could be a basemoji string, i.e. it falls
37+
* within the minimum and maximum lengths, and starts with the
38+
* expected bytes.
39+
*/
40+
bool is_basemoji_string (const char *s);
41+
42+
#endif /* !_UTIL_BASEMOJI_H */
43+
44+
/*
45+
* vi:tabstop=4 shiftwidth=4 expandtab
46+
*/

0 commit comments

Comments
 (0)