Skip to content

Commit c6c0f06

Browse files
committed
Add normalized utf-8 creator
1 parent 07a9aed commit c6c0f06

File tree

6 files changed

+137
-17
lines changed

6 files changed

+137
-17
lines changed

src/main/java/at/favre/lib/bytes/Bytes.java

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import java.nio.charset.Charset;
3131
import java.nio.charset.StandardCharsets;
3232
import java.security.SecureRandom;
33+
import java.text.Normalizer;
3334
import java.util.*;
3435

3536
/**
@@ -40,7 +41,8 @@
4041
* <li>Helper functions like: indexOf, count, entropy</li>
4142
* <li>Transformations like: append, reverse, xor, and, resize, ...</li>
4243
* <li>Conversion to other types: primitives, List, object array, ByteBuffer, BigInteger, ...</li>
43-
* <li>Making it mutable</li>
44+
* <li>Validation: built-in or provided</li>
45+
* <li>Making it mutable or read-only</li>
4446
* </ul>
4547
* <p>
4648
* It supports byte ordering (little/big endianness).
@@ -61,7 +63,7 @@ public class Bytes implements Comparable<Bytes>, AbstractBytes {
6163
/* FACTORY ***************************************************************************************************/
6264

6365
/**
64-
* Creates a new instance with an empty array filled zeros.
66+
* Creates a new instance with an empty array filled with zeros.
6567
*
6668
* @param length of the internal array
6769
* @return new instance
@@ -277,6 +279,17 @@ public static Bytes from(String utf8String) {
277279
return from(utf8String, StandardCharsets.UTF_8);
278280
}
279281

282+
/**
283+
* Creates a new instance from the normalized form of the given string, encoded as UTF-8
284+
*
285+
* @param utf8String to get the internal byte array from
286+
* @param form the normalization form to apply; usually you want {@link java.text.Normalizer.Form#NFKD} for compatibility
287+
* @return new instance
288+
*/
289+
public static Bytes from(String utf8String, Normalizer.Form form) {
290+
return from(Normalizer.normalize(utf8String, form), StandardCharsets.UTF_8);
291+
}
292+
280293
/**
281294
* Creates a new instance from given string
282295
*
@@ -363,7 +376,6 @@ public static Bytes random(int length) {
363376
return random(length, new SecureRandom());
364377
}
365378

366-
367379
/**
368380
* A new instance with random bytes.
369381
*
@@ -402,7 +414,10 @@ public static Bytes random(int length, Random random) {
402414
/* TRANSFORMER **********************************************************************************************/
403415

404416
/**
405-
* Creates a new instance with the current array appended with the provided data (ie. append at the end)
417+
* Creates a new instance with the provided data appended to the current array (i.e. append at the end).
418+
* <p>
419+
* This will create a new byte array internally, so it is not suitable for extensive use as a builder pattern -
420+
* use {@link ByteBuffer} or {@link java.io.ByteArrayOutputStream} for that.
406421
*
407422
* @param bytes to append
408423
* @return appended instance
@@ -412,17 +427,17 @@ public Bytes append(Bytes bytes) {
412427
}
413428

414429
/**
415-
* Creates a new instance with the current array appended with the provided data (ie. append at the end)
430+
* Creates a new instance with the provided data appended to the current array (i.e. append at the end)
416431
*
417432
* @param singleByte to append
418433
* @return appended instance
419434
*/
420435
public Bytes append(byte singleByte) {
421-
return append(new byte[]{singleByte});
436+
return append(Bytes.from(singleByte));
422437
}
423438

424439
/**
425-
* Creates a new instance with the current array appended with the provided data (ie. append at the end)
440+
* Creates a new instance with the provided data appended to the current array (i.e. append at the end)
426441
*
427442
* @param char2Bytes to append
428443
* @return appended instance
@@ -432,7 +447,7 @@ public Bytes append(char char2Bytes) {
432447
}
433448

434449
/**
435-
* Creates a new instance with the current array appended with the provided data (ie. append at the end)
450+
* Creates a new instance with the provided data appended to the current array (i.e. append at the end)
436451
*
437452
* @param short2Bytes to append
438453
* @return appended instance
@@ -442,7 +457,7 @@ public Bytes append(short short2Bytes) {
442457
}
443458

444459
/**
445-
* Creates a new instance with the current array appended with the provided data (ie. append at the end)
460+
* Creates a new instance with the provided data appended to the current array (i.e. append at the end)
446461
*
447462
* @param integer4Bytes to append
448463
* @return appended instance
@@ -452,7 +467,7 @@ public Bytes append(int integer4Bytes) {
452467
}
453468

454469
/**
455-
* Creates a new instance with the current array appended with the provided data (ie. append at the end)
470+
* Creates a new instance with the provided data appended to the current array (i.e. append at the end)
456471
*
457472
* @param long8Bytes to append
458473
* @return appended instance
@@ -462,7 +477,7 @@ public Bytes append(long long8Bytes) {
462477
}
463478

464479
/**
465-
* Creates a new instance with the current array appended with the provided data (ie. append at the end)
480+
* Creates a new instance with the provided data appended to the current array (i.e. append at the end)
466481
*
467482
* @param secondArray to append
468483
* @return appended instance
@@ -746,7 +761,7 @@ public boolean isEmpty() {
746761
}
747762

748763
/**
749-
* Get the set byte order or endianness. Default in Java is {@link ByteOrder#BIG_ENDIAN}.
764+
* Get the set byte order/endianness. Default in Java is {@link ByteOrder#BIG_ENDIAN}.
750765
*
751766
* @return either {@link ByteOrder#BIG_ENDIAN} or {@link ByteOrder#LITTLE_ENDIAN}
752767
* @see <a href="https://en.wikipedia.org/wiki/Endianness">Endianness</a>
@@ -1132,7 +1147,7 @@ public BitSet toBitSet() {
11321147
}
11331148

11341149
/**
1135-
* If the underlying byte array is smaller than 1 byte / 8 bit returns unsigned two-complement
1150+
* If the underlying byte array is smaller than or equal to 1 byte / 8 bits, returns unsigned two's-complement
11361151
* representation for a Java byte value.
11371152
*
11381153
* @return the byte representation
@@ -1147,7 +1162,7 @@ public byte toByte() {
11471162
}
11481163

11491164
/**
1150-
* If the underlying byte array is smaller than 2 byte / 16 bit returns unsigned two-complement
1165+
* If the underlying byte array is smaller than or equal to 2 bytes / 16 bits, returns unsigned two's-complement
11511166
* representation for a Java char integer value. The output is dependent on the set {@link #byteOrder()}.
11521167
*
11531168
* @return the int representation
@@ -1162,7 +1177,7 @@ public char toChar() {
11621177
}
11631178

11641179
/**
1165-
* If the underlying byte array is smaller than 2 byte / 16 bit returns signed two-complement
1180+
* If the underlying byte array is smaller than or equal to 2 bytes / 16 bits, returns signed two's-complement
11661181
* representation for a Java short integer value. The output is dependent on the set {@link #byteOrder()}.
11671182
*
11681183
* @return the int representation
@@ -1177,7 +1192,7 @@ public short toShort() {
11771192
}
11781193

11791194
/**
1180-
* If the underlying byte array is smaller than 4 byte / 32 bit returns signed two-complement
1195+
* If the underlying byte array is smaller than or equal to 4 bytes / 32 bits, returns signed two's-complement
11811196
* representation for a Java signed integer value. The output is dependent on the set {@link #byteOrder()}.
11821197
*
11831198
* @return the int representation
@@ -1192,7 +1207,7 @@ public int toInt() {
11921207
}
11931208

11941209
/**
1195-
* If the underlying byte array is smaller than 8 byte / 64 bit returns signed two-complement
1210+
* If the underlying byte array is smaller than or equal to 8 bytes / 64 bits, returns signed two's-complement
11961211
* representation for a Java signed long integer value. The output is dependent on the set {@link #byteOrder()}.
11971212
*
11981213
* @return the long representation

src/main/java/at/favre/lib/bytes/BytesValidator.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
/*
2+
* Copyright 2017 Patrick Favre-Bulle
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing,
15+
* software distributed under the License is distributed on an
16+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17+
* KIND, either express or implied. See the License for the
18+
* specific language governing permissions and limitations
19+
* under the License.
20+
*/
21+
122
package at.favre.lib.bytes;
223

324
/**

src/main/java/at/favre/lib/bytes/BytesValidators.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
/*
2+
* Copyright 2017 Patrick Favre-Bulle
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing,
15+
* software distributed under the License is distributed on an
16+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17+
* KIND, either express or implied. See the License for the
18+
* specific language governing permissions and limitations
19+
* under the License.
20+
*/
21+
122
package at.favre.lib.bytes;
223

324
/**

src/test/java/at/favre/lib/bytes/Base64Test.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
/*
2+
* Copyright 2017 Patrick Favre-Bulle
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing,
15+
* software distributed under the License is distributed on an
16+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17+
* KIND, either express or implied. See the License for the
18+
* specific language governing permissions and limitations
19+
* under the License.
20+
*/
21+
122
package at.favre.lib.bytes;
223

324
import org.junit.Test;

src/test/java/at/favre/lib/bytes/BinaryToTextEncodingTest.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
/*
2+
* Copyright 2017 Patrick Favre-Bulle
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing,
15+
* software distributed under the License is distributed on an
16+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17+
* KIND, either express or implied. See the License for the
18+
* specific language governing permissions and limitations
19+
* under the License.
20+
*/
21+
122
package at.favre.lib.bytes;
223

324
import org.junit.Test;

src/test/java/at/favre/lib/bytes/UtilTest.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
/*
2+
* Copyright 2017 Patrick Favre-Bulle
3+
*
4+
* Licensed to the Apache Software Foundation (ASF) under one
5+
* or more contributor license agreements. See the NOTICE file
6+
* distributed with this work for additional information
7+
* regarding copyright ownership. The ASF licenses this file
8+
* to you under the Apache License, Version 2.0 (the
9+
* "License"); you may not use this file except in compliance
10+
* with the License. You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing,
15+
* software distributed under the License is distributed on an
16+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17+
* KIND, either express or implied. See the License for the
18+
* specific language governing permissions and limitations
19+
* under the License.
20+
*/
21+
122
package at.favre.lib.bytes;
223

324
import org.junit.Test;

0 commit comments

Comments
 (0)