|
8 | 8 | import static com.softwaremill.jox.structured.Scopes.unsupervised; |
9 | 9 | import static java.lang.Thread.sleep; |
10 | 10 |
|
| 11 | +import java.nio.ByteBuffer; |
| 12 | +import java.nio.charset.Charset; |
| 13 | +import java.nio.charset.StandardCharsets; |
11 | 14 | import java.time.Duration; |
12 | 15 | import java.util.ArrayList; |
13 | 16 | import java.util.Arrays; |
|
55 | 58 | * Running a flow is possible using one of the `run*` methods, such as {@link Flow#runToList}, {@link Flow#runToChannel} or {@link Flow#runFold}. |
56 | 59 | */ |
57 | 60 | public class Flow<T> { |
58 | | - protected final FlowStage<T> last; |
| 61 | + final FlowStage<T> last; |
59 | 62 |
|
60 | 63 | public Flow(FlowStage<T> last) { |
61 | 64 | this.last = last; |
@@ -677,6 +680,109 @@ public Flow<Void> drain() { |
677 | 680 | return Flows.usingEmit(_ -> last.run(_ -> {})); |
678 | 681 | } |
679 | 682 |
|
| 683 | + /** Decodes a stream of chunks of bytes into UTF-8 Strings. This function is able to handle UTF-8 characters encoded on multiple bytes |
| 684 | + * that are split across chunks. |
| 685 | + * |
| 686 | + * @return |
| 687 | + * a flow of Strings decoded from incoming bytes. |
| 688 | + */ |
| 689 | + public Flow<String> decodeStringUtf8() { |
| 690 | + return ChunksUtf8Decoder.decodeStringUtf8(last); |
| 691 | + } |
| 692 | + |
| 693 | + /** |
| 694 | + * Encodes a flow of `String` into a flow of bytes using UTF-8. |
| 695 | + */ |
| 696 | + public Flow<byte[]> encodeUtf8() { |
| 697 | + return map(s -> { |
| 698 | + if (s instanceof String string) { |
| 699 | + return string.getBytes(StandardCharsets.UTF_8); |
| 700 | + } |
| 701 | + throw new IllegalArgumentException("requirement failed: method can be called only on flow containing String"); |
| 702 | + }); |
| 703 | + } |
| 704 | + |
| 705 | + /** |
| 706 | + * Transforms a flow of byte arrays such that each emitted `String` is a text line from the input decoded using UTF-8 charset. |
| 707 | + * |
| 708 | + * @return |
| 709 | + * a flow emitting lines read from the input byte chunks, assuming they represent text. |
| 710 | + */ |
| 711 | + public Flow<String> linesUtf8() { |
| 712 | + return lines(StandardCharsets.UTF_8); |
| 713 | + } |
| 714 | + |
| 715 | + /** |
| 716 | + * Transforms a flow of byte arrays such that each emitted `String` is a text line from the input. |
| 717 | + * |
| 718 | + * @param charset the charset to use for decoding the bytes into text. |
| 719 | + * @return a flow emitting lines read from the input byte arrays, assuming they represent text. |
| 720 | + */ |
| 721 | + public Flow<String> lines(Charset charset) { |
| 722 | + // buffer == Optional.empty() is a special state for handling empty chunks in onComplete, in order to tell them apart from empty lines |
| 723 | + return mapStatefulConcat(Optional::<byte[]>empty, |
| 724 | + (buffer, nextChunk) -> { |
| 725 | + if (!byte[].class.isInstance(nextChunk)) { |
| 726 | + throw new IllegalArgumentException("requirement failed: method can be called only on flow containing byte[]"); |
| 727 | + } |
| 728 | + // get next incoming chunk |
| 729 | + byte[] chunk = (byte[]) nextChunk; |
| 730 | + if (chunk.length == 0) { |
| 731 | + return Map.entry(Optional.empty(), Collections.emptyList()); |
| 732 | + } |
| 733 | + |
| 734 | + // check if chunk contains newline character, if not proceed to the next chunk |
| 735 | + int newLineIndex = getNewLineIndex(chunk); |
| 736 | + if (newLineIndex == -1) { |
| 737 | + if (buffer.isEmpty()) { |
| 738 | + return Map.entry(Optional.of(chunk), Collections.emptyList()); |
| 739 | + } |
| 740 | + var b = buffer.get(); |
| 741 | + byte[] newBuffer = Arrays.copyOf(b, b.length + chunk.length); |
| 742 | + newBuffer = ByteBuffer.wrap(newBuffer).put(b.length, chunk).array(); |
| 743 | + return Map.entry(Optional.of(newBuffer), Collections.emptyList()); |
| 744 | + } |
| 745 | + |
| 746 | + // buffer for lines, if chunk contains more than one newline character |
| 747 | + List<byte[]> lines = new ArrayList<>(); |
| 748 | + |
| 749 | + // variable used to clear buffer after using it |
| 750 | + byte[] bufferFromPreviousChunk = buffer.orElse(new byte[0]); |
| 751 | + while (chunk.length > 0 && newLineIndex != -1) { |
| 752 | + byte[] line = new byte[newLineIndex]; |
| 753 | + byte[] newChunk = new byte[chunk.length - newLineIndex - 1]; |
| 754 | + ByteBuffer.wrap(chunk) |
| 755 | + .get(line, 0, newLineIndex) |
| 756 | + .get(newLineIndex + 1, newChunk, 0, chunk.length - newLineIndex - 1); |
| 757 | + |
| 758 | + if (bufferFromPreviousChunk.length > 0) { |
| 759 | + // concat accumulated buffer and line |
| 760 | + byte[] buf = Arrays.copyOf(bufferFromPreviousChunk, bufferFromPreviousChunk.length + line.length); |
| 761 | + lines.add(ByteBuffer.wrap(buf).put(bufferFromPreviousChunk.length, line).array()); |
| 762 | + // cleanup buffer |
| 763 | + bufferFromPreviousChunk = new byte[0]; |
| 764 | + } else { |
| 765 | + lines.add(line); |
| 766 | + } |
| 767 | + chunk = newChunk; |
| 768 | + newLineIndex = getNewLineIndex(chunk); |
| 769 | + } |
| 770 | + return Map.entry(Optional.of(chunk), lines); |
| 771 | + }, |
| 772 | + buf -> buf |
| 773 | + ) |
| 774 | + .map(chunk -> new String(chunk, charset)); |
| 775 | + } |
| 776 | + |
| 777 | + private int getNewLineIndex(byte[] chunk) { |
| 778 | + for (int i = 0; i < chunk.length; i++) { |
| 779 | + if (chunk[i] == '\n') { |
| 780 | + return i; |
| 781 | + } |
| 782 | + } |
| 783 | + return -1; |
| 784 | + } |
| 785 | + |
680 | 786 | /** |
681 | 787 | * Always runs `f` after the flow completes, whether it's because all elements are emitted, or when there's an error. |
682 | 788 | */ |
@@ -929,6 +1035,28 @@ public <U> Flow<U> interleave(Flow<U> other, int segmentSize, boolean eagerCompl |
929 | 1035 | return Flows.interleaveAll(Arrays.asList((Flow<U>) this, other), segmentSize, eagerComplete, bufferCapacity); |
930 | 1036 | } |
931 | 1037 |
|
| 1038 | + /** |
| 1039 | + * Emits a given number of elements (determined by `segmentSize`) from this flow to the returned flow, then emits the same number of |
| 1040 | + * elements from the `other` flow and repeats. The order of elements in both flows is preserved. |
| 1041 | + * <p> |
| 1042 | + * If one of the flows is done before the other, the behavior depends on the `eagerComplete` flag. When set to `true`, the returned flow is |
| 1043 | + * completed immediately, otherwise the remaining elements from the other flow are emitted by the returned flow. |
| 1044 | + * <p> |
| 1045 | + * Both flows are run concurrently and asynchronously. The size of used buffer is determined by the {@link Channel#BUFFER_SIZE} that is in scope, or default {@link Channel#DEFAULT_BUFFER_SIZE} is used. |
| 1046 | + * |
| 1047 | + * @param other |
| 1048 | + * The flow whose elements will be interleaved with the elements of this flow. |
| 1049 | + * @param segmentSize |
| 1050 | + * The number of elements sent from each flow before switching to the other one. |
| 1051 | + * @param eagerComplete |
| 1052 | + * If `true`, the returned flow is completed as soon as either of the flow completes. If `false`, the remaining elements of the |
| 1053 | + * non-completed flow are sent downstream. |
| 1054 | + */ |
| 1055 | + public <U> Flow<U> interleave(Flow<U> other, int segmentSize, boolean eagerComplete) { |
| 1056 | + //noinspection unchecked |
| 1057 | + return Flows.interleaveAll(Arrays.asList((Flow<U>) this, other), segmentSize, eagerComplete); |
| 1058 | + } |
| 1059 | + |
932 | 1060 | /** |
933 | 1061 | * Applies the given mapping function `f`, to each element emitted by this source, transforming it into an `Iterable` of results, |
934 | 1062 | * then the returned flow emits the results one by one. Can be used to unfold incoming sequences of elements into single elements. |
|
0 commit comments