Skip to content

Commit dd91998

Browse files
authored
Add more detailed docs, examples and readme (#23)
1 parent 79cc3b9 commit dd91998

File tree

11 files changed

+1086
-663
lines changed

11 files changed

+1086
-663
lines changed

.github/release.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ changelog:
44
labels: ["new"]
55
- title: 🐛 Fixes
66
labels: ["fix", "bug"]
7-
- title: 📝 Dependencies
7+
- title: 📖 Documentation
8+
labels: ["documentation", "docs"]
9+
- title: 🔗 Dependencies
810
labels: ["dependencies"]
911
- title: 📦 Other
1012
labels: ["*"]

README.md

Lines changed: 275 additions & 304 deletions
Large diffs are not rendered by default.

batch.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,29 @@ import (
66
"github.com/destel/rill/internal/core"
77
)
88

9-
// Batch groups items from an input channel into batches based on a maximum size and a timeout.
10-
// A batch is emitted when it reaches the maximum size, the timeout expires, or the input channel closes.
11-
// To emit batches only when full, set the timeout to -1. This function never emits empty batches.
12-
// The timeout countdown starts when the first item is added to a new batch.
13-
// Zero timeout is not supported and will panic.
9+
// Batch takes a stream of items and returns a stream of batches based on a maximum size and a timeout.
10+
//
11+
// A batch is emitted when one of the following conditions is met:
12+
// - The batch reaches the size of n items
13+
// - The time since the first item was added to the batch exceeds the timeout
14+
// - The input stream is closed
15+
//
16+
// This function never emits empty batches. To disable the timeout and emit batches only based on the size,
17+
// set the timeout to -1. Setting the timeout to zero is not supported and will result in a panic.
18+
//
19+
// This is a non-blocking ordered function that processes items sequentially.
20+
//
21+
// See the package documentation for more information on non-blocking ordered functions and error handling.
1422
func Batch[A any](in <-chan Try[A], n int, timeout time.Duration) <-chan Try[[]A] {
1523
values, errs := ToChans(in)
1624
batches := core.Batch(values, n, timeout)
1725
return FromChans(batches, errs)
1826
}
1927

20-
// Unbatch is the inverse of [Batch]. It takes a channel of batches and emits individual items.
28+
// Unbatch is the inverse of [Batch]. It takes a stream of batches and returns a stream of individual items.
29+
//
30+
// This is a non-blocking ordered function that processes items sequentially.
31+
// See the package documentation for more information on non-blocking ordered functions and error handling.
2132
func Unbatch[A any](in <-chan Try[[]A]) <-chan Try[A] {
2233
batches, errs := ToChans(in)
2334
values := core.Unbatch(batches)

consume.go

Lines changed: 27 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ import (
77
"github.com/destel/rill/internal/core"
88
)
99

10-
// ForEach applies a function f to each item in an input channel using n goroutines for parallel processing. The function
11-
// blocks until all items are processed or an error is encountered, either from the function f itself or from upstream.
12-
// In case of an error leading to early termination, ForEach ensures the input channel is drained to avoid goroutine leaks,
13-
// making it safe for use in environments where cleanup is crucial. The function returns the first encountered error, or nil
14-
// if all items were processed successfully.
15-
// While this function does not guarantee the order of item processing due to its concurrent nature,
16-
// using n = 1 results in sequential processing, as in a simple for-range loop.
10+
// ForEach applies a function f to each item in an input stream.
11+
//
12+
// This is a blocking unordered function that processes items concurrently using n goroutines.
13+
// The case when n = 1 is optimized: it does not spawn additional goroutines and processes items sequentially,
14+
// making the function ordered and similar to a regular for-range loop.
15+
//
16+
// See the package documentation for more information on blocking unordered functions and error handling.
1717
func ForEach[A any](in <-chan Try[A], n int, f func(A) error) error {
1818
if n == 1 {
1919
for a := range in {
@@ -66,15 +66,10 @@ func onceFunc1[T any](f func(T)) func(T) {
6666
}
6767
}
6868

69-
// Err returns the first error encountered in the input channel.
70-
// This function blocks until:
71-
// - An error is found.
72-
// - The end of the input channel is reached.
69+
// Err returns the first error encountered in the input stream or nil if there were no errors.
7370
//
74-
// If Err terminates early (before the input channel is fully consumed),
75-
// it initiates background draining of the remaining items in the channel. This is done
76-
// to prevent goroutine leaks by ensuring that all goroutines feeding the channel are allowed to complete.
77-
// The input channel should not be used anymore after calling this function.
71+
// This is a blocking ordered function that processes items sequentially.
72+
// See the package documentation for more information on blocking ordered functions and error handling.
7873
func Err[A any](in <-chan Try[A]) error {
7974
defer DrainNB(in)
8075

@@ -87,16 +82,11 @@ func Err[A any](in <-chan Try[A]) error {
8782
return nil
8883
}
8984

90-
// First returns the first value or error encountered in the input channel.
91-
// This function blocks until:
92-
// - A value is found. In this case, the found flag is set to true.
93-
// - The end of the input channel is reached. In this case, the found flag is set to false.
94-
// - An error is encountered in the input channel.
85+
// First returns the first item or error encountered in the input stream, whichever comes first.
86+
// The found return flag is set to false if the stream was empty, otherwise it is set to true.
9587
//
96-
// If First terminates early (before the input channel is fully consumed),
97-
// it initiates background draining of the remaining items in the channel. This is done
98-
// to prevent goroutine leaks by ensuring that all goroutines feeding the channel are allowed to complete.
99-
// The input channel should not be used anymore after calling this function.
88+
// This is a blocking ordered function that processes items sequentially.
89+
// See the package documentation for more information on blocking ordered functions and error handling.
10090
func First[A any](in <-chan Try[A]) (value A, found bool, err error) {
10191
defer DrainNB(in)
10292

@@ -108,16 +98,14 @@ func First[A any](in <-chan Try[A]) (value A, found bool, err error) {
10898
return
10999
}
110100

111-
// Any checks if there is an item in the input channel that satisfies the condition f.
112-
// This function uses n goroutines for concurrency. It blocks execution until either:
113-
// - A matching item is found
114-
// - All items have been checked
115-
// - An error is encountered in the condition function f or from the upstream
101+
// Any checks if there is an item in the input stream that satisfies the condition f.
102+
// This function returns true as soon as it finds such an item. Otherwise, it returns false.
116103
//
117-
// In case of early termination, Any ensures the input channel is drained to avoid goroutine leaks,
118-
// making it safe for use in environments where cleanup is crucial. The function returns the first encountered error, or nil
104+
// Any is a blocking unordered function that processes items concurrently using n goroutines.
105+
// The case when n = 1 is optimized: it does not spawn additional goroutines and processes items sequentially,
106+
// making the function ordered.
119107
//
120-
// The function returns true if a match is found, false otherwise, or a first encountered error.
108+
// See the package documentation for more information on blocking unordered functions and error handling.
121109
func Any[A any](in <-chan Try[A], n int, f func(A) (bool, error)) (bool, error) {
122110
errBreak := errors.New("break")
123111
res := false
@@ -145,16 +133,15 @@ func Any[A any](in <-chan Try[A], n int, f func(A) (bool, error)) (bool, error)
145133
return res, err
146134
}
147135

148-
// All checks if all items in the input channel satisfy the condition function f.
149-
// This function uses n goroutines for concurrency and blocks execution until:
150-
// - A non-matching item is found,
151-
// - All items have been checked,
152-
// - An error is encountered in the condition function f or from the upstream.
136+
// All checks if all items in the input stream satisfy the condition f.
137+
// This function returns false as soon as it finds an item that does not satisfy the condition. Otherwise, it returns true,
138+
// including the case when the stream was empty.
153139
//
154-
// In case of early termination, All ensures the input channel is drained to avoid goroutine leaks,
155-
// making it safe for use in environments where cleanup is crucial. The function returns the first encountered error, or nil
140+
// This is a blocking unordered function that processes items concurrently using n goroutines.
141+
// The case when n = 1 is optimized: it does not spawn additional goroutines and processes items sequentially,
142+
// making the function ordered.
156143
//
157-
// Returns true if all items match the condition, false otherwise, or a first encountered error.
144+
// See the package documentation for more information on blocking unordered functions and error handling.
158145
func All[A any](in <-chan Try[A], n int, f func(A) (bool, error)) (bool, error) {
159146
// Idea: x && y && z is the same as !(!x || !y || !z)
160147
// So we can use Any with a negated condition to implement All

doc.go

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,79 @@
1-
// Package rill is a Go toolkit designed for efficient and straightforward streaming, parallel processing, and pipeline construction.
2-
// It abstracts away the complexities of concurrency management, enabling developers to focus on core logic.
3-
// With features like lightweight integration, batch processing, error handling, and support for functional programming paradigms,
4-
// rill enhances productivity in building concurrent applications. It offers type-safe operations, and minimizes memory usage even for large data sets.
1+
// Package rill is a collection of easy-to-use functions for concurrency, streaming, batching and pipeline construction.
2+
// It abstracts away the complexities of concurrency, removes boilerplate, and provides a structured way to handle errors.
3+
// Rill is modular and can be easily integrated into existing projects: it requires no setup and allows using only the necessary functions.
4+
// At the same time, rill's functions can be composed into complex, concurrent, and reusable pipelines when needed.
5+
//
6+
// # Streams and Try Containers
7+
//
8+
// In this package, a stream refers to a channel of [Try] containers. A Try container is a simple struct that holds a value and an error.
9+
// When an "empty stream" is referred to, it means a channel of Try containers that has been closed and was never written to.
10+
//
11+
// Most functions in this package are concurrent, and the level of concurrency can be controlled by the argument n.
12+
// Some functions share common behaviors and characteristics, which are described below.
13+
//
14+
// # Non-blocking functions
15+
//
16+
// Functions such as [Map], [Filter], and [Batch] take a stream as an input and return a new stream as an output.
17+
// They do not block and return the output stream immediately. All the processing is done in the background by the goroutine pools they spawn.
18+
// These functions forward all errors from the input stream to the output stream.
19+
// Any errors returned by the user-provided functions are also sent to the output stream.
20+
// When such a function reaches the end of the input stream, it closes the output stream, stops processing and cleans up resources.
21+
//
22+
// Such functions are designed to be composed together to build complex processing pipelines:
23+
//
24+
// stage2 := rill.Map(input, ...)
25+
// stage3 := rill.Batch(stage2, ...)
26+
// stage4 := rill.Map(stage3, ...)
27+
// results := rill.Unbatch(stage4, ...)
28+
// // consume the results and handle errors with some blocking function
29+
//
30+
// # Blocking functions
31+
//
32+
// Functions such as [ForEach], [Reduce] and [MapReduce] are used at the last stage of the pipeline
33+
// to consume the stream and return the final result or error.
34+
//
35+
// Usually, these functions block until one of the following conditions is met:
36+
// - The end of the stream is reached. In this case, the function returns the final result.
37+
// - An error is encountered either in the input stream or in some user-provided function. In this case, the function returns the error.
38+
//
39+
// In case of an early termination (before reaching the end of the input stream), such functions initiate
40+
// background draining of the remaining items. This is done to prevent goroutine
41+
// leaks by ensuring that all goroutines feeding the stream are allowed to complete.
42+
// The input stream should not be used anymore after calling such functions.
43+
//
44+
// It's also possible to consume the pipeline results manually, for example using a for-range loop.
45+
// In this case, add a deferred call to [DrainNB] before the loop to ensure that goroutines are not leaked.
46+
//
47+
// defer rill.DrainNB(results)
48+
//
49+
// for res := range results {
50+
// if res.Error != nil {
51+
// return res.Error
52+
// }
53+
// // process res.Value
54+
// }
55+
//
56+
// # Unordered functions
57+
//
58+
// Functions such as [Map], [Filter] and [FlatMap] write items to the output stream as soon as they become available.
59+
// Due to the concurrent nature of these functions, the order of items in the output stream may not match the order of items in the input stream.
60+
// These functions prioritize performance and concurrency over maintaining the original order.
61+
//
62+
// # Ordered functions
63+
//
64+
// Functions such as [OrderedMap] or [OrderedFilter] preserve the order of items from the input stream.
65+
// These functions are still concurrent, but use special synchronization techniques to ensure that
66+
// items are written to the output stream in the same order as they were read from the input stream.
67+
// This additional synchronization has some overhead, but it is negligible for i/o bound workloads.
68+
//
69+
// Some other functions, such as [ToSlice], [Batch] or [First] are not concurrent and are ordered by nature.
70+
//
71+
// # Error handling
72+
//
73+
// Error handling can be non-trivial in concurrent applications. Rill simplifies this by providing a structured error handling approach.
74+
// As described above, all errors are automatically propagated down the pipeline to the final stage, where they can be caught.
75+
// This allows the pipeline to terminate after the first error is encountered and return it to the caller.
76+
//
77+
// In cases where more complex error handling logic is required, the [Catch] function can be used.
78+
// It allows catching and handling errors at any point in the pipeline, providing the flexibility to handle not only the first error, but any of them.
579
package rill

0 commit comments

Comments
 (0)