|
96 | 96 | :flag (long-array (repeatedly 1000000 #(rand-int 2)))})</code></pre></div></div><div style="margin:15px;"><div><p>Single-pass fused filter + aggregate:</p></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(time |
97 | 97 | (st/q {:from large-ds |
98 | 98 | :where [[:> :amount 500.0] [:= :flag 1]] |
99 | | - :agg [[:sum :amount] [:count]]}))</code></pre></div></div><div style="margin:15px;"><div><strong>OUT</strong><pre><code>"Elapsed time: 477.039557 msecs" |
100 | | -</code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure printed-clojure">[{:_count 249791, :sum 1.8735921822152323E8, :count 249791}] |
| 99 | + :agg [[:sum :amount] [:count]]}))</code></pre></div></div><div style="margin:15px;"><div><strong>OUT</strong><pre><code>"Elapsed time: 745.622644 msecs" |
| 100 | +</code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure printed-clojure">[{:_count 250044, :sum 1.875662563822782E8, :count 250044}] |
101 | 101 | </code></pre></div></div><div style="margin:15px;"><div><p>Dense group-by (direct array indexing, no hash for ≤200K groups):</p></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(time |
102 | 102 | (st/q {:from large-ds |
103 | 103 | :group [:region] |
104 | | - :agg [[:sum :amount] [:avg :amount] [:count]]}))</code></pre></div></div><div style="margin:15px;"><div><strong>OUT</strong><pre><code>"Elapsed time: 125.550777 msecs" |
| 104 | + :agg [[:sum :amount] [:avg :amount] [:count]]}))</code></pre></div></div><div style="margin:15px;"><div><strong>OUT</strong><pre><code>"Elapsed time: 125.451823 msecs" |
105 | 105 | </code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure printed-clojure">[{:region 0, |
106 | | - :_count 99832, |
107 | | - :sum 4.984979812538232E7, |
108 | | - :avg 499.3368671907036, |
109 | | - :count 99832} |
| 106 | + :_count 100070, |
| 107 | + :sum 4.988999541814934E7, |
| 108 | + :avg 498.5509685035409, |
| 109 | + :count 100070} |
110 | 110 | {:region 1, |
111 | | - :_count 99849, |
112 | | - :sum 4.9880746600852E7, |
113 | | - :avg 499.5618043330629, |
114 | | - :count 99849} |
| 111 | + :_count 100492, |
| 112 | + :sum 5.022953172006592E7, |
| 113 | + :avg 499.8361234731712, |
| 114 | + :count 100492} |
115 | 115 | {:region 2, |
116 | | - :_count 99878, |
117 | | - :sum 4.983648865253064E7, |
118 | | - :avg 498.97363435922466, |
119 | | - :count 99878} |
| 116 | + :_count 99399, |
| 117 | + :sum 4.974791869803327E7, |
| 118 | + :avg 500.4871145387104, |
| 119 | + :count 99399} |
120 | 120 | {:region 3, |
121 | | - :_count 99903, |
122 | | - :sum 4.981424866684195E7, |
123 | | - :avg 498.62615403783616, |
124 | | - :count 99903} |
| 121 | + :_count 99939, |
| 122 | + :sum 4.9978934858833194E7, |
| 123 | + :avg 500.09440617609937, |
| 124 | + :count 99939} |
125 | 125 | {:region 4, |
126 | | - :_count 99687, |
127 | | - :sum 4.996144638635216E7, |
128 | | - :avg 501.18316717678493, |
129 | | - :count 99687} |
| 126 | + :_count 100233, |
| 127 | + :sum 5.009205577906139E7, |
| 128 | + :avg 499.7561260169943, |
| 129 | + :count 100233} |
130 | 130 | {:region 5, |
131 | | - :_count 99764, |
132 | | - :sum 4.98234758907312E7, |
133 | | - :avg 499.4133744710637, |
134 | | - :count 99764} |
| 131 | + :_count 99664, |
| 132 | + :sum 4.9897951325023964E7, |
| 133 | + :avg 500.66173668550294, |
| 134 | + :count 99664} |
135 | 135 | {:region 6, |
136 | | - :_count 100745, |
137 | | - :sum 5.048420472551149E7, |
138 | | - :avg 501.1087867935033, |
139 | | - :count 100745} |
| 136 | + :_count 100182, |
| 137 | + :sum 5.004161000445469E7, |
| 138 | + :avg 499.5069973094437, |
| 139 | + :count 100182} |
140 | 140 | {:region 7, |
141 | | - :_count 99851, |
142 | | - :sum 4.985141050218023E7, |
143 | | - :avg 499.2579994409694, |
144 | | - :count 99851} |
| 141 | + :_count 99955, |
| 142 | + :sum 5.004523374769073E7, |
| 143 | + :avg 500.6776424159945, |
| 144 | + :count 99955} |
145 | 145 | {:region 8, |
146 | | - :_count 100003, |
147 | | - :sum 4.993377624752043E7, |
148 | | - :avg 499.32278279172056, |
149 | | - :count 100003} |
| 146 | + :_count 100198, |
| 147 | + :sum 5.0043233552947626E7, |
| 148 | + :avg 499.44343752318036, |
| 149 | + :count 100198} |
150 | 150 | {:region 9, |
151 | | - :_count 100488, |
152 | | - :sum 5.019928136613044E7, |
153 | | - :avg 499.55498533287994, |
154 | | - :count 100488}] |
| 151 | + :_count 99868, |
| 152 | + :sum 5.002264971353778E7, |
| 153 | + :avg 500.88766885827073, |
| 154 | + :count 99868}] |
155 | 155 | </code></pre></div></div><div style="margin:15px;"><div><h2 id="4.-zone-map-pruning">4. Zone Map Pruning</h2><p>For index-backed datasets, Stratum tracks min/max per 8192-row chunk. Range queries classify each chunk as: skip / stats-only / SIMD — so a narrow range predicate like ts in [900000, 910000) only touches the handful of chunks that overlap it (about 2% of chunks here, covering ~1% of rows).</p></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(def time-series |
156 | 156 | (st/make-dataset |
157 | 157 | {:ts (st/index-from-seq :int64 (range 0 1000000)) |
158 | 158 | :value (st/index-from-seq :float64 (repeatedly 1000000 rand))}))</code></pre></div></div><div style="margin:15px;"><div><p>Only the few chunks overlapping the queried range are scanned; all others are skipped:</p></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(time |
159 | 159 | (st/q {:from time-series |
160 | 160 | :where [[:>= :ts 900000] [:< :ts 910000]] |
161 | | - :agg [[:sum :value] [:count]]}))</code></pre></div></div><div style="margin:15px;"><div><strong>OUT</strong><pre><code>"Elapsed time: 13.857909 msecs" |
162 | | -</code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure printed-clojure">[{:_count 10000, :sum 4976.255276477182, :count 10000}] |
| 161 | + :agg [[:sum :value] [:count]]}))</code></pre></div></div><div style="margin:15px;"><div><strong>OUT</strong><pre><code>"Elapsed time: 17.712668 msecs" |
| 162 | +</code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure printed-clojure">[{:_count 10000, :sum 4970.948979574672, :count 10000}] |
163 | 163 | </code></pre></div></div><div style="margin:15px;"><div><h2 id="5.-persistence:-version-control-for-analytics">5. Persistence: Version Control for Analytics</h2><p>Datasets are immutable Clojure values. st/sync! durably persists to a Konserve store (file, S3, memory …). st/fork is O(1) — structural sharing with copy-on-write on mutation.</p></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(require '[konserve.store :as kstore])</code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(def store-cfg |
164 | 164 | {:backend :file |
165 | 165 | :path "/tmp/stratum-intro" |
|
170 | 170 | {:product (st/index-from-seq :int64 [101 102 103]) |
171 | 171 | :qty (st/index-from-seq :float64 [100.0 50.0 75.0]) |
172 | 172 | :revenue (st/index-from-seq :float64 [1000.0 750.0 1125.0])} |
173 | | - {:name "sales-q1"}))</code></pre></div></div><div style="margin:15px;"><div><p>Persist to "main" branch — returns new dataset with commit metadata:</p></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(def v1 (st/sync! base store "main"))</code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(:id (:commit-info v1))</code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure printed-clojure">#uuid "638e09e5-a522-47c6-9f40-f1cc85039d4f" |
| 173 | + {:name "sales-q1"}))</code></pre></div></div><div style="margin:15px;"><div><p>Persist to "main" branch — returns new dataset with commit metadata:</p></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(def v1 (st/sync! base store "main"))</code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(:id (:commit-info v1))</code></pre></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure printed-clojure">#uuid "4816039e-a864-4f4a-a91a-fefcddbee750" |
174 | 174 | </code></pre></div></div><div style="margin:15px;"><div><p>=> #uuid "..."</p><p>Fork for a what-if scenario. Fork is O(1) — all chunks shared. Mutations use the transient/persistent protocol (like Clojure's collections):</p></div></div><div style="margin:15px;"><div><pre><code class="sourceCode language-clojure source-clojure bg-light">(def what-if |
175 | 175 | (-> (st/fork base) |
176 | 176 | transient |
|
0 commit comments