1+ # Licensed to the Apache Software Foundation (ASF) under one
2+ # or more contributor license agreements. See the NOTICE file
3+ # distributed with this work for additional information
4+ # regarding copyright ownership. The ASF licenses this file
5+ # to you under the Apache License, Version 2.0 (the
6+ # "License"); you may not use this file except in compliance
7+ # with the License. You may obtain a copy of the License at
8+ #
9+ # http://www.apache.org/licenses/LICENSE-2.0
10+ #
11+ # Unless required by applicable law or agreed to in writing, software
12+ # distributed under the License is distributed on an "AS IS" BASIS,
13+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ # See the License for the specific language governing permissions and
15+ # limitations under the License.
16+
17+ """
18+ Arrow.jl Sparse Tensor Demo
19+
20+ This example demonstrates the usage of sparse tensor formats supported
21+ by Arrow.jl:
22+ - COO (Coordinate): General sparse tensor format
23+ - CSR/CSC (Compressed Sparse Row/Column): Efficient 2D sparse matrices
24+ - CSF (Compressed Sparse Fiber): Advanced N-dimensional sparse tensors
25+
26+ The demo shows construction, manipulation, and serialization of sparse tensors.
27+ """
28+
29+ using Arrow
30+ using SparseArrays
31+ using LinearAlgebra
32+
println(" === Arrow.jl Sparse Tensor Demo ===\n ")

# ============================================================================
# Section 1: COO (Coordinate) format
# ============================================================================
# Section header, followed by one blank line.
foreach(println, (
    " 1. COO (Coordinate) Format",
    " - General purpose sparse tensor format",
    " - Stores explicit coordinates and values for each non-zero element",
    "",
))

# Describe a 4×4 sparse matrix by listing its non-zero entries.
println(" Creating a 4×4 sparse matrix:")
# 2×5 index matrix of (row, col) coordinates: first line holds the row
# numbers, second line the column numbers.
indices = [
    1 2 3 4 2
    1 2 3 1 4
]
data = Float64[1, 4, 9, 2, 8]  # values at those coordinates
shape = (4, 4)

coo_tensor = Arrow.SparseTensorCOO{Float64,2}(indices, data, shape)
println(" COO Tensor: ", coo_tensor, " ")

# Print the tensor one row at a time by indexing every (row, col) position.
println(" Matrix representation:")
for r in 1:4
    println(" ", [coo_tensor[r, c] for c in 1:4], " ")
end
println(" Non-zero elements: ", Arrow.nnz(coo_tensor), " ")
println()
58+
# Demonstrate a 3D COO tensor.
# `indices_3d` is a 3×4 matrix and `data_3d` has 4 values; following the 2D
# example, each column is read as one (i, j, k) coordinate, i.e. the entries
# (1,1,1), (2,2,1), (3,1,3) and (1,3,3).
println(" Creating a 3×3×3 sparse 3D tensor:")
indices_3d = [1 2 3 1; 1 2 1 3; 1 1 3 3]  # 3×4 matrix
data_3d = [1.0, 2.0, 3.0, 4.0]
shape_3d = (3, 3, 3)

coo_3d = Arrow.SparseTensorCOO{Float64,3}(indices_3d, data_3d, shape_3d)
println(" 3D COO Tensor: $coo_3d ")

# Sample three coordinates that appear in `indices_3d`, then one that does
# not. Only the last line is labelled as a zero element, so the first three
# must all be stored coordinates.
println(" Sample elements:")
println(" [1,1,1] = $(coo_3d[1, 1, 1]) ")
println(" [2,2,1] = $(coo_3d[2, 2, 1]) ")
println(" [3,1,3] = $(coo_3d[3, 1, 3]) ")
println(" [1,2,2] = $(coo_3d[1, 2, 2]) (zero element)")
println()
73+
# ============================================================================
# Section 2: CSR/CSC (Compressed Sparse Row/Column) format
# ============================================================================
# Section header, followed by one blank line.
foreach(println, (
    " 2. CSX (Compressed Sparse Row/Column) Format",
    " - Efficient for 2D sparse matrices",
    " - CSR compresses rows, CSC compresses columns",
    "",
))

# --- CSR: the same 4×4 matrix, compressed along rows -------------------------
println(" Same 4×4 matrix in CSR (Compressed Sparse Row) format:")
# Target matrix:
#   [1.0  0    0    0  ]
#   [0    4.0  0    8.0]
#   [0    0    9.0  0  ]
#   [2.0  0    0    0  ]
indptr_csr = [1, 2, 4, 5, 6]          # row pointers: where each row starts in data/indices
indices_csr = [1, 2, 4, 3, 1]         # column index of each stored value
data_csr = Float64[1, 4, 8, 9, 2]     # stored values, row by row

csr_tensor = Arrow.SparseTensorCSX{Float64}(indptr_csr, indices_csr, data_csr, (4, 4), :row)
println(" CSR Tensor: ", csr_tensor, " ")

# Print the tensor one row at a time by indexing every (row, col) position.
println(" Matrix representation:")
for r in 1:4
    println(" ", [csr_tensor[r, c] for c in 1:4], " ")
end
println()

# --- CSC: the same matrix, compressed along columns --------------------------
println(" Same matrix in CSC (Compressed Sparse Column) format:")
indptr_csc = [1, 3, 4, 5, 6]          # column pointers
indices_csc = [1, 4, 2, 3, 2]         # row index of each stored value
data_csc = Float64[1, 2, 4, 9, 8]     # stored values, column by column

csc_tensor = Arrow.SparseTensorCSX{Float64}(indptr_csc, indices_csc, data_csc, (4, 4), :col)
println(" CSC Tensor: ", csc_tensor, " ")

# Spot-check two positions; both layouts describe the same matrix, so the
# printed values are expected to match.
println(" Verification - CSR and CSC should give same values:")
println(" CSR[2,2] = $(csr_tensor[2, 2]) , CSC[2,2] = $(csc_tensor[2, 2]) ")
println(" CSR[2,4] = $(csr_tensor[2, 4]) , CSC[2,4] = $(csc_tensor[2, 4]) ")
println()
115+
# ============================================================================
# Section 3: Integration with Julia SparseArrays
# ============================================================================
# Section header, followed by one blank line.
foreach(println, (
    " 3. Integration with Julia SparseArrays",
    " - Convert Julia SparseMatrixCSC to Arrow sparse tensors",
    "",
))

# Build a 4×4 SparseMatrixCSC from (row, col, value) triplets.
println(" Creating Julia SparseMatrixCSC:")
I_julia = [1, 3, 2, 4, 2]                    # row indices
J_julia = [1, 3, 2, 1, 4]                    # column indices
V_julia = Float64[10, 30, 20, 40, 25]        # values
julia_sparse = sparse(I_julia, J_julia, V_julia, 4, 4)
println(" Julia sparse matrix:")
display(julia_sparse)
println()

# SparseMatrixCSC -> Arrow COO. Entry (3, 3) was given the value 30.0 above.
println(" Converting to Arrow COO format:")
coo_from_julia = Arrow.SparseTensorCOO(julia_sparse)
println(" Arrow COO: ", coo_from_julia, " ")
println(" Verification - [3,3] = $(coo_from_julia[3, 3]) (should be 30.0)")
println()

# SparseMatrixCSC -> Arrow CSC (the source matrix is already column-compressed).
println(" Converting to Arrow CSC format:")
csc_from_julia = Arrow.SparseTensorCSX(julia_sparse, :col)
println(" Arrow CSC: ", csc_from_julia, " ")
println()

# SparseMatrixCSC -> Arrow CSR.
println(" Converting to Arrow CSR format:")
csr_from_julia = Arrow.SparseTensorCSX(julia_sparse, :row)
println(" Arrow CSR: ", csr_from_julia, " ")
println()
151+
# ============================================================================
# Section 4: CSF (Compressed Sparse Fiber) format
# ============================================================================
# Section header, followed by one blank line.
foreach(println, (
    " 4. CSF (Compressed Sparse Fiber) Format",
    " - Most advanced format for high-dimensional sparse tensors",
    " - Provides excellent compression for structured sparse data",
    "",
))

# Minimal 2×2×2 CSF tensor with two stored values.
println(" Creating a 2×2×2 CSF tensor:")
# One index buffer per dimension (dimensions 1, 2 and 3); every buffer is [1, 2].
indices_buffers_csf = [[1, 2] for _ in 1:3]
# One pointer buffer per level (levels 0 and 1); every buffer is [1, 2, 3].
indptr_buffers_csf = [[1, 2, 3] for _ in 1:2]
data_csf = Float64[100, 200]
shape_csf = (2, 2, 2)

csf_tensor = Arrow.SparseTensorCSF{Float64,3}(
    indices_buffers_csf,
    indptr_buffers_csf,
    data_csf,
    shape_csf,
)
println(" CSF Tensor: ", csf_tensor, " ")
println(" Note: CSF format is complex - this is a simplified demonstration")
println()
178+
# ============================================================================
# Section 5: Serialization and metadata
# ============================================================================
# Section header, followed by one blank line.
foreach(println, (
    " 5. Serialization and Metadata",
    " - Sparse tensors can be serialized with format metadata",
    "",
))

# Metadata for the COO tensor built in the COO section.
println(" COO metadata:")
coo_metadata = Arrow.sparse_tensor_metadata(coo_tensor)
println(" ", coo_metadata, " ")
println()

# Metadata for the CSR tensor built in the CSX section.
println(" CSR metadata:")
csr_metadata = Arrow.sparse_tensor_metadata(csr_tensor)
println(" ", csr_metadata, " ")
println()

# Round trip: serialize the COO tensor, rebuild it from the buffers and
# metadata, then compare against the source tensor.
println(" Serialization round-trip test:")
buffers, metadata = Arrow.serialize_sparse_tensor(coo_tensor)
reconstructed = Arrow.deserialize_sparse_tensor(buffers, metadata, Float64)
println(" Original: ", coo_tensor, " ")
println(" Reconstructed: ", reconstructed, " ")

# The check is a spot test only: element [1,1] and the non-zero count.
same_first_element = reconstructed[1, 1] == coo_tensor[1, 1]
println(" Round-trip successful: $(same_first_element && Arrow.nnz(reconstructed) == Arrow.nnz(coo_tensor)) ")
println()
205+
# ============================================================================
# Section 6: Performance and sparsity analysis
# ============================================================================
# `Random` has to be loaded before `Random.seed!` is referenced below;
# otherwise the script stops with `UndefVarError: Random`.
using Random

println(" 6. Performance and Sparsity Analysis")
println(" - Demonstrate efficiency gains with sparse storage")
println()

# Build a large sparse matrix from random coordinates.
println(" Creating a large sparse matrix (1000×1000 with 0.1% non-zeros):")
n = 1000
nnz_count = div(n * n, 1000)  # 0.1% density

# Draw random (row, col, value) triplets with a fixed seed so that repeated
# runs use the same data.
Random.seed!(42)
rows = rand(1:n, nnz_count)
cols = rand(1:n, nnz_count)
vals = rand(Float64, nnz_count)

# Deduplicate coordinates: when the same (row, col) is drawn more than once,
# the last value written for that key is the one kept.
sparse_dict = Dict{Tuple{Int,Int},Float64}()
for (r, c, v) in zip(rows, cols, vals)
    sparse_dict[(r, c)] = v
end

# Convert back to arrays. `keys` and `values` iterate a Dict in the same
# order, so coords[k] pairs with unique_vals[k].
# The result is deliberately NOT stored in a variable called `values`: that
# would try to rebind `Base.values` right after calling it, which Julia
# rejects ("cannot assign a value to imported variable").
coords = collect(keys(sparse_dict))
unique_vals = collect(values(sparse_dict))
actual_nnz = length(unique_vals)

# 2×nnz index matrix: first row holds row numbers, second row column numbers.
# `permutedims` yields a plain Matrix rather than the lazy Adjoint wrapper
# produced by `'`, so it works whether the constructor accepts any
# AbstractMatrix or only a dense Matrix.
indices_large = permutedims([getindex.(coords, 1) getindex.(coords, 2)])
large_coo = Arrow.SparseTensorCOO{Float64,2}(indices_large, unique_vals, (n, n))

println(" Large COO tensor: $(large_coo) ")

# Element-count comparison between dense and sparse storage. This counts
# stored values only; it does not account for the index storage.
total_elements = n * n
stored_elements = actual_nnz
memory_saved = total_elements - stored_elements
compression_ratio = total_elements / stored_elements

println(" Storage analysis:")
println(" Total elements: $(total_elements) ")
println(" Stored elements: $(stored_elements) ")
println(" Memory saved: $(memory_saved) elements")
println(" Compression ratio: $(round(compression_ratio, digits=2)) x")
println(" Storage efficiency: $(round((1 - stored_elements / total_elements) * 100, digits=2)) %")
println()

println(" === Demo Complete ===")
println(" Sparse tensors provide efficient storage and computation for")
println(" data where most elements are zero, with significant memory")
println(" savings and computational advantages for appropriate workloads.")
0 commit comments