    with tiledb.open("clamped") as a:
        print(a.schema)

Reading using NumPy Arrays as buffers (advanced)
................................................................................

It is also possible to treat the NumPy arrays passed to PDAL as buffers that are
iteratively populated by custom Python functions during the execution of the pipeline.

This can be useful when you want the reading of the input data to be handled in a
streamable fashion, for example:

* When the total NumPy array data would not fit into memory.
* To start executing a streamable PDAL pipeline while the input data is still being read.

To enable this mode, pass the Python populate function along with each corresponding NumPy array.

.. code-block:: python

    # NumPy array to be used as a buffer
    in_buffer = np.zeros(max_chunk_size, dtype=[("X", float), ("Y", float), ("Z", float)])

    # The function that populates the buffer iteratively
    def load_next_chunk() -> int:
        """
        Function called by PDAL before reading the data from the buffer.

        IMPORTANT: it must return the total number of items to be read from the buffer.
        The pipeline execution keeps calling this function in a loop until it returns 0.
        """
        #
        # Replace this with your own code that populates the buffer and returns the
        # number of elements to read (next_chunk stands for whatever data your code produces)
        #
        chunk_size = next_chunk.size
        in_buffer[:chunk_size]["X"] = next_chunk[:]["X"]
        in_buffer[:chunk_size]["Y"] = next_chunk[:]["Y"]
        in_buffer[:chunk_size]["Z"] = next_chunk[:]["Z"]

        return chunk_size

    # Configure the input array and handler during Pipeline initialization...
    p = pdal.Pipeline(pipeline_json, arrays=[in_buffer], stream_handlers=[load_next_chunk])

    # ...alternatively, use the setter on an existing Pipeline
    # p.inputs = [(in_buffer, load_next_chunk)]

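For a self-contained illustration of the handler contract, here is a minimal sketch
(hypothetical sizes, synthetic points, and ``writers.null`` as a sink so nothing is
written to disk) that streams five generated chunks through a pipeline:

.. code-block:: python

    import numpy as np
    import pdal

    # Hypothetical sizes, for illustration only
    max_chunk_size = 1_000
    total_chunks = 5
    chunks = iter(range(total_chunks))

    in_buffer = np.zeros(max_chunk_size, dtype=[("X", float), ("Y", float), ("Z", float)])

    def load_next_chunk() -> int:
        try:
            next(chunks)
        except StopIteration:
            # Returning 0 ends the streaming loop
            return 0

        # Fill the buffer with synthetic points
        in_buffer["X"] = np.random.rand(max_chunk_size)
        in_buffer["Y"] = np.random.rand(max_chunk_size)
        in_buffer["Z"] = np.random.rand(max_chunk_size)
        return max_chunk_size

    # writers.null discards the points; it only exercises the streaming machinery
    p = pdal.Writer.null().pipeline()
    p.inputs = [(in_buffer, load_next_chunk)]
    count = p.execute_streaming(chunk_size=max_chunk_size)
    print(count)  # expected: 5000
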
The following snippet provides a simple example of using a NumPy array as a buffer
to support writing through PDAL with full control over the maximum amount of memory used.

.. raw:: html

    <details>
    <summary>Example: Streaming the read and write of a very large LAZ file with a low memory footprint</summary>

.. code-block:: python

    import numpy as np
    import pdal

    in_chunk_size = 10_000_000
    in_pipeline = pdal.Reader.las(**{
        "filename": "in_test.laz"
    }).pipeline()

    # Iterate over the input file in chunks of at most in_chunk_size points
    in_pipeline_it = iter(in_pipeline.iterator(in_chunk_size))

    out_chunk_size = 50_000_000
    out_file = "out_test.laz"
    out_pipeline = pdal.Writer.las(
        filename=out_file
    ).pipeline()

    out_buffer = np.zeros(in_chunk_size, dtype=[("X", float), ("Y", float), ("Z", float)])

    def load_next_chunk():
        try:
            next_chunk = next(in_pipeline_it)
        except StopIteration:
            # Stops the streaming
            return 0

        chunk_size = next_chunk.size
        out_buffer[:chunk_size]["X"] = next_chunk[:]["X"]
        out_buffer[:chunk_size]["Y"] = next_chunk[:]["Y"]
        out_buffer[:chunk_size]["Z"] = next_chunk[:]["Z"]

        print(f"Loaded next chunk -> {chunk_size}")

        return chunk_size

    out_pipeline.inputs = [(out_buffer, load_next_chunk)]

    out_pipeline.loglevel = 20  # logging.INFO
    count = out_pipeline.execute_streaming(out_chunk_size)

    print(f"\nWROTE - {count}")

.. raw:: html

    </details>

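As a quick sanity check, the output can be re-read in streaming mode to confirm the
point count (a minimal follow-up sketch reusing the same APIs and the ``out_test.laz``
written by the example above):

.. code-block:: python

    import pdal

    # Re-read the written file in streaming mode and report the point count
    check = pdal.Reader.las(filename="out_test.laz").pipeline()
    n = check.execute_streaming(chunk_size=10_000_000)
    print(f"READ BACK - {n}")
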
Executing Streamable Pipelines
................................................................................
Streamable pipelines (pipelines that consist exclusively of streamable PDAL