Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions C_FHist/C_FHist.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
module C_FHist

using FHist: Hist1D, _fast_bincounts_1d!

Base.@ccallable function hist1d(input::Ptr{Cdouble}, Ninput::Clong, bincounts::Ptr{Cdouble}, Nbincounts::Clong, start::Cdouble, step::Cdouble, stop::Cdouble)::Cvoid
np_input = unsafe_wrap(Array{Float64}, input, Ninput, own=false)
np_bincounts = unsafe_wrap(Array{Float64}, bincounts, Nbincounts, own=false)
binedges = start:step:stop
h = Hist1D(; bincounts=np_bincounts, binedges)
_fast_bincounts_1d!(h, np_input, binedges)
return nothing
end

end
96 changes: 96 additions & 0 deletions C_FHist/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
## Setup Julia nightly:
Follow instruction at https://julialang.org/install/
```
curl -fsSL https://install.julialang.org | sh
juliaup add 1.12
```

## How to test

```bash
>julia +1.12 --project=.. -e "using Pkg; Pkg.update()"
# the the -beta4+0 is the latest version at the time of writing, change it to the latest version
# the x64.linux.gnu need to be changed to your platform
>julia +1.12 --project=.. ~/.julia/juliaup/julia-1.12.0-beta4+0.x64.linux.gnu/share/julia/juliac.jl --output-lib libfhistjl.so --compile-ccallable --experimental --trim C_FHist.jl

>pixi ls -x
Package Version Build Size Kind Source
numpy 2.3.1 py313h41a2e72_0 6.3 MiB conda numpy

> pixi run python test.py
```

## Results on macOS ARM (Mac Mini M4)
```
> julia +1.12 -e "using InteractiveUtils; versioninfo()"
Julia Version 1.12.0-beta4
Commit 600ac61d3d2 (2025-06-05 07:03 UTC)
Build Info:
Official https://julialang.org release
Platform Info:
OS: macOS (arm64-apple-darwin24.0.0)
CPU: 10 × Apple M4
WORD_SIZE: 64
LLVM: libLLVM-18.1.7 (ORCJIT, apple-m1)
GC: Built with stock GC

> pixi run python test.py
=====================================
Input size: 1000
All close: True
Numpy time (ms): 0.021641701459884644
FHist.jl time (ms): 0.003958120942115784
=====================================
Input size: 10000
All close: True
Numpy time (ms): 0.05824156105518341
FHist.jl time (ms): 0.011725164949893951
=====================================
Input size: 100000
All close: True
Numpy time (ms): 0.438716821372509
FHist.jl time (ms): 0.0859750434756279
=====================================
Input size: 1000000
All close: True
Numpy time (ms): 3.94724179059267
FHist.jl time (ms): 0.829133577644825
```

## Results on Linux x86_64 (Ryzen 9 3900X)

```
> julia +1.12 -e "using InteractiveUtils; versioninfo()"
Julia Version 1.12.0-beta4
Commit 600ac61d3d2 (2025-06-05 07:03 UTC)
Build Info:
Official https://julialang.org release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 24 × AMD Ryzen 9 3900X 12-Core Processor
WORD_SIZE: 64
LLVM: libLLVM-18.1.7 (ORCJIT, znver2)
GC: Built with stock GC

> pixi run python test.py
=====================================
Input size: 1000
All close: True
Numpy time (ms): 0.05090880440548062
FHist.jl time (ms): 0.007097609341144562
=====================================
Input size: 10000
All close: True
Numpy time (ms): 0.19860140746459365
FHist.jl time (ms): 0.020549201872199774
=====================================
Input size: 100000
All close: True
Numpy time (ms): 1.3177349930629134
FHist.jl time (ms): 0.19442759221419692
=====================================
Input size: 1000000
All close: True
Numpy time (ms): 6.48591200588271
FHist.jl time (ms): 1.8390380078926682
```
33 changes: 33 additions & 0 deletions C_FHist/benchmarks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
## On a AMD Zen2 system:

### JULIA_CPU_TARGET='znver2;generic' julia +1.12
```
Input size: 1000000
All close: True
Numpy time (μs): 6.558584200683981
FHist.jl time (μs): 1.7967560008401051
```

### JULIA_CPU_TARGET='generic;znver2' julia +1.12
```
Input size: 1000000
All close: True
Numpy time (μs): 6.538515799911693
FHist.jl time (μs): 2.6081674004672095
```

### JULIA_CPU_TARGET='znver2;generic' julia +nightly
```
Input size: 1000000
All close: True
Numpy time (μs): 6.641940600820817
FHist.jl time (μs): 1.5721895993920043
```

### JULIA_CPU_TARGET='generic;znver2' julia +nightly
```
Input size: 1000000
All close: True
Numpy time (μs): 6.617425798322074
FHist.jl time (μs): 2.40858779870905
```
4 changes: 4 additions & 0 deletions C_FHist/jlsum.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Base.@ccallable function jlsum(p::Ptr{Cdouble}, N::Clong)::Cdouble
np_input = unsafe_wrap(Array{Float64}, p, N, own=false)
return sum(np_input)
end
26 changes: 26 additions & 0 deletions C_FHist/jlsum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import ctypes

lib = ctypes.CDLL('./libjlsum.so')
lib.jlsum.argtypes = [
ctypes.POINTER(ctypes.c_double), ctypes.c_long,
]
lib.jlsum.restype = ctypes.c_double
def jlsum(a):
res = lib.jlsum(
a.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
ctypes.c_long(len(a)),
)
return res


import numpy as np
import timeit
for N_input in [10**4, 10**5, 10**6]:
input_data = np.random.rand(N_input)
print("=====================================")
print(f"Input size: {N_input}")
print("np.isclose:", np.isclose(np.sum(input_data), jlsum(input_data)))
np_timer = timeit.Timer(lambda: np.sum(input_data))
jl_timer = timeit.Timer(lambda: jlsum(input_data))
print(f"Numpy time (ms): {np.min(np_timer.repeat(number=5, repeat=500)) / 5 * 1000}")
print(f"FHist.jl time (ms): {np.min(jl_timer.repeat(number=5, repeat=500)) / 5 * 1000}")
13 changes: 13 additions & 0 deletions C_FHist/pixi.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[project]
authors = ["Jerry Ling <[email protected]>"]
channels = ["conda-forge"]
description = "Add a short description here"
name = "C_FHist"
platforms = ["linux-64", "osx-arm64"]
version = "0.1.0"

[tasks]

[dependencies]
numpy = ">=2.3.1,<3"
ipython = ">=9.4.0,<10"
41 changes: 41 additions & 0 deletions C_FHist/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import ctypes
import numpy as np
import timeit
import time

lib = ctypes.CDLL('./libfhistjl.so')
lib.hist1d.argtypes = [
ctypes.POINTER(ctypes.c_double), ctypes.c_long, # input and length
ctypes.POINTER(ctypes.c_double), ctypes.c_long, # bincoutns and length
ctypes.c_double, ctypes.c_double, ctypes.c_double # binedges
]
lib.hist1d.restype = ctypes.c_voidp

def jlhist(a, bins, range):
bincounts = np.zeros(bins)
step = (range[1] - range[0]) / bins

lib.hist1d(
a.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
ctypes.c_long(len(a)),
bincounts.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
ctypes.c_long(len(bincounts)),
ctypes.c_double(range[0]),
ctypes.c_double(step),
ctypes.c_double(range[1])
)
return bincounts


for N_input in [10**3, 10**4, 10**5, 10**6]:
input_data = np.random.rand(N_input)
bins = 20
print("=====================================")
print(f"Input size: {N_input}")
print("All close:", np.allclose(np.histogram(input_data, bins=bins, range=(0.0, 1.0))[0], jlhist(input_data,
bins=bins, range=(0.0,
1.0))))
np_timer = timeit.Timer(lambda: np.histogram(input_data, bins=bins, range=(0.0, 1.0)))
jl_timer = timeit.Timer(lambda: jlhist(input_data, bins=bins, range=(0.0, 1.0)))
print(f"Numpy time (ms): {np.min(np_timer.repeat(number=5, repeat=200)) / 5 * 1000}")
print(f"FHist.jl time (ms): {np.min(jl_timer.repeat(number=5, repeat=200)) / 5 * 1000}")
6 changes: 3 additions & 3 deletions src/hist1d.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
function _fast_bincounts!(h::Hist1D, TA::Tuple{T}, r::AbstractRange, weights::Nothing) where T
A = TA[1]
_fast_bincounts!(h::Hist1D, TA::Tuple{T}, r::AbstractRange, ::Nothing) where T = _fast_bincounts_1d!(h, TA[1], r)

function _fast_bincounts_1d!(h::Hist1D, A, r::AbstractRange)
overflow = h.overflow
counts = bincounts(h)
s2 = sumw2(h)
firstr = first(r)
invstep = inv(step(r))
L = nbins(h)
Expand Down
Loading