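update: rework belief propagation around a BeliefPropgation model and add UAI-based random MPS / tensor-train generators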

GiggleLiu · GiggleLiu · commit 0806a5fa609e · 2025-04-29T01:43:28.000+08:00
diff --git a/src/TensorInference.jl b/src/TensorInference.jl
@@ -41,10 +41,10 @@ export MMAPModel
 export update_temperature
 
 # belief propagation
-export belief_propagation
+export BeliefPropgation, belief_propagate
 
 # utils
-export random_matrix_product_state
+export random_matrix_product_state, random_tensor_train_uai, random_matrix_product_uai
 
 include("Core.jl")
 include("RescaledArray.jl")
diff --git a/src/belief.jl b/src/belief.jl
@@ -1,7 +1,26 @@
-struct BPState{T, VT<:AbstractVector{T}}
+struct BeliefPropgation{T}  # tensors plus the tensor<->variable incidence lists; NOTE(review): name is spelled "Propgation" (sic) throughout this diff
     t2v::Vector{Vector{Int}}           # a mapping from tensors to variables
     v2t::Vector{Vector{Int}}           # a mapping from variables to tensors
     tensors::Vector{AbstractArray{T}}                 # the tensors
+end
+num_tensors(bp::BeliefPropgation) = length(bp.t2v)  # number of tensors: one `t2v` entry per tensor
+ProblemReductions.num_variables(bp::BeliefPropgation) = length(bp.v2t)  # adds a method to ProblemReductions.num_variables: one `v2t` entry per variable
+
+function BeliefPropgation(nvars::Int, t2v::AbstractVector{Vector{Int}}, tensors::AbstractVector{AbstractArray{T}}) where T
+    # build the inverse mapping: v2t[v] lists, in increasing order, the tensors that variable v appears in
+    v2t = [Int[] for _ in 1:nvars]
+    for (i, edge) in enumerate(t2v)
+        for v in edge
+            push!(v2t[v], i)  # throws BoundsError if v is outside 1:nvars
+        end
+    end
+    return BeliefPropgation{T}(t2v, v2t, tensors)  # explicit {T}: the default inner constructor converts non-`Vector` inputs to the field types
+end
+function BeliefPropgation(uai::UAIModel{T}) where T  # one tensor per factor: `f.vars` supplies its variables, `f.vals` its values
+    return BeliefPropgation(uai.nvars, [collect(Int, f.vars) for f in uai.factors], AbstractArray{T}[f.vals for f in uai.factors])
+end
+
+struct BPState{T, VT<:AbstractVector{T}}  # message buffers; vectors_on_tensor indexes them as [variable][position of the tensor in v2t[variable]]
     message_in::Vector{Vector{VT}}  # for each variable, we store the incoming messages
     message_out::Vector{Vector{VT}} # the outgoing messages
 end
@@ -22,20 +41,20 @@ function _process_message!(ov::Vector, iv::Vector)
     end
 end
 
-function collect_message!(bp::BPState)
-    for (it, t) in enumerate(bp.t2v)
-        _collect_message!(vectors_on_tensor(bp.message_out, bp, it), t, vectors_on_tensor(bp.message_in, bp, it))
+function collect_message!(bp::BeliefPropgation, state::BPState)  # tensor-side sweep: visits every tensor once
+    for it in 1:num_tensors(bp)
+        _collect_message!(vectors_on_tensor(state.message_out, bp, it), bp.tensors[it], vectors_on_tensor(state.message_in, bp, it))  # args: message_out vectors, the tensor, message_in vectors
     end
 end
 # collect the vectors associated with the target tensor
-function vectors_on_tensor(messages, bp::BPState, it::Int)
+function vectors_on_tensor(messages, bp::BeliefPropgation, it::Int)  # returns one message per variable of tensor `it`, in `bp.t2v[it]` order
     return map(bp.t2v[it]) do v
         # the message goes to the idx-th tensor from variable v
         messages[v][findfirst(==(it), bp.v2t[v])]
     end
 end
 function _collect_message!(vectors_out::Vector, t::AbstractArray, vectors_in::Vector)
-    @assert length(vectors_out) == length(vectors_in) == ndims(t)
+    @assert length(vectors_out) == length(vectors_in) == ndims(t) "dimensions mismatch: $(length(vectors_out)), $(length(vectors_in)), $(ndims(t))"
     # TODO: speed up if needed!
     code = star_code(length(vectors_in))
     cost, gradient = cost_and_gradient(code, [t, vectors_in...])
@@ -44,6 +63,8 @@ function _collect_message!(vectors_out::Vector, t::AbstractArray, vectors_in::Ve
     end
     return cost
 end
+
+# star code: contract a tensor with multiple vectors, one for each dimension
 function star_code(n::Int)
     ix1, ixrest = collect(1:n), [[i] for i in 1:n]
     ne = DynamicNestedEinsum([DynamicNestedEinsum{Int}(1), DynamicNestedEinsum{Int}(2)], DynamicEinCode([ix1, ixrest[1]], collect(2:n)))
@@ -53,41 +74,38 @@ function star_code(n::Int)
     return ne
 end
 
-function BPState(::Type{T}, n::Int, t2v::Vector{Vector{Int}}, size_dict::Dict{Int, Int}) where T
-    v2t = [Int[] for _ in 1:n]
-    edges_vectors = [Vector{VT}[] for _ in 1:n]
-    for (i, edge) in enumerate(t2v)
-        for v in edge
-            push!(v2t[v], i)
-            push!(edges_vectors[i], ones(T, size_dict[v]))
-        end
+function initial_state(bp::BeliefPropgation{T}) where T  # build a BPState in which every message is an all-ones vector
+    size_dict = OMEinsum.get_size_dict(bp.t2v, bp.tensors)  # variable index => dimension, read off the tensor shapes
+    edges_vectors = Vector{Vector{T}}[]  # edges_vectors[v][k]: message for variable v and its k-th tensor in bp.v2t[v]
+    for (i, tids) in enumerate(bp.v2t)
+        push!(edges_vectors, [ones(T, size_dict[i]) for _ in 1:length(tids)])  # one vector of length size_dict[i] per adjacent tensor
     end
-    return BPState(t2v, v2t, edges_vectors)
+    return BPState(deepcopy(edges_vectors), edges_vectors)  # message_in is a deep copy, so the two buffers never alias
 end
 
 # belief propagation, update the tensors on the edges of the tensor network
-function belief_propagation(tn::TensorNetworkModel{T}, bpstate::BPState{T}; max_iter::Int=100, tol::Float64=1e-6) where T
-    # collect the messages from the neighbors
-    messages = [similar(bpstate.edges_vectors[it]) for it in 1:length(bpstate.t2v)]
-    for (it, vs) in enumerate(bpstate.t2v)
-        for (iv, v) in enumerate(vs)
-            messages[it][iv] = tn.tensors[v]
-        end
-    end
-    # update the tensors on the edges of the tensor network
-    for (it, vs) in enumerate(bpstate.t2v)
-        # update the tensor
-        for (iv, v) in enumerate(vs)
-            bpstate.edges_vectors[it][iv] = zeros(T, size_dict[v])
-            for (j, w) in enumerate(vs)
-                if j != iv
-                    bpstate.edges_vectors[it][iv] += messages[j][iv] * messages[j][iv]
-                end
-            end
+function belief_propagate(bp::BeliefPropgation; max_iter::Int=100, tol::Float64=1e-6)  # non-mutating entry point: returns `(state, info)`
+    state = initial_state(bp)  # fresh all-ones messages
+    info = belief_propagate!(bp, state; max_iter=max_iter, tol=tol)  # `info` is a BPInfo (converged flag + iteration count)
+    return state, info
+end
+struct BPInfo  # summary returned by belief_propagate!
+    converged::Bool  # true when the tolerance check passed within `max_iter` sweeps
+    iterations::Int  # sweep index at convergence, or `max_iter` when not converged
+end
+function belief_propagate!(bp::BeliefPropgation, state::BPState{T}; max_iter::Int=100, tol::Float64=1e-6) where T  # runs the `!` helpers on `state`; returns a BPInfo
+    for i in 1:max_iter
+        process_message!(state)
+        collect_message!(bp, state)
+        # converged once every message_out[iv][it] ≈ message_in[iv][it] (atol=tol). NOTE(review): compares out vs. in, not successive sweeps — confirm intended
+        if all(iv -> all(it -> isapprox(state.message_out[iv][it], state.message_in[iv][it], atol=tol), 1:length(bp.v2t[iv])), 1:num_variables(bp))
+            return BPInfo(true, i)
         end
     end
+    return BPInfo(false, max_iter)
 end
 
-function belief_propagation(tn::TensorNetworkModel{T}) where T
-    return belief_propagation(tn, BPState(T, OMEinsum.get_ixsv(tn.code), tn.size_dict))
-end
+# if BP is exact and converged (e.g. on a tree-like network), each entry should equal the full tensor network contraction
+function contraction_results(state::BPState{T}) where T
+    return [sum(reduce((x, y) -> x .* y, mi)) for mi in state.message_in]  # per variable: elementwise product of its incoming messages, then summed; `reduce` throws if a variable has no messages
+end
diff --git a/src/utils.jl b/src/utils.jl
@@ -334,6 +334,12 @@ connected in a chain.
 - `d` is the dimension of the physical indices.
 """
 function random_matrix_product_state(::Type{T}, n::Int, chi::Int, d::Int=2) where T
+    uai = random_matrix_product_uai(T, n, chi, d)  # factor description holding the ket tensors and their conjugated (bra) copies
+    return TensorNetworkModel(uai; optimizer=GreedyMethod())  # wrap as a tensor network, with GreedyMethod() as the optimizer
+end
+random_matrix_product_state(n::Int, chi::Int, d::Int=2) = random_matrix_product_state(ComplexF64, n, chi, d)  # element type defaults to ComplexF64
+
+function random_matrix_product_uai(::Type{T}, n::Int, chi::Int, d::Int=2) where T
     # chi ^ (n-1) * (variance^n)^2 == 1/d^n
     variance = d^(-1/2) * chi^(-1/2+1/2n)
     tensors = Any[randn(T, d, chi) .* variance]
@@ -351,12 +357,41 @@ function random_matrix_product_state(::Type{T}, n::Int, chi::Int, d::Int=2) wher
     push!(ixs_ket, [virtual_indices_ket[n-1], physical_indices[n]])
     push!(ixs_bra, [virtual_indices_bra[n-1], physical_indices[n]])
     tensors, ixs = [tensors..., conj.(tensors)...], [ixs_ket..., ixs_bra...]
-    return TensorNetworkModel(
-        3n-2,
-        optimize_code(DynamicEinCode(ixs, Int[]), OMEinsum.get_size_dict(ixs, tensors), GreedyMethod()),
-        tensors,
-        Dict{Int, Int}(),
-        collect(1:n)
+    size_dict = OMEinsum.get_size_dict(ixs, tensors)
+    nvars = 3n-2
+    return UAIModel(
+        nvars,
+        [size_dict[i] for i=1:nvars],
+        [Factor((ixs[i]...,), tensors[i]) for i in 1:length(tensors)]
     )
 end
-random_matrix_product_state(n::Int, chi::Int, d::Int=2) = random_matrix_product_state(ComplexF64, n, chi, d)
+
+
+"""
+$TYPEDSIGNATURES
+
+Generate a random tensor train (TT) with `n` sites as a `UAIModel`, with bond dimension
+`chi` and physical dimension `d` (requires `n ≥ 2`). Tensor trains are widely used in quantum
+many-body physics; unlike the matrix product state (MPS) model, no extra bra copy is added.
+"""
+function random_tensor_train_uai(::Type{T}, n::Int, chi::Int, d::Int=2) where T
+    # same scale as the MPS generator: chi ^ (n-1) * (variance^n)^2 == 1/d^n. NOTE(review): that identity is for ket+bra — confirm it is intended here
+    variance = d^(-1/2) * chi^(-1/2+1/2n)
+    tensors = Any[randn(T, d, chi) .* variance]  # first site: (physical, right bond)
+    physical_indices = collect(1:n)  # variables 1:n
+    virtual_indices = collect(n+1:2n-1)  # bond variables n+1:2n-1 (empty when n == 1, which makes the next line throw)
+    ixs = [[physical_indices[1], virtual_indices[1]]]
+    for i = 2:n-1
+        push!(tensors, randn(T, chi, d, chi) .* variance)  # interior site: (left bond, physical, right bond)
+        push!(ixs, [virtual_indices[i-1], physical_indices[i], virtual_indices[i]])
+    end
+    push!(tensors, randn(T, chi, d) .* variance)  # last site: (left bond, physical)
+    push!(ixs, [virtual_indices[n-1], physical_indices[n]])
+    size_dict = OMEinsum.get_size_dict(ixs, tensors)
+    nvars = 2n-1  # n physical + (n-1) virtual variables
+    return UAIModel(
+        nvars,
+        [size_dict[i] for i=1:nvars],
+        [Factor((ixs[i]...,), tensors[i]) for i in 1:length(tensors)]
+    )
+end
diff --git a/test/belief.jl b/test/belief.jl
@@ -1,4 +1,5 @@
 using TensorInference, Test
+using OMEinsum
 
 @testset "process message" begin
     mi = [[1, 2, 3], [2, 3, 4], [3, 4, 5]]
@@ -26,11 +27,24 @@ end
     @test vectors_out[3] ≈ vec(kron(v2, v1)' * reshape(t, 4, 2))
 end
 
+@testset "constructor" begin
+    problem = problem_from_artifact("uai2014", "MAR", "Promedus", 14)
+    uai = read_model(problem)
+    bp = BeliefPropgation(uai)
+    @test length(bp.v2t) == 414  # NOTE(review): 414 is presumably this instance's variable count — confirm
+    @test TensorInference.num_tensors(bp) == 414
+    @test TensorInference.num_variables(bp) == length(unique(vcat([collect(Int, f.vars) for f in uai.factors]...)))  # passes only if every variable appears in at least one factor
+end
+
 @testset "belief propagation" begin
     n = 5
     chi = 3
-    Random.seed!(140)
-    mps = random_matrix_product_state(n, chi)
-    model = TensorNetworkModel(mps)
-    state = belief_propagation(model)
+    mps_uai = TensorInference.random_tensor_train_uai(Float64, n, chi)  # NOTE(review): random input is unseeded now that Random.seed! is removed
+    bp = BeliefPropgation(mps_uai)
+    @test TensorInference.initial_state(bp) isa TensorInference.BPState
+    state, info = belief_propagate(bp)
+    @show TensorInference.contraction_results(state)  # printed only; not asserted against the reference value below
+    @test info.converged
+    tnet = TensorNetworkModel(mps_uai)
+    @show probability(tnet)[]  # reference value from the tensor network model, shown for manual comparison
 end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -24,6 +24,14 @@ end
     include("cspmodels.jl")
 end
 
+@testset "utils" begin
+    include("utils.jl")  # tests for the random tensor-train / MPS UAI generators
+end
+
+@testset "belief propagation" begin
+    include("belief.jl")  # message-passing helpers and the BeliefPropgation constructor
+end
+
 using CUDA
 if CUDA.functional()
     include("cuda.jl")
diff --git a/test/sampling.jl b/test/sampling.jl
@@ -71,7 +71,6 @@ end
     mps = random_matrix_product_state(n, chi)
     num_samples = 10000
     ixs = OMEinsum.getixsv(mps.code)
-    @show ixs
     samples = map(1:num_samples) do i
         sample(mps, 1; queryvars=collect(1:n)).samples[:,1]
     end
diff --git a/test/utils.jl b/test/utils.jl
@@ -0,0 +1,12 @@
+using TensorInference, Test
+
+@testset "tensor train" begin
+    tt = random_tensor_train_uai(Float64, 5, 3)
+    @test tt.nvars == length(unique(vcat([collect(Int, f.vars) for f in tt.factors]...)))  # nvars must equal the number of distinct variables used by the factors
+end
+
+@testset "mps" begin
+    tt = random_matrix_product_uai(Float64, 5, 3)
+    @test tt.nvars == length(unique(vcat([collect(Int, f.vars) for f in tt.factors]...)))  # same invariant for the MPS generator
+end
+