diff --git a/cpp/open3d/core/Device.h b/cpp/open3d/core/Device.h
index d215d16e55e..95763ebc702 100644
--- a/cpp/open3d/core/Device.h
+++ b/cpp/open3d/core/Device.h
@@ -13,6 +13,8 @@ namespace open3d {
 namespace core {
 
+// TODO: PyTorch device compatibility: Switch to lower case devices (cpu instead of CPU).
+// Make xpu a synonym for sycl:xpu.
 /// Device context specifying device type and device id.
 /// For CPU, there is only one device with id 0.
 class Device {
diff --git a/cpp/open3d/core/Tensor.cpp b/cpp/open3d/core/Tensor.cpp
index ef4baf7287d..a07cd13e9dd 100644
--- a/cpp/open3d/core/Tensor.cpp
+++ b/cpp/open3d/core/Tensor.cpp
@@ -461,6 +461,52 @@ Tensor Tensor::Arange(const Scalar start,
     return kernel::Arange(t_start, t_stop, t_step);
 }
 
+Tensor Tensor::Quasirandom(int64_t n,
+                           int64_t dims,
+                           const Dtype dtype,
+                           const Device& device) {
+    // Implements a simple quasirandom (low-discrepancy) sequence generator
+    // using the method from
+    // https://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/
+    // Uses the fractional part of multiples of the generalized golden ratio
+    // for each dimension.
+
+    // Validate input
+    if (n <= 0) {
+        utility::LogError("Quasirandom: n must be positive, got {}", n);
+    }
+    if (dims <= 0) {
+        utility::LogError("Quasirandom: dims must be positive, got {}", dims);
+    }
+    if (!(dtype == core::Float32 || dtype == core::Float64)) {
+        utility::LogError(
+                "Quasirandom: dtype must be Float32 or Float64, got {}",
+                dtype.ToString());
+    }
+
+    // Compute generalized golden ratio for each dimension
+    std::vector<double> alpha(dims + 1);
+    // Use the recurrence: phi_1 = 1, phi_{d+1} = (1 + sqrt(1 + 4*phi_d)) / 2
+    alpha[0] = 1.0;
+    for (int64_t d = 1; d < dims + 1; ++d) {
+        alpha[d] = (1.0 + std::sqrt(1.0 + 4.0 * alpha[d - 1])) / 2.0;
+        alpha[d - 1] = 1. / alpha[d - 1];  // Each dimension basis is 1/phi^d
+    }
+    alpha[dims] = 1. / alpha[dims];
+    Tensor out({n, dims}, dtype, Device("CPU:0"));  // on the CPU for now.
+    // Fill tensor with quasirandom values
+    DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t* data = static_cast<scalar_t*>(out.GetDataPtr());
+        for (int64_t i = 0; i < n; ++i) {
+            for (int64_t d = 0; d < dims; ++d) {
+                double val = std::fmod(0.5 + alpha[d + 1] * (i + 1), 1.0);
+                data[i * dims + d] = static_cast<scalar_t>(val);
+            }
+        }
+    });
+    return out.To(device);
+}
+
 Tensor Tensor::Reverse() const {
     // TODO: Unoptimized with ai. Can be improved when negative step in Slice is
     // implemented.
@@ -644,7 +690,7 @@ Tensor Tensor::Expand(const SizeVector& dst_shape) const {
     int64_t omitted_ndims = dst_ndims - src_ndims;
 
     // Fill 1 in shape for omitted dimensions in front.
-    // Noe that unexpanded_new_shape is not the expanded shape. The expanded
+    // Note that unexpanded_new_shape is not the expanded shape. The expanded
    // shape is the dst_shape.
     SizeVector unexpanded_new_shape(dst_ndims, 1);
     for (int64_t i = 0; i < src_ndims; ++i) {
@@ -1093,6 +1139,29 @@ double Tensor::Det() const { return core::Det(*this); }
 
+Tensor Tensor::Cross(const Tensor& other, int64_t axis) const {
+    AssertTensorDevice(other, GetDevice());
+    AssertTensorDtype(other, GetDtype());
+    if (GetShape(axis) != 3 || other.GetShape(axis) != 3) {
+        utility::LogError(
+                "[Tensor::Cross] Dim axis={} of both tensors must have size "
+                "3. Got {} and {} instead.",
+                axis, GetShape(axis), other.GetShape(axis));
+    }
+    Tensor t_x = Slice(axis, 0, 1), t_y = Slice(axis, 1, 2),
+           t_z = Slice(axis, 2, 3);
+    Tensor b_x = other.Slice(axis, 0, 1), b_y = other.Slice(axis, 1, 2),
+           b_z = other.Slice(axis, 2, 3);
+    Tensor dst_tensor(shape_util::BroadcastedShape(shape_, other.shape_),
+                      dtype_, GetDevice());
+
+    // TODO: Slow. Needs tiling for speed.
+    dst_tensor.Slice(axis, 0, 1) = t_y * b_z - t_z * b_y;
+    dst_tensor.Slice(axis, 1, 2) = t_z * b_x - t_x * b_z;
+    dst_tensor.Slice(axis, 2, 3) = t_x * b_y - t_y * b_x;
+    return dst_tensor;
+}
+
 Tensor Tensor::Add(const Tensor& value) const {
     AssertTensorDevice(value, GetDevice());
     AssertTensorDtype(value, GetDtype());
@@ -1431,6 +1500,31 @@ Tensor Tensor::Trunc() const {
     return dst_tensor;
 }
 
+Tensor Tensor::Norm(const SizeVector& dims, bool keepdim, float p) const {
+    // Determine output dtype
+    Dtype output_dtype = dtype_;
+    if (dtype_ != core::Float32 && dtype_ != core::Float64) {
+        output_dtype = core::Float32;
+    }
+    if (std::isinf(p)) {
+        // L-infinity
+        return Abs().Max(dims, keepdim).To(output_dtype);
+    } else if (p == 0.0) {
+        // L0 "norm": count of non-zero elements
+        return Ne(0).To(output_dtype).Sum(dims, keepdim);
+    } else if (p == 1.0) {
+        // L1 norm: sum(|x|)
+        return Abs().To(output_dtype).Sum(dims, keepdim);
+    } else if (p == 2.0) {
+        // L2 norm: sqrt(sum(x^2))
+        Tensor out = To(output_dtype);
+        return (out * out).Sum(dims, keepdim).Sqrt();
+    } else {
+        utility::LogError(
+                "Norm order p must be 0, 1, 2 or INFINITY, but got {}.", p);
+    }
+}
+
 Device Tensor::GetDevice() const {
     if (blob_ == nullptr) {
         utility::LogError("Blob is null, cannot get device");
diff --git a/cpp/open3d/core/Tensor.h b/cpp/open3d/core/Tensor.h
index 865aed12a93..2d0ae11ffbf 100644
--- a/cpp/open3d/core/Tensor.h
+++ b/cpp/open3d/core/Tensor.h
@@ -319,6 +319,27 @@ class Tensor : public IsDevice {
                          const Dtype dtype = core::Int64,
                          const Device& device = core::Device("CPU:0"));
 
+    /// \brief Generates a tensor containing points from a quasirandom sequence.
+    ///
+    /// This function creates a tensor of shape {n, dims} where each row is a point
+    /// in a low-discrepancy quasirandom sequence (specifically, the generalized
+    /// golden ratio based R_d sequence). The 2D variant is the plastic
+    /// sequence. Such sequences are commonly used in numerical integration,
+    /// computer graphics and sampling tasks where uniform coverage of the space
+    /// is desired. See
+    /// https://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/
+    /// for more details.
+    ///
+    /// \param n Number of points to generate (default: 16).
+    /// \param dims Number of dimensions for each point (default: 2).
+    /// \param dtype Data type of the output tensor (default: core::Float32).
+    /// \param device Device on which to allocate the tensor (default: CPU:0).
+    /// \return A tensor of shape {n, dims} containing quasirandom points.
+    static Tensor Quasirandom(int64_t n = 16,
+                              int64_t dims = 2,
+                              const Dtype dtype = core::Float32,
+                              const Device& device = Device("CPU:0"));
+
     /// Reverse a Tensor's elements by viewing the tensor as a 1D array.
     Tensor Reverse() const;
@@ -611,6 +632,18 @@ class Tensor : public IsDevice {
     /// \return returns the determinant of the matrix (double).
     double Det() const;
 
+
+    /// \brief Computes the cross product of the current tensor with another tensor.
+    ///
+    /// The cross product is computed along the specified axis. The tensors must have compatible
+    /// (i.e. broadcastable) shapes, and the size of the specified axis must be 3.
+    /// If the axis is -1 (default), the last axis is used.
+    ///
+    /// \param other The second input tensor to compute the cross product with.
+    /// \param axis The axis along which to compute the cross product. Default is -1 (the last axis).
+    /// \return A tensor containing the cross product of the two input tensors along the specified axis.
+    Tensor Cross(const Tensor &other, int64_t axis = -1) const;
+
     /// Helper function to return scalar value of a scalar Tensor, the Tensor
     /// must have empty shape.
     template <typename T>
@@ -799,21 +832,36 @@ class Tensor : public IsDevice {
     /// Element-wise trunc value of a tensor, returning a new tensor.
     Tensor Trunc() const;
 
-    /// Element-wise logical not of a tensor, returning a new boolean tensor.
+    /// Computes the norm of the tensor along given dimension(s).
+    ///
+    /// \param dims A list of dimensions to be reduced. The default (empty
+    /// SizeVector) means reduce all dimensions.
+    /// \param keepdim If true, the reduced dims are retained as dimensions
+    /// with size 1.
+    /// \param p The order of the norm. Default is 2.0 (L2 norm). Can be
+    /// positive infinity (L-infinity norm).
+    /// \return Tensor containing the computed norms. The dtype is
+    /// preserved for Float32 and Float64 tensors; all other dtypes are
+    /// converted to Float32.
+    Tensor Norm(const SizeVector& dims = {}, bool keepdim = false, float p = 2.0) const;
+
+    /// Element-wise logical not of a tensor, returning a new boolean
+    /// tensor.
     ///
-    /// If the tensor is not boolean, 0 will be treated as False, while non-zero
-    /// will be treated as True.
+    /// If the tensor is not boolean, 0 will be treated as False, while
+    /// non-zero will be treated as True.
     Tensor LogicalNot() const;
 
     /// Element-wise logical not of a tensor, in-place. This operation won't
     /// change the tensor's dtype.
     ///
-    /// If the tensor is not boolean, 0 will be treated as False, while non-zero
-    /// will be treated as True. The tensor will be filled with 0 or 1 casted to
-    /// the tensor's dtype.
+    /// If the tensor is not boolean, 0 will be treated as False, while
+    /// non-zero will be treated as True. The tensor will be filled with 0
+    /// or 1 casted to the tensor's dtype.
     Tensor LogicalNot_();
 
-    /// Element-wise logical and of a tensor, returning a new boolean tensor.
+    /// Element-wise logical and of a tensor, returning a new boolean
+    /// tensor.
     ///
     /// If the tensor is not boolean, zero will be treated as False, while
     /// non-zero values will be treated as True.
@@ -824,9 +872,9 @@ class Tensor : public IsDevice {
     /// Element-wise logical and of tensors, in-place. This operation won't
     /// change the tensor's dtype.
     ///
-    /// If the tensor is not boolean, 0 will be treated as False, while non-zero
-    /// will be treated as True. The tensor will be filled with 0 or 1 casted to
-    /// the tensor's dtype.
+ /// If the tensor is not boolean, 0 will be treated as False, while + /// non-zero will be treated as True. The tensor will be filled with 0 + /// or 1 casted to the tensor's dtype. Tensor LogicalAnd_(const Tensor& value); Tensor LogicalAnd_(Scalar value); @@ -841,30 +889,31 @@ class Tensor : public IsDevice { /// Element-wise logical or of tensors, in-place. This operation won't /// change the tensor's dtype. /// - /// If the tensor is not boolean, 0 will be treated as False, while non-zero - /// will be treated as True. The tensor will be filled with 0 or 1 casted to - /// the tensor's dtype. + /// If the tensor is not boolean, 0 will be treated as False, while + /// non-zero will be treated as True. The tensor will be filled with 0 + /// or 1 casted to the tensor's dtype. Tensor LogicalOr_(const Tensor& value); Tensor LogicalOr_(Scalar value); - /// Element-wise logical exclusive-or of tensors, returning a new boolean - /// tensor. + /// Element-wise logical exclusive-or of tensors, returning a new + /// boolean tensor. /// /// If the tensor is not boolean, zero will be treated as False, while /// non-zero values will be treated as True. Tensor LogicalXor(const Tensor& value) const; Tensor LogicalXor(Scalar value) const; - /// Element-wise logical exclusive-or of tensors, in-place. This operation - /// won't change the tensor's dtype. + /// Element-wise logical exclusive-or of tensors, in-place. This + /// operation won't change the tensor's dtype. /// /// If the tensor is not boolean, zero will be treated as False, while - /// non-zero values will be treated as True. The tensor will be filled with - /// 0 or 1 casted to the tensor's dtype. + /// non-zero values will be treated as True. The tensor will be filled + /// with 0 or 1 casted to the tensor's dtype. Tensor LogicalXor_(const Tensor& value); Tensor LogicalXor_(Scalar value); - /// Element-wise greater-than of tensors, returning a new boolean tensor. + /// Element-wise greater-than of tensors, returning a new boolean + /// tensor. Tensor Gt(const Tensor& value) const; Tensor operator>(const Tensor& value) const { return Gt(value); } Tensor Gt(Scalar value) const; @@ -879,8 +928,8 @@ class Tensor : public IsDevice { Tensor operator<(const Tensor& value) const { return Lt(value); } Tensor Lt(Scalar value) const; - /// Element-wise less-than of tensors, in-place. This operation won't change - /// the tensor's dtype. + /// Element-wise less-than of tensors, in-place. This operation won't + /// change the tensor's dtype. Tensor Lt_(const Tensor& value); Tensor Lt_(Scalar value); @@ -895,14 +944,14 @@ class Tensor : public IsDevice { Tensor Ge_(const Tensor& value); Tensor Ge_(Scalar value); - /// Element-wise less-than-or-equals-to of tensors, returning a new boolean - /// tensor. + /// Element-wise less-than-or-equals-to of tensors, returning a new + /// boolean tensor. Tensor Le(const Tensor& value) const; Tensor operator<=(const Tensor& value) const { return Le(value); } Tensor Le(Scalar value) const; - /// Element-wise less-than-or-equals-to of tensors, in-place. This operation - /// won't change the tensor's dtype. + /// Element-wise less-than-or-equals-to of tensors, in-place. This + /// operation won't change the tensor's dtype. Tensor Le_(const Tensor& value); Tensor Le_(Scalar value); @@ -916,7 +965,8 @@ class Tensor : public IsDevice { Tensor Eq_(const Tensor& value); Tensor Eq_(Scalar value); - /// Element-wise not-equals-to of tensors, returning a new boolean tensor. 
+ /// Element-wise not-equals-to of tensors, returning a new boolean + /// tensor. Tensor Ne(const Tensor& value) const; Tensor operator!=(const Tensor& value) const { return Ne(value); } Tensor Ne(Scalar value) const; @@ -926,25 +976,26 @@ class Tensor : public IsDevice { Tensor Ne_(const Tensor& value); Tensor Ne_(Scalar value); - /// Find the indices of the elements that are non-zero. Returns a vector of - /// int64 Tensors, each containing the indices of the non-zero elements in - /// each dimension. + /// Find the indices of the elements that are non-zero. Returns a vector + /// of int64 Tensors, each containing the indices of the non-zero + /// elements in each dimension. std::vector NonZeroNumpy() const; /// Find the indices of the elements that are non-zero. Returns an int64 - /// tensor of shape {num_dims, num_non_zeros}, where the i-th row contains - /// the indices of the non-zero elements in i-th dimension of the original - /// tensor. + /// tensor of shape {num_dims, num_non_zeros}, where the i-th row + /// contains the indices of the non-zero elements in i-th dimension of + /// the original tensor. Tensor NonZero() const; - /// Evaluate a single-element Tensor as a boolean value. This can be used to - /// implement Tensor.__bool__() in Python, e.g. + /// Evaluate a single-element Tensor as a boolean value. This can be + /// used to implement Tensor.__bool__() in Python, e.g. /// ```python /// assert Tensor([True]) # Passes. /// assert Tensor([123]) # Passes. /// assert Tensor([False]) # AssertionError. /// assert Tensor([0]) # AssertionError. - /// assert Tensor([True, False]) # ValueError: cannot be evaluated as bool. + /// assert Tensor([True, False]) # ValueError: cannot be evaluated as + /// bool. /// ``` bool IsNonZero() const; @@ -1000,8 +1051,8 @@ class Tensor : public IsDevice { /// - For each element in the returned tensor: /// abs(self - other) <= (atol + rtol * abs(other)). /// - /// The equation is not symmetrial, i.e. a.AllClose(b) might not be the same - /// as b.AllClose(a). Also see Numpy's documentation: + /// The equation is not symmetrial, i.e. a.AllClose(b) might not be the + /// same as b.AllClose(a). Also see Numpy's documentation: /// https://numpy.org/doc/stable/reference/generated/numpy.allclose.html. /// /// TODO: support nan @@ -1014,9 +1065,9 @@ class Tensor : public IsDevice { double rtol = 1e-5, double atol = 1e-8) const; - /// Returns true iff the tensor is the other tensor. This means that, the - /// two tensors have the same underlying memory, device, dtype, shape, - /// strides and etc. + /// Returns true iff the tensor is the other tensor. This means that, + /// the two tensors have the same underlying memory, device, dtype, + /// shape, strides and etc. bool IsSame(const Tensor& other) const; /// Retrieve all values as an std::vector, for debugging and testing @@ -1030,15 +1081,16 @@ class Tensor : public IsDevice { return values; } - /// Returns True if the underlying memory buffer is contiguous. A contiguous - /// Tensor's data_ptr_ does not need to point to the beginning of blob_. + /// Returns True if the underlying memory buffer is contiguous. A + /// contiguous Tensor's data_ptr_ does not need to point to the + /// beginning of blob_. inline bool IsContiguous() const { return shape_util::DefaultStrides(shape_) == strides_; } - /// Returns a contiguous Tensor containing the same data in the same device. - /// If self tensor is already contiguous, the same underlying memory will be - /// used. 
+ /// Returns a contiguous Tensor containing the same data in the same + /// device. If self tensor is already contiguous, the same underlying + /// memory will be used. Tensor Contiguous() const; /// Computes matrix multiplication with *this and rhs and returns the @@ -1066,61 +1118,63 @@ class Tensor : public IsDevice { /// using A = P * L * U; where P is the permutation matrix, L is the /// lower-triangular matrix with diagonal elements as 1.0 and U is the /// upper-triangular matrix, and returns tuple `output` tensor of shape - /// {n,n} and `ipiv` tensor of shape {n}, where {n,n} is the shape of input - /// tensor. [ipiv, output = open3d.core.lu_ipiv(a)]. - /// - /// \return Tuple {ipiv, output}. Where ipiv is a 1D integer pivort indices - /// tensor. It contains the pivot indices, indicating row i of the matrix - /// was interchanged with row ipiv(i)); and output it has L as - /// lower triangular values and U as upper triangle values including the - /// main diagonal (diagonal elements of L to be taken as unity). + /// {n,n} and `ipiv` tensor of shape {n}, where {n,n} is the shape of + /// input tensor. [ipiv, output = open3d.core.lu_ipiv(a)]. + /// + /// \return Tuple {ipiv, output}. Where ipiv is a 1D integer pivort + /// indices tensor. It contains the pivot indices, indicating row i of + /// the matrix was interchanged with row ipiv(i)); and output it has L + /// as lower triangular values and U as upper triangle values including + /// the main diagonal (diagonal elements of L to be taken as unity). std::tuple LUIpiv() const; /// \brief Returns the upper triangular matrix of the 2D tensor, /// above the given diagonal index. [The value of diagonal = col - row, /// therefore 0 is the main diagonal (row = col), and it shifts towards - /// right for positive values (for diagonal = 1, col - row = 1), and towards - /// left for negative values. The value of the diagonal parameter must be - /// between [-m, n] for a {m,n} shaped tensor. + /// right for positive values (for diagonal = 1, col - row = 1), and + /// towards left for negative values. The value of the diagonal + /// parameter must be between [-m, n] for a {m,n} shaped tensor. /// - /// \param diagonal value of [col - row], above which the elements are to be - /// taken for upper triangular matrix. + /// \param diagonal value of [col - row], above which the elements are + /// to be taken for upper triangular matrix. Tensor Triu(const int diagonal = 0) const; /// \brief Returns the lower triangular matrix of the 2D tensor, /// above the given diagonal index. [The value of diagonal = col - row, /// therefore 0 is the main diagonal (row = col), and it shifts towards - /// right for positive values (for diagonal = 1, col - row = 1), and towards - /// left for negative values. The value of the diagonal parameter must be - /// between [-m, n] where {m, n} is the shape of input tensor. + /// right for positive values (for diagonal = 1, col - row = 1), and + /// towards left for negative values. The value of the diagonal + /// parameter must be between [-m, n] where {m, n} is the shape of input + /// tensor. /// - /// \param diagonal value of [col - row], below which the elements are to be - /// taken for lower triangular matrix. + /// \param diagonal value of [col - row], below which the elements are + /// to be taken for lower triangular matrix. Tensor Tril(const int diagonal = 0) const; /// \brief Returns the tuple of upper and lower triangular matrix /// of the 2D tensor, above and below the given diagonal index. 
- /// The diagonal elements of lower triangular matrix are taken to be unity. - /// [The value of diagonal = col - row, therefore 0 is the main diagonal - /// (row = col), and it shifts towards right for positive values - /// (for diagonal = 1, col - row = 1), and towards left for negative values. - /// The value of the diagonal parameter must be between [-m, n] where {m, n} - /// is the shape of input tensor. - /// - /// \param diagonal value of [col - row], above and below which the elements - /// are to be taken for upper (diag. included) and lower triangular matrix. + /// The diagonal elements of lower triangular matrix are taken to be + /// unity. [The value of diagonal = col - row, therefore 0 is the main + /// diagonal (row = col), and it shifts towards right for positive + /// values (for diagonal = 1, col - row = 1), and towards left for + /// negative values. The value of the diagonal parameter must be between + /// [-m, n] where {m, n} is the shape of input tensor. + /// + /// \param diagonal value of [col - row], above and below which the + /// elements are to be taken for upper (diag. included) and lower + /// triangular matrix. std::tuple Triul(const int diagonal = 0) const; /// Computes the matrix inversion of the square matrix *this with LU /// factorization and returns the result. Tensor Inverse() const; - /// Computes the matrix SVD decomposition A = U S VT and returns the result. - /// Note VT (V transpose) is returned instead of V. + /// Computes the matrix SVD decomposition A = U S VT and returns the + /// result. Note VT (V transpose) is returned instead of V. std::tuple SVD() const; - /// Returns the size of the first dimension. If NumDims() == 0, an exception - /// will be thrown. + /// Returns the size of the first dimension. If NumDims() == 0, an + /// exception will be thrown. inline int64_t GetLength() const { return GetShape().GetLength(); } inline SizeVector GetShape() const { return shape_; } @@ -1208,12 +1262,12 @@ class Tensor : public IsDevice { using difference_type = std::ptrdiff_t; using value_type = Tensor; using pointer = value_type*; - using reference = value_type; // Typically Tensor&, but a tensor slice - // creates a new Tensor object with - // shared memory. + using reference = value_type; // Typically Tensor&, but a tensor + // slice creates a new Tensor object + // with shared memory. - // Iterator must be constructible, copy-constructible, copy-assignable, - // destructible and swappable. + // Iterator must be constructible, copy-constructible, + // copy-assignable, destructible and swappable. Iterator(pointer tensor, int64_t index); Iterator(const Iterator&); ~Iterator(); @@ -1235,9 +1289,9 @@ class Tensor : public IsDevice { using difference_type = std::ptrdiff_t; using value_type = const Tensor; using pointer = value_type*; - using reference = value_type; // Typically Tensor&, but a tensor slice - // creates a new Tensor object with - // shared memory. + using reference = value_type; // Typically Tensor&, but a tensor + // slice creates a new Tensor object + // with shared memory. // ConstIterator must be constructible, copy-constructible, // copy-assignable, destructible and swappable. @@ -1256,36 +1310,36 @@ class Tensor : public IsDevice { std::unique_ptr impl_; }; - /// Returns the beginning of the tensor iterator. The iterator iterates over - /// the first dimension of the tensor. The generated tensor slices share the - /// same memory with the original tensor. + /// Returns the beginning of the tensor iterator. 
The iterator iterates + /// over the first dimension of the tensor. The generated tensor slices + /// share the same memory with the original tensor. Iterator begin(); - /// Returns the end of the tensor iterator. The iterator iterates over the - /// first dimension of the tensor. The generated tensor slices share the - /// same memory with the original tensor. + /// Returns the end of the tensor iterator. The iterator iterates over + /// the first dimension of the tensor. The generated tensor slices share + /// the same memory with the original tensor. Iterator end(); /// Returns the beginning of the const tensor iterator. The iterator - /// iterates over the first dimension of the tensor. The generated tensor - /// slices share the same memory with the original tensor. + /// iterates over the first dimension of the tensor. The generated + /// tensor slices share the same memory with the original tensor. ConstIterator cbegin() const; - /// Returns the end of the const tensor iterator. The iterator iterates over - /// the first dimension of the tensor. The generated tensor slices share the - /// same memory with the original tensor. + /// Returns the end of the const tensor iterator. The iterator iterates + /// over the first dimension of the tensor. The generated tensor slices + /// share the same memory with the original tensor. ConstIterator cend() const; /// Returns the beginning of the const tensor iterator. The iterator - /// iterates over the first dimension of the tensor. The generated tensor - /// slices share the same memory with the original tensor. This is - /// equivalent to Tensor::cbegin(). + /// iterates over the first dimension of the tensor. The generated + /// tensor slices share the same memory with the original tensor. This + /// is equivalent to Tensor::cbegin(). ConstIterator begin() const { return cbegin(); } - /// Returns the end of the const tensor iterator. The iterator iterates over - /// the first dimension of the tensor. The generated tensor slices share the - /// same memory with the original tensor. This is equivalent to - /// Tensor::cend(). + /// Returns the end of the const tensor iterator. The iterator iterates + /// over the first dimension of the tensor. The generated tensor slices + /// share the same memory with the original tensor. This is equivalent + /// to Tensor::cend(). ConstIterator end() const { return cend(); } protected: @@ -1310,10 +1364,10 @@ class Tensor : public IsDevice { /// Stride of a Tensor. /// The stride of a n-dimensional tensor is also n-dimensional. /// Stride(i) is the number of elements (not bytes) to jump in a - /// continuous memory space before reaching the next element in dimension - /// i. For example, a 2x3x4 float32 dense tensor has shape(2, 3, 4) and - /// stride(12, 4, 1). A slicing operation performed on the tensor can - /// change the shape and stride. + /// continuous memory space before reaching the next element in + /// dimension i. For example, a 2x3x4 float32 dense tensor has shape(2, + /// 3, 4) and stride(12, 4, 1). A slicing operation performed on the + /// tensor can change the shape and stride. SizeVector strides_ = {1}; /// Data pointer pointing to the beginning element of the Tensor. 
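A minimal usage sketch for the Tensor APIs added above (illustration only, not part of the patch; it assumes just the Quasirandom, Cross and Norm declarations shown in Tensor.h):

    #include "open3d/core/Tensor.h"

    using open3d::core::Tensor;

    int main() {
        // 16 quasirandom 2D points in [0, 1) from the generalized golden ratio sequence.
        Tensor pts = Tensor::Quasirandom(16, 2, open3d::core::Float32);
        // Two batches of 3D vectors; cross product along the last axis.
        Tensor a = Tensor::Quasirandom(4, 3, open3d::core::Float32);
        Tensor b = Tensor::Ones({4, 3}, open3d::core::Float32);
        Tensor c = a.Cross(b, /*axis=*/-1);
        // Per-row L2 norm of the cross products, keeping the reduced axis: shape {4, 1}.
        Tensor lengths = c.Norm({1}, /*keepdim=*/true, /*p=*/2.0f);
        return 0;
    }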
diff --git a/cpp/open3d/t/geometry/TriangleMesh.cpp b/cpp/open3d/t/geometry/TriangleMesh.cpp index d176023db7d..15582e9afae 100644 --- a/cpp/open3d/t/geometry/TriangleMesh.cpp +++ b/cpp/open3d/t/geometry/TriangleMesh.cpp @@ -25,21 +25,15 @@ #include #include #include -#include -#include -#include #include "open3d/core/CUDAUtils.h" #include "open3d/core/Device.h" #include "open3d/core/Dtype.h" #include "open3d/core/EigenConverter.h" -#include "open3d/core/ParallelFor.h" #include "open3d/core/ShapeUtil.h" #include "open3d/core/Tensor.h" #include "open3d/core/TensorCheck.h" #include "open3d/core/TensorKey.h" -#include "open3d/core/linalg/AddMM.h" -#include "open3d/core/linalg/Matmul.h" #include "open3d/core/nns/NearestNeighborSearch.h" #include "open3d/t/geometry/LineSet.h" #include "open3d/t/geometry/PointCloud.h" @@ -48,12 +42,10 @@ #include "open3d/t/geometry/kernel/IPPImage.h" #include "open3d/t/geometry/kernel/Metrics.h" #include "open3d/t/geometry/kernel/PCAPartition.h" -#include "open3d/t/geometry/kernel/PointCloud.h" #include "open3d/t/geometry/kernel/Transform.h" #include "open3d/t/geometry/kernel/TriangleMesh.h" #include "open3d/t/geometry/kernel/UVUnwrapping.h" -#include "open3d/t/io/ImageIO.h" -#include "open3d/t/io/NumpyIO.h" +#include "open3d/t/geometry/kernel/mikktspace.h" #include "open3d/utility/ParallelScan.h" namespace open3d { @@ -137,7 +129,12 @@ std::string TriangleMesh::ToString() const { triangles_attr_str[triangles_attr_str.size() - 1] = '.'; } - return str + vertices_attr_str + triangles_attr_str; + std::string material_str; + if (HasMaterial()) { + material_str = '\n' + GetMaterial().ToString(); + } + + return str + vertices_attr_str + triangles_attr_str + material_str; } TriangleMesh &TriangleMesh::Transform(const core::Tensor &transformation) { @@ -1004,7 +1001,7 @@ TriangleMesh::BakeVertexAttrTextures( }); } if (update_material) { - UpdateMaterialTextures(result, this->GetMaterial()); + UpdateMaterialTextures(result, GetMaterial()); } return result; @@ -1894,6 +1891,385 @@ core::Tensor TriangleMesh::ComputeMetrics(const TriangleMesh &mesh2, return ComputeMetricsCommon(distance12, distance21, metrics, params); } +Image TriangleMesh::ComputeAmbientOcclusion(int tex_width, + int n_rays, + float max_hit_distance, + bool update_material) { + using core::Tensor; + if (!HasTriangleAttr("texture_uvs")) { + utility::LogError( + "TriangleMesh does not contain 'texture_uvs'. Please compute " + "it with ComputeUVAtlas() first."); + } + if (!HasVertexNormals()) { + utility::LogWarning( + "TriangleMesh does not have vertex normals. Computing them " + "now."); + ComputeVertexNormals(true); + } + + // Bake positions and normals into textures. + const float margin = + (tex_width + 511.f) / 512.f; // = ceil(tex_width / 512) + auto baked_textures = + BakeVertexAttrTextures(tex_width, {"positions", "normals"}, margin, + /*fill=*/0.0, /*update_material=*/false); + Tensor position_map = + baked_textures["positions"].To(GetDevice(), core::Float32); + Tensor normal_map = + baked_textures["normals"].To(GetDevice(), core::Float32); + + RaycastingScene rcs; + rcs.AddTriangles(*this); + + const int64_t n_pixels = tex_width * tex_width; + + // Reshape maps and create masks for valid pixels (not in the margin). 
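+    // Texels outside every UV chart were filled with 0 by BakeVertexAttrTextures
+    // above, so a baked position of exactly (0, 0, 0) marks a texel with no
+    // surface behind it; only the remaining texels get AO rays.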
+    Tensor positions = position_map.Reshape({n_pixels, 3});
+    Tensor normals = normal_map.Reshape({n_pixels, 3});
+    Tensor valid_mask = positions.Abs().Sum({1}).Ne(0);
+    Tensor valid_indices = valid_mask.NonZero().Flatten();
+    const int64_t n_valid_pixels = valid_indices.GetLength();
+
+    if (n_valid_pixels == 0) {
+        utility::LogWarning("No valid pixels found to compute AO on.");
+        return Image(Tensor::Ones({(int64_t)tex_width, (int64_t)tex_width, 1},
+                                  core::UInt8, GetDevice()) *
+                     255);
+    }
+
+    // Get origins and normals for valid pixels.
+    Tensor valid_positions = positions.IndexGet({valid_indices});
+    Tensor valid_normals = normals.IndexGet({valid_indices});
+
+    Tensor origins = valid_positions.Expand({n_rays, n_valid_pixels, 3});
+
+    // Hemisphere sampling using 2D quasirandom numbers.
+    Tensor qr = Tensor::Quasirandom(n_rays, 2, core::Float32, GetDevice());
+    Tensor r1 = qr.Slice(1, 0, 1);
+    Tensor r = r1.Sqrt();
+    Tensor r2 = qr.Slice(1, 1, 2);
+    Tensor theta = r2 * 2.0 * M_PI;
+    Tensor sampled_dirs =
+            Tensor::Empty({n_rays, 3}, core::Float32, GetDevice());
+    sampled_dirs.Slice(1, 0, 1) = r * theta.Cos();    // X
+    sampled_dirs.Slice(1, 1, 2) = r * theta.Sin();    // Y
+    sampled_dirs.Slice(1, 2, 3) = (1.f - r1).Sqrt();  // Z >= 0
+    sampled_dirs =
+            sampled_dirs.Expand({n_valid_pixels, n_rays, 3}).Transpose(0, 1);
+
+    // Rotate (0, 0, 1) to align with normal
+    // Create basis from normals to transform sampled directions.
+    Tensor t = valid_normals.Expand({n_rays, n_valid_pixels, 3});
+    Tensor up1 = Tensor::Init<float>({0, 0, 1}, GetDevice());
+    Tensor up2 = Tensor::Init<float>({1, 0, 0}, GetDevice());
+    Tensor up = up1.Expand({n_rays, n_valid_pixels, 3});
+    Tensor abs_dot = t.Mul(up).Sum({-1}).Abs();
+    up.IndexSet({abs_dot.Ge(1.0 - 1e-6)}, up2);
+    // Compute b = normalize(cross(t, up))
+    Tensor b = t.Cross(up);
+    b = b / b.Norm({-1}, true).Clip_(1e-6, INFINITY);
+    // Transform directions to world space.
+    Tensor directions = sampled_dirs.Slice(-1, 0, 1) * t.Cross(b) +
+                        sampled_dirs.Slice(-1, 1, 2) * b +
+                        sampled_dirs.Slice(-1, 2, 3) * t;
+
+    // Cast all rays at once.
+    Tensor rays = origins.Append(directions, -1);
+    Tensor occlusion = rcs.TestOcclusions(
+            rays, 1e-5f,  // tnear > 0 to prevent self-occlusions
+            max_hit_distance);
+
+    Tensor ao_texture = Tensor::Ones({n_pixels}, core::Float32, GetDevice());
+    ao_texture.IndexSet({valid_indices},
+                        1.0f - occlusion.To(core::Float32).Mean({0}));
+    ao_texture = ao_texture.View({(int64_t)tex_width, (int64_t)tex_width, 1});
+
+    // Denoise with bilateral filter.
+    Image ao_image(ao_texture);
+    Image denoised_ao_image = ao_image.FilterBilateral(
+            /*kernel_size*/ 3, /*value_sigma*/ 2. / n_rays,
+            /*distance_sigma*/ 3.0);
+
+    // Convert to an 8-bit image and update material.
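+    // The denoised AO values lie in [0, 1]; Image::To below rescales them
+    // linearly with scale=255 and offset=0, so 0 (fully occluded) maps to 0
+    // and 1 (unoccluded) maps to 255 in the UInt8 texture.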
+    Image final_image(denoised_ao_image.To(core::UInt8, false, 255.f, 0.f));
+    if (update_material) {
+        if (!HasMaterial()) {
+            SetMaterial(visualization::rendering::Material());
+            GetMaterial().SetDefaultProperties();  // defaultUnlit
+        }
+        GetMaterial().SetAOMap(final_image);
+    }
+
+    return final_image;
+}
+
+namespace {
+
+struct MikkTSpaceContextData {
+    const core::Tensor *vertices = nullptr;
+    const core::Tensor *normals = nullptr;
+    const core::Tensor *uvs = nullptr;
+    const core::Tensor *indices = nullptr;
+    core::Tensor *tangents = nullptr;
+    core::Tensor *bitangents = nullptr;
+};
+
+int GetNumFaces(const SMikkTSpaceContext *pContext) {
+    auto *data = static_cast<MikkTSpaceContextData *>(pContext->m_pUserData);
+    return data->indices->GetLength();
+}
+
+int GetNumVerticesOfFace(const SMikkTSpaceContext *pContext, const int iFace) {
+    return 3;
+}
+
+void GetPosition(const SMikkTSpaceContext *pContext,
+                 float fvPosOut[],
+                 const int iFace,
+                 const int iVert) {
+    auto *data = static_cast<MikkTSpaceContextData *>(pContext->m_pUserData);
+    int64_t vertex_index =
+            data->indices->GetDataPtr<int64_t>()[iFace * 3 + iVert];
+    const float *vertex =
+            data->vertices->GetDataPtr<float>() + vertex_index * 3;
+    fvPosOut[0] = vertex[0];
+    fvPosOut[1] = vertex[1];
+    fvPosOut[2] = vertex[2];
+}
+
+void GetNormal(const SMikkTSpaceContext *pContext,
+               float fvNormOut[],
+               const int iFace,
+               const int iVert) {
+    auto *data = static_cast<MikkTSpaceContextData *>(pContext->m_pUserData);
+    int64_t vertex_index =
+            data->indices->GetDataPtr<int64_t>()[iFace * 3 + iVert];
+    const float *normal = data->normals->GetDataPtr<float>() + vertex_index * 3;
+    fvNormOut[0] = normal[0];
+    fvNormOut[1] = normal[1];
+    fvNormOut[2] = normal[2];
+}
+
+void GetTexCoord(const SMikkTSpaceContext *pContext,
+                 float fvTexcOut[],
+                 const int iFace,
+                 const int iVert) {
+    auto *data = static_cast<MikkTSpaceContextData *>(pContext->m_pUserData);
+    const float *uv = data->uvs->GetDataPtr<float>() + (iFace * 3 + iVert) * 2;
+    fvTexcOut[0] = uv[0];
+    fvTexcOut[1] = uv[1];
+}
+
+void SetTSpaceBasic(const SMikkTSpaceContext *pContext,
+                    const float fvTangent[],
+                    const float fSign,
+                    const int iFace,
+                    const int iVert) {
+    auto *data = static_cast<MikkTSpaceContextData *>(pContext->m_pUserData);
+    int64_t vertex_index =
+            data->indices->GetDataPtr<int64_t>()[iFace * 3 + iVert];
+
+    float *tangent_ptr = data->tangents->GetDataPtr<float>() + vertex_index * 4;
+    tangent_ptr[0] = fvTangent[0];
+    tangent_ptr[1] = fvTangent[1];
+    tangent_ptr[2] = fvTangent[2];
+    tangent_ptr[3] = fSign;
+}
+
+}  // namespace
+
+// Implementation from http://www.mikktspace.com/
+void TriangleMesh::ComputeTangentSpace(bool bake /*=true*/,
+                                       int tex_width /*=512*/) {
+    if (!HasVertexPositions() || !HasVertexNormals() ||
+        !HasTriangleAttr("texture_uvs")) {
+        utility::LogError(
+                "TriangleMesh must have vertex positions, vertex normals, and "
+                "texture UVs to compute tangent space.");
+        return;
+    }
+
+    core::Device cpu_device("CPU:0");
+    TriangleMesh cpu_mesh = To(cpu_device);
+
+    core::Tensor vertices =
+            cpu_mesh.GetVertexPositions().To(core::Float32).Contiguous();
+    core::Tensor normals =
+            cpu_mesh.GetVertexNormals().To(core::Float32).Contiguous();
+    core::Tensor uvs = cpu_mesh.GetTriangleAttr("texture_uvs")
+                               .To(core::Float32)
+                               .Contiguous();
+    core::Tensor indices =
+            cpu_mesh.GetTriangleIndices().To(core::Int64).Contiguous();
+
+    int64_t num_vertices = vertices.GetLength();
+    core::Tensor tangents =
+            core::Tensor::Zeros({num_vertices, 4}, core::Float32, cpu_device);
+
+    MikkTSpaceContextData context_data;
+    context_data.vertices = &vertices;
+    context_data.normals = &normals;
+    context_data.uvs = &uvs;
+    context_data.indices = &indices;
+    context_data.tangents = &tangents;
+
+    SMikkTSpaceInterface mikk_interface;
+    mikk_interface.m_getNumFaces = GetNumFaces;
+    mikk_interface.m_getNumVerticesOfFace = GetNumVerticesOfFace;
+    mikk_interface.m_getPosition = GetPosition;
+    mikk_interface.m_getNormal = GetNormal;
+    mikk_interface.m_getTexCoord = GetTexCoord;
+    mikk_interface.m_setTSpaceBasic = SetTSpaceBasic;
+    mikk_interface.m_setTSpace = nullptr;
+
+    SMikkTSpaceContext mikk_context;
+    mikk_context.m_pInterface = &mikk_interface;
+    mikk_context.m_pUserData = &context_data;
+
+    if (!genTangSpaceDefault(&mikk_context)) {
+        utility::LogError("Failed to generate tangent space.");
+        return;
+    }
+
+    SetVertexAttr("tangents", tangents.To(GetDevice()));  // normalized
+    if (bake) {
+        utility::LogDebug("Baking normals and tangents as textures...");
+        if (!HasMaterial()) {
+            SetMaterial(visualization::rendering::Material());
+            GetMaterial().SetDefaultProperties();  // defaultLit
+        }
+        cpu_mesh.SetVertexAttr("tangents", tangents);
+        const float margin = (tex_width + 511.f) / 512.f;
+        auto baked_textures = cpu_mesh.BakeVertexAttrTextures(
+                tex_width, {"normals", "tangents"}, margin,
+                /*fill=*/0.0, /*update_material=*/false);
+        // Baked (interpolated) normals and tangents are NOT normalized.
+        GetMaterial().SetTextureMap("normals",
+                                    baked_textures["normals"].To(GetDevice()));
+        GetMaterial().SetTextureMap("tangents",
+                                    baked_textures["tangents"].To(GetDevice()));
+    }
+}
+
+Image TriangleMesh::TransformNormalMap(const Image &normal_map,
+                                       bool to_tangent_space /*=true*/,
+                                       bool update_material /*=false*/) {
+    if (!HasTriangleAttr("texture_uvs")) {
+        utility::LogError(
+                "Mesh must have triangle attribute 'texture_uvs'. "
+                "Use ComputeUVAtlas() to compute it.");
+    }
+    if (!HasVertexNormals() || !HasVertexAttr("tangents")) {
+        utility::LogError(
+                "Mesh must have vertex normals and tangents. "
+                "Use ComputeVertexNormals() and ComputeTangentSpace() to "
+                "compute them.");
+    }
+
+    if (normal_map.GetChannels() != 3) {
+        utility::LogError("Normal map must have 3 channels, but has {}.",
+                          normal_map.GetChannels());
+    }
+
+    int tex_width = normal_map.GetCols();
+    int tex_height = normal_map.GetRows();
+    if (tex_width != tex_height) {
+        utility::LogWarning(
+                "Normal map width {} and height {} are not equal. Using width "
+                "as texture size.",
+                tex_width, tex_height);
+    }
+
+    // Bake TBN vectors into textures.
+    const float margin =
+            (tex_width + 511.f) / 512.f;  // = ceil(tex_width / 512)
+    if (!HasMaterial()) {
+        SetMaterial(visualization::rendering::Material());
+        GetMaterial().SetDefaultProperties();  // defaultLit
+    }
+
+    std::unordered_map<std::string, core::Tensor> baked_textures;
+    if (!(GetMaterial().HasTextureMap("normals") &&
+          GetMaterial().HasTextureMap("tangents"))) {
+        baked_textures = To(core::Device("CPU:0"))
+                                 .BakeVertexAttrTextures(
+                                         tex_width, {"normals", "tangents"},
+                                         margin, /*fill=*/0.0, false);
+    } else {
+        baked_textures["normals"] =
+                GetMaterial().GetTextureMap("normals").AsTensor().To(
+                        core::Device("CPU:0"));
+        baked_textures["tangents"] = GetMaterial()
+                                             .GetTextureMap("tangents")
+                                             .AsTensor()
+                                             .To(core::Device("CPU:0"));
+    }
+
+    // Baked (Interpolated) TBN vectors are NOT normalized.
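+    // The baked tangent T, bitangent B and normal N span the per-pixel shading
+    // frame. World -> tangent space projects the input normal onto T, B and N
+    // (dot products); tangent -> world space is the linear combination
+    // x*T + y*B + z*N. Results are re-normalized below because the
+    // interpolated frame is not orthonormal.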
+ core::Tensor tbn_N = baked_textures.at("normals").To(core::Float32); + core::Tensor tbn_T = baked_textures.at("tangents").To(core::Float32); + // 4th element of T is the sign for B + // bitangent = Sign * cross(normal, tangent) + core::Tensor tbn_B = + tbn_T.Slice(2, 3, 4) * tbn_N.Cross(tbn_T.Slice(2, 0, 3)); + tbn_T = tbn_T.Slice(2, 0, 3); + + core::Tensor input_normals_t = normal_map.AsTensor(); + if (!(input_normals_t.GetDtype() == core::Float32 || + input_normals_t.GetDtype() == core::Float64)) { + // Integer: Decode from [0, 255] to [-1, 1] + input_normals_t = + (input_normals_t.To(core::Float32) / 255.0f) * 2.0f - 1.0f; + } + input_normals_t = input_normals_t.To(core::Float32); + + if (to_tangent_space) { + // World -> Tangent + // N_t = transpose(TBN) * N_w + core::Tensor tangent_normals_t = + core::Tensor::Empty({tex_height, tex_width, 3}, core::Float32); + + tangent_normals_t.Slice(2, 0, 1) = + (input_normals_t * tbn_T).Sum({2}, true); + tangent_normals_t.Slice(2, 1, 2) = + (input_normals_t * tbn_B).Sum({2}, true); + tangent_normals_t.Slice(2, 2, 3) = + (input_normals_t * tbn_N).Sum({2}, true); + + // Normalize and encode from [-1, 1] to [0, 255] + tangent_normals_t = + tangent_normals_t / + tangent_normals_t.Norm({2}, true).Clip_(1e-6, INFINITY); + tangent_normals_t = (tangent_normals_t + 1.0f) * 127.5f; + + auto out = Image(tangent_normals_t.To(core::UInt8)); + if (update_material) { + GetMaterial().SetNormalMap(out); + } + return out; + } else { + // Tangent -> World + core::Tensor tangent_normals_t = input_normals_t; + + // N_w = TBN * N_t + core::Tensor world_normals_t = + tbn_T * tangent_normals_t.Slice(2, 0, 1) + + tbn_B * tangent_normals_t.Slice(2, 1, 2) + + tbn_N * tangent_normals_t.Slice(2, 2, 3); + + // Normalize + world_normals_t = world_normals_t / + world_normals_t.Norm({2}, true).Clip_(1e-6, INFINITY); + world_normals_t = (world_normals_t + 1.0f) * 127.5f; + + if (update_material) { + utility::LogWarning( + "Ignoring update_material, since output normal map is not " + "in tangent space."); + } + return Image(world_normals_t.To(core::UInt8)); + } +} + } // namespace geometry } // namespace t } // namespace open3d diff --git a/cpp/open3d/t/geometry/TriangleMesh.h b/cpp/open3d/t/geometry/TriangleMesh.h index 9a5ade04f3e..cc70deb870b 100644 --- a/cpp/open3d/t/geometry/TriangleMesh.h +++ b/cpp/open3d/t/geometry/TriangleMesh.h @@ -26,6 +26,7 @@ namespace geometry { class LineSet; class PointCloud; +class RaycastingScene; /// \class TriangleMesh /// \brief A triangle mesh contains vertices and triangles. @@ -1092,6 +1093,61 @@ class TriangleMesh : public Geometry, public DrawableGeometry { std::vector metrics = {Metric::ChamferDistance}, MetricParameters params = MetricParameters()) const; + /// \brief Computes an ambient occlusion texture for the mesh. + /// + /// This method generates an ambient occlusion map by casting rays from the surface + /// of the mesh and measuring occlusion. The mesh must have texture coordinates + /// ("texture_uvs" triangle attribute). The resulting texture is a single-channel + /// grayscale image with values in [0, 255], where 0 is fully occluded and 255 is + /// fully exposed. + /// + /// This function always uses the CPU device. + /// + /// \param tex_width The width and height of the output texture in pixels (square). + /// \param n_rays The number of rays to cast per texel for occlusion estimation. + /// \param max_hit_distance The maximum distance for ray intersection. The + /// default is INFINITY, i.e. 
very far occlusions also count. + /// \param update_material If true, updates the mesh material with the computed + /// ambient occlusion texture. + /// \return The computed ambient occlusion texture as an Image (single channel, UInt8). + Image ComputeAmbientOcclusion(int tex_width = 256, + int n_rays = 32, + float max_hit_distance = INFINITY, + bool update_material = true); + + /// Computes tangent space for the triangle mesh with MikkTSpace. + /// The mesh must have vertex positions, vertex normals, and texture UVs + /// (triangle attribute 'texture_uvs'). The computed tangents and bitangents + /// will be added as vertex attributes 'tangents' and 'bitangents'. + /// This function works on the CPU and will transfer data to the CPU if + /// necessary. + /// \param bake If true, the tangents, bitangents and normals will also be + /// baked to textures and saved to the material. + /// \param tex_width Baked texture size. Default 512. + void ComputeTangentSpace(bool bake = true, int tex_width = 512); + + /// \brief Converts a normal map between world and tangent space. + /// + /// The conversion is performed for each pixel of the map. The mesh must + /// have vertex normals, tangents, bitangents, and texture UVs. + /// The tangent space attributes can be computed with + /// `ComputeTangentSpace()`. + /// + /// \param normal_map The normal map to convert. + /// When converting to tangent space, this is the world-space normal map. + /// When converting to world space, this is the tangent-space normal map. + /// It is expected to have 3 channels with Float32 or Float64 data type, + /// with values in range [-1, 1], or UInt8 data type in the range [0, 255]. + /// \param to_tangent_space If true, converts from world to tangent space. + /// If false, converts from tangent to world space. + /// \param update_material If true and we are converting to tangent space, + /// the mesh material will be updated to contain the new normal map. + /// \return The converted normal map as an Image. This will have 3 channels, + /// UInt8 data type, with values in range [0, 255]. + Image TransformNormalMap(const Image &normal_map, + bool to_tangent_space = true, + bool update_material = false); + protected: core::Device device_ = core::Device("CPU:0"); TensorMap vertex_attr_; diff --git a/cpp/open3d/t/geometry/kernel/CMakeLists.txt b/cpp/open3d/t/geometry/kernel/CMakeLists.txt index 223c4d4c7a8..6853c824e3c 100644 --- a/cpp/open3d/t/geometry/kernel/CMakeLists.txt +++ b/cpp/open3d/t/geometry/kernel/CMakeLists.txt @@ -17,6 +17,7 @@ target_sources(tgeometry_kernel PRIVATE UVUnwrapping.cpp VoxelBlockGrid.cpp VoxelBlockGridCPU.cpp + mikktspace.c ) if (BUILD_CUDA_MODULE) diff --git a/cpp/open3d/t/geometry/kernel/mikktspace.c b/cpp/open3d/t/geometry/kernel/mikktspace.c new file mode 100644 index 00000000000..0342ae0146f --- /dev/null +++ b/cpp/open3d/t/geometry/kernel/mikktspace.c @@ -0,0 +1,1899 @@ +/** \file mikktspace/mikktspace.c + * \ingroup mikktspace + */ +/** + * Copyright (C) 2011 by Morten S. Mikkelsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#include +#include +#include +#include +#include +#include + +#include "mikktspace.h" + +#define TFALSE 0 +#define TTRUE 1 + +#ifndef M_PI +#define M_PI 3.1415926535897932384626433832795 +#endif + +#define INTERNAL_RND_SORT_SEED 39871946 + +// internal structure +typedef struct { + float x, y, z; +} SVec3; + +static tbool veq( const SVec3 v1, const SVec3 v2 ) +{ + return (v1.x == v2.x) && (v1.y == v2.y) && (v1.z == v2.z); +} + +static SVec3 vadd( const SVec3 v1, const SVec3 v2 ) +{ + SVec3 vRes; + + vRes.x = v1.x + v2.x; + vRes.y = v1.y + v2.y; + vRes.z = v1.z + v2.z; + + return vRes; +} + + +static SVec3 vsub( const SVec3 v1, const SVec3 v2 ) +{ + SVec3 vRes; + + vRes.x = v1.x - v2.x; + vRes.y = v1.y - v2.y; + vRes.z = v1.z - v2.z; + + return vRes; +} + +static SVec3 vscale(const float fS, const SVec3 v) +{ + SVec3 vRes; + + vRes.x = fS * v.x; + vRes.y = fS * v.y; + vRes.z = fS * v.z; + + return vRes; +} + +static float LengthSquared( const SVec3 v ) +{ + return v.x*v.x + v.y*v.y + v.z*v.z; +} + +static float Length( const SVec3 v ) +{ + return sqrtf(LengthSquared(v)); +} + +static SVec3 Normalize( const SVec3 v ) +{ + return vscale(1 / Length(v), v); +} + +static float vdot( const SVec3 v1, const SVec3 v2) +{ + return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z; +} + + +static tbool NotZero(const float fX) +{ + // could possibly use FLT_EPSILON instead + return fabsf(fX) > FLT_MIN; +} + +static tbool VNotZero(const SVec3 v) +{ + // might change this to an epsilon based test + return NotZero(v.x) || NotZero(v.y) || NotZero(v.z); +} + + + +typedef struct { + int iNrFaces; + int * pTriMembers; +} SSubGroup; + +typedef struct { + int iNrFaces; + int * pFaceIndices; + int iVertexRepresentitive; + tbool bOrientPreservering; +} SGroup; + +// +#define MARK_DEGENERATE 1 +#define QUAD_ONE_DEGEN_TRI 2 +#define GROUP_WITH_ANY 4 +#define ORIENT_PRESERVING 8 + + + +typedef struct { + int FaceNeighbors[3]; + SGroup * AssignedGroup[3]; + + // normalized first order face derivatives + SVec3 vOs, vOt; + float fMagS, fMagT; // original magnitudes + + // determines if the current and the next triangle are a quad. + int iOrgFaceNumber; + int iFlag, iTSpacesOffs; + unsigned char vert_num[4]; +} STriInfo; + +typedef struct { + SVec3 vOs; + float fMagS; + SVec3 vOt; + float fMagT; + int iCounter; // this is to average back into quads. 
+ tbool bOrient; +} STSpace; + +static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn); +static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[], + const int iNrActiveGroups, const int piTriListIn[], const float fThresCos, + const SMikkTSpaceContext * pContext); + +static int MakeIndex(const int iFace, const int iVert) +{ + assert(iVert>=0 && iVert<4 && iFace>=0); + return (iFace<<2) | (iVert&0x3); +} + +static void IndexToData(int * piFace, int * piVert, const int iIndexIn) +{ + piVert[0] = iIndexIn&0x3; + piFace[0] = iIndexIn>>2; +} + +static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1) +{ + STSpace ts_res; + + // this if is important. Due to floating point precision + // averaging when ts0==ts1 will cause a slight difference + // which results in tangent space splits later on + if (pTS0->fMagS==pTS1->fMagS && pTS0->fMagT==pTS1->fMagT && + veq(pTS0->vOs,pTS1->vOs) && veq(pTS0->vOt, pTS1->vOt)) + { + ts_res.fMagS = pTS0->fMagS; + ts_res.fMagT = pTS0->fMagT; + ts_res.vOs = pTS0->vOs; + ts_res.vOt = pTS0->vOt; + } + else + { + ts_res.fMagS = 0.5f*(pTS0->fMagS+pTS1->fMagS); + ts_res.fMagT = 0.5f*(pTS0->fMagT+pTS1->fMagT); + ts_res.vOs = vadd(pTS0->vOs,pTS1->vOs); + ts_res.vOt = vadd(pTS0->vOt,pTS1->vOt); + if ( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs); + if ( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt); + } + + return ts_res; +} + + + +static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index); +static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index); +static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index); + + +// degen triangles +static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris); +static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris); + + +tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext) +{ + return genTangSpace(pContext, 180.0f); +} + +tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold) +{ + // count nr_triangles + int * piTriListIn = NULL, * piGroupTrianglesBuffer = NULL; + STriInfo * pTriInfos = NULL; + SGroup * pGroups = NULL; + STSpace * psTspace = NULL; + int iNrTrianglesIn = 0, f=0, t=0, i=0; + int iNrTSPaces = 0, iTotTris = 0, iDegenTriangles = 0, iNrMaxGroups = 0; + int iNrActiveGroups = 0, index = 0; + const int iNrFaces = pContext->m_pInterface->m_getNumFaces(pContext); + tbool bRes = TFALSE; + const float fThresCos = (float) cos((fAngularThreshold*(float)M_PI)/180.0f); + + // verify all call-backs have been set + if ( pContext->m_pInterface->m_getNumFaces==NULL || + pContext->m_pInterface->m_getNumVerticesOfFace==NULL || + pContext->m_pInterface->m_getPosition==NULL || + pContext->m_pInterface->m_getNormal==NULL || + pContext->m_pInterface->m_getTexCoord==NULL ) + return TFALSE; + + // 
count triangles on supported faces + for (f=0; fm_pInterface->m_getNumVerticesOfFace(pContext, f); + if (verts==3) ++iNrTrianglesIn; + else if (verts==4) iNrTrianglesIn += 2; + } + if (iNrTrianglesIn<=0) return TFALSE; + + // allocate memory for an index list + piTriListIn = (int *) malloc(sizeof(int)*3*iNrTrianglesIn); + pTriInfos = (STriInfo *) malloc(sizeof(STriInfo)*iNrTrianglesIn); + if (piTriListIn==NULL || pTriInfos==NULL) + { + if (piTriListIn!=NULL) free(piTriListIn); + if (pTriInfos!=NULL) free(pTriInfos); + return TFALSE; + } + + // make an initial triangle --> face index list + iNrTSPaces = GenerateInitialVerticesIndexList(pTriInfos, piTriListIn, pContext, iNrTrianglesIn); + + // make a welded index list of identical positions and attributes (pos, norm, texc) + //printf("gen welded index list begin\n"); + GenerateSharedVerticesIndexList(piTriListIn, pContext, iNrTrianglesIn); + //printf("gen welded index list end\n"); + + // Mark all degenerate triangles + iTotTris = iNrTrianglesIn; + iDegenTriangles = 0; + for (t=0; tm_pInterface->m_getNumVerticesOfFace(pContext, f); + if (verts!=3 && verts!=4) continue; + + + // I've decided to let degenerate triangles and group-with-anythings + // vary between left/right hand coordinate systems at the vertices. + // All healthy triangles on the other hand are built to always be either or. + + /*// force the coordinate system orientation to be uniform for every face. + // (this is already the case for good triangles but not for + // degenerate ones and those with bGroupWithAnything==true) + bool bOrient = psTspace[index].bOrient; + if (psTspace[index].iCounter == 0) // tspace was not derived from a group + { + // look for a space created in GenerateTSpaces() by iCounter>0 + bool bNotFound = true; + int i=1; + while (i 0) bNotFound=false; + else ++i; + } + if (!bNotFound) bOrient = psTspace[index+i].bOrient; + }*/ + + // set data + for (i=0; ivOs.x, pTSpace->vOs.y, pTSpace->vOs.z}; + float bitang[] = {pTSpace->vOt.x, pTSpace->vOt.y, pTSpace->vOt.z}; + if (pContext->m_pInterface->m_setTSpace!=NULL) + pContext->m_pInterface->m_setTSpace(pContext, tang, bitang, pTSpace->fMagS, pTSpace->fMagT, pTSpace->bOrient, f, i); + if (pContext->m_pInterface->m_setTSpaceBasic!=NULL) + pContext->m_pInterface->m_setTSpaceBasic(pContext, tang, pTSpace->bOrient==TTRUE ? 1.0f : (-1.0f), f, i); + + ++index; + } + } + + free(psTspace); + + + return TTRUE; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef struct { + float vert[3]; + int index; +} STmpVert; + +static const int g_iCells = 2048; + +#ifdef _MSC_VER +# define NOINLINE __declspec(noinline) +#else +# define NOINLINE __attribute__ ((noinline)) +#endif + +// it is IMPORTANT that this function is called to evaluate the hash since +// inlining could potentially reorder instructions and generate different +// results for the same effective input value fVal. +static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal) +{ + const float fIndex = g_iCells * ((fVal-fMin)/(fMax-fMin)); + const int iIndex = (int)fIndex; + return iIndex < g_iCells ? (iIndex >= 0 ? 
iIndex : 0) : (g_iCells - 1); +} + +static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in); +static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries); +static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); + +static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +{ + + // Generate bounding box + int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL; + STmpVert * pTmpVert = NULL; + int i=0, iChannel=0, k=0, e=0; + int iMaxCount=0; + SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim; + float fMin, fMax; + for (i=1; i<(iNrTrianglesIn*3); i++) + { + const int index = piTriList_in_and_out[i]; + + const SVec3 vP = GetPosition(pContext, index); + if (vMin.x > vP.x) vMin.x = vP.x; + else if (vMax.x < vP.x) vMax.x = vP.x; + if (vMin.y > vP.y) vMin.y = vP.y; + else if (vMax.y < vP.y) vMax.y = vP.y; + if (vMin.z > vP.z) vMin.z = vP.z; + else if (vMax.z < vP.z) vMax.z = vP.z; + } + + vDim = vsub(vMax,vMin); + iChannel = 0; + fMin = vMin.x; fMax=vMax.x; + if (vDim.y>vDim.x && vDim.y>vDim.z) + { + iChannel=1; + fMin = vMin.y; + fMax = vMax.y; + } + else if (vDim.z>vDim.x) + { + iChannel=2; + fMin = vMin.z; + fMax = vMax.z; + } + + // make allocations + piHashTable = (int *) malloc(sizeof(int)*iNrTrianglesIn*3); + piHashCount = (int *) malloc(sizeof(int)*g_iCells); + piHashOffsets = (int *) malloc(sizeof(int)*g_iCells); + piHashCount2 = (int *) malloc(sizeof(int)*g_iCells); + + if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL) + { + if (piHashTable!=NULL) free(piHashTable); + if (piHashCount!=NULL) free(piHashCount); + if (piHashOffsets!=NULL) free(piHashOffsets); + if (piHashCount2!=NULL) free(piHashCount2); + GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn); + return; + } + memset(piHashCount, 0, sizeof(int)*g_iCells); + memset(piHashCount2, 0, sizeof(int)*g_iCells); + + // count amount of elements in each cell unit + for (i=0; i<(iNrTrianglesIn*3); i++) + { + const int index = piTriList_in_and_out[i]; + const SVec3 vP = GetPosition(pContext, index); + const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z); + const int iCell = FindGridCell(fMin, fMax, fVal); + ++piHashCount[iCell]; + } + + // evaluate start index of each cell. + piHashOffsets[0]=0; + for (k=1; kpTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c]; + if (fvMax[c]dx && dy>dz) channel=1; + else if (dz>dx) channel=2; + + fSep = 0.5f*(fvMax[channel]+fvMin[channel]); + + // stop if all vertices are NaNs + if (!isfinite(fSep)) + return; + + // terminate recursion when the separation/average value + // is no longer strictly between fMin and fMax values. 
+ if (fSep>=fvMax[channel] || fSep<=fvMin[channel]) + { + // complete the weld + for (l=iL_in; l<=iR_in; l++) + { + int i = pTmpVert[l].index; + const int index = piTriList_in_and_out[i]; + const SVec3 vP = GetPosition(pContext, index); + const SVec3 vN = GetNormal(pContext, index); + const SVec3 vT = GetTexCoord(pContext, index); + + tbool bNotFound = TTRUE; + int l2=iL_in, i2rec=-1; + while (l20); // at least 2 entries + + // separate (by fSep) all points between iL_in and iR_in in pTmpVert[] + while (iL < iR) + { + tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE; + while ((!bReadyLeftSwap) && iL=iL_in && iL<=iR_in); + bReadyLeftSwap = !(pTmpVert[iL].vert[channel]=iL_in && iR<=iR_in); + bReadyRightSwap = pTmpVert[iR].vert[channel]m_pInterface->m_getNumFaces(pContext); f++) + { + const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f); + if (verts!=3 && verts!=4) continue; + + pTriInfos[iDstTriIndex].iOrgFaceNumber = f; + pTriInfos[iDstTriIndex].iTSpacesOffs = iTSpacesOffs; + + if (verts==3) + { + unsigned char * pVerts = pTriInfos[iDstTriIndex].vert_num; + pVerts[0]=0; pVerts[1]=1; pVerts[2]=2; + piTriList_out[iDstTriIndex*3+0] = MakeIndex(f, 0); + piTriList_out[iDstTriIndex*3+1] = MakeIndex(f, 1); + piTriList_out[iDstTriIndex*3+2] = MakeIndex(f, 2); + ++iDstTriIndex; // next + } + else + { + { + pTriInfos[iDstTriIndex+1].iOrgFaceNumber = f; + pTriInfos[iDstTriIndex+1].iTSpacesOffs = iTSpacesOffs; + } + + { + // need an order independent way to evaluate + // tspace on quads. This is done by splitting + // along the shortest diagonal. + const int i0 = MakeIndex(f, 0); + const int i1 = MakeIndex(f, 1); + const int i2 = MakeIndex(f, 2); + const int i3 = MakeIndex(f, 3); + const SVec3 T0 = GetTexCoord(pContext, i0); + const SVec3 T1 = GetTexCoord(pContext, i1); + const SVec3 T2 = GetTexCoord(pContext, i2); + const SVec3 T3 = GetTexCoord(pContext, i3); + const float distSQ_02 = LengthSquared(vsub(T2,T0)); + const float distSQ_13 = LengthSquared(vsub(T3,T1)); + tbool bQuadDiagIs_02; + if (distSQ_02m_pInterface->m_getPosition(pContext, pos, iF, iI); + res.x=pos[0]; res.y=pos[1]; res.z=pos[2]; + return res; +} + +static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index) +{ + int iF, iI; + SVec3 res; float norm[3]; + IndexToData(&iF, &iI, index); + pContext->m_pInterface->m_getNormal(pContext, norm, iF, iI); + res.x=norm[0]; res.y=norm[1]; res.z=norm[2]; + return res; +} + +static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index) +{ + int iF, iI; + SVec3 res; float texc[2]; + IndexToData(&iF, &iI, index); + pContext->m_pInterface->m_getTexCoord(pContext, texc, iF, iI); + res.x=texc[0]; res.y=texc[1]; res.z=1.0f; + return res; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef union { + struct + { + int i0, i1, f; + }; + int array[3]; +} SEdge; + +static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn); +static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], const int iNrTrianglesIn); + +// returns the texture area times 2 +static float CalcTexArea(const SMikkTSpaceContext * pContext, const int indices[]) +{ + const SVec3 t1 = GetTexCoord(pContext, indices[0]); + const SVec3 t2 = GetTexCoord(pContext, indices[1]); + const SVec3 t3 = GetTexCoord(pContext, indices[2]); + + 
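+	// Twice the signed area of the texture-space triangle is the 2D cross
+	// product of the UV edge vectors (t2 - t1) and (t3 - t1); the sign is
+	// stripped before returning.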
const float t21x = t2.x-t1.x; + const float t21y = t2.y-t1.y; + const float t31x = t3.x-t1.x; + const float t31y = t3.y-t1.y; + + const float fSignedAreaSTx2 = t21x*t31y - t21y*t31x; + + return fSignedAreaSTx2<0 ? (-fSignedAreaSTx2) : fSignedAreaSTx2; +} + +static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +{ + int f=0, i=0, t=0; + // pTriInfos[f].iFlag is cleared in GenerateInitialVerticesIndexList() which is called before this function. + + // generate neighbor info list + for (f=0; f0 ? ORIENT_PRESERVING : 0); + + if ( NotZero(fSignedAreaSTx2) ) + { + const float fAbsArea = fabsf(fSignedAreaSTx2); + const float fLenOs = Length(vOs); + const float fLenOt = Length(vOt); + const float fS = (pTriInfos[f].iFlag&ORIENT_PRESERVING)==0 ? (-1.0f) : 1.0f; + if ( NotZero(fLenOs) ) pTriInfos[f].vOs = vscale(fS/fLenOs, vOs); + if ( NotZero(fLenOt) ) pTriInfos[f].vOt = vscale(fS/fLenOt, vOt); + + // evaluate magnitudes prior to normalization of vOs and vOt + pTriInfos[f].fMagS = fLenOs / fAbsArea; + pTriInfos[f].fMagT = fLenOt / fAbsArea; + + // if this is a good triangle + if ( NotZero(pTriInfos[f].fMagS) && NotZero(pTriInfos[f].fMagT)) + pTriInfos[f].iFlag &= (~GROUP_WITH_ANY); + } + } + + // force otherwise healthy quads to a fixed orientation + while (t<(iNrTrianglesIn-1)) + { + const int iFO_a = pTriInfos[t].iOrgFaceNumber; + const int iFO_b = pTriInfos[t+1].iOrgFaceNumber; + if (iFO_a==iFO_b) // this is a quad + { + const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + + // bad triangles should already have been removed by + // DegenPrologue(), but just in case check bIsDeg_a and bIsDeg_a are false + if ((bIsDeg_a||bIsDeg_b)==TFALSE) + { + const tbool bOrientA = (pTriInfos[t].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + const tbool bOrientB = (pTriInfos[t+1].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + // if this happens the quad has extremely bad mapping!! + if (bOrientA!=bOrientB) + { + //printf("found quad with bad mapping\n"); + tbool bChooseOrientFirstTri = TFALSE; + if ((pTriInfos[t+1].iFlag&GROUP_WITH_ANY)!=0) bChooseOrientFirstTri = TTRUE; + else if ( CalcTexArea(pContext, &piTriListIn[t*3+0]) >= CalcTexArea(pContext, &piTriListIn[(t+1)*3+0]) ) + bChooseOrientFirstTri = TTRUE; + + // force match + { + const int t0 = bChooseOrientFirstTri ? t : (t+1); + const int t1 = bChooseOrientFirstTri ? 
(t+1) : t; + pTriInfos[t1].iFlag &= (~ORIENT_PRESERVING); // clear first + pTriInfos[t1].iFlag |= (pTriInfos[t0].iFlag&ORIENT_PRESERVING); // copy bit + } + } + } + t += 2; + } + else + ++t; + } + + // match up edge pairs + { + SEdge * pEdges = (SEdge *) malloc(sizeof(SEdge)*iNrTrianglesIn*3); + if (pEdges==NULL) + BuildNeighborsSlow(pTriInfos, piTriListIn, iNrTrianglesIn); + else + { + BuildNeighborsFast(pTriInfos, pEdges, piTriListIn, iNrTrianglesIn); + + free(pEdges); + } + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////// + +static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], const int iMyTriIndex, SGroup * pGroup); +static void AddTriToGroup(SGroup * pGroup, const int iTriIndex); + +static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn) +{ + const int iNrMaxGroups = iNrTrianglesIn*3; + int iNrActiveGroups = 0; + int iOffset = 0, f=0, i=0; + (void)iNrMaxGroups; /* quiet warnings in non debug mode */ + for (f=0; fiVertexRepresentitive = vert_index; + pTriInfos[f].AssignedGroup[i]->bOrientPreservering = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0; + pTriInfos[f].AssignedGroup[i]->iNrFaces = 0; + pTriInfos[f].AssignedGroup[i]->pFaceIndices = &piGroupTrianglesBuffer[iOffset]; + ++iNrActiveGroups; + + AddTriToGroup(pTriInfos[f].AssignedGroup[i], f); + bOrPre = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + neigh_indexL = pTriInfos[f].FaceNeighbors[i]; + neigh_indexR = pTriInfos[f].FaceNeighbors[i>0?(i-1):2]; + if (neigh_indexL>=0) // neighbor + { + const tbool bAnswer = + AssignRecur(piTriListIn, pTriInfos, neigh_indexL, + pTriInfos[f].AssignedGroup[i] ); + + const tbool bOrPre2 = (pTriInfos[neigh_indexL].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE; + assert(bAnswer || bDiff); + (void)bAnswer, (void)bDiff; /* quiet warnings in non debug mode */ + } + if (neigh_indexR>=0) // neighbor + { + const tbool bAnswer = + AssignRecur(piTriListIn, pTriInfos, neigh_indexR, + pTriInfos[f].AssignedGroup[i] ); + + const tbool bOrPre2 = (pTriInfos[neigh_indexR].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE; + assert(bAnswer || bDiff); + (void)bAnswer, (void)bDiff; /* quiet warnings in non debug mode */ + } + + // update offset + iOffset += pTriInfos[f].AssignedGroup[i]->iNrFaces; + // since the groups are disjoint a triangle can never + // belong to more than 3 groups. Subsequently something + // is completely screwed if this assertion ever hits. 
+ assert(iOffset <= iNrMaxGroups); + } + } + } + + return iNrActiveGroups; +} + +static void AddTriToGroup(SGroup * pGroup, const int iTriIndex) +{ + pGroup->pFaceIndices[pGroup->iNrFaces] = iTriIndex; + ++pGroup->iNrFaces; +} + +static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], + const int iMyTriIndex, SGroup * pGroup) +{ + STriInfo * pMyTriInfo = &psTriInfos[iMyTriIndex]; + + // track down vertex + const int iVertRep = pGroup->iVertexRepresentitive; + const int * pVerts = &piTriListIn[3*iMyTriIndex+0]; + int i=-1; + if (pVerts[0]==iVertRep) i=0; + else if (pVerts[1]==iVertRep) i=1; + else if (pVerts[2]==iVertRep) i=2; + assert(i>=0 && i<3); + + // early out + if (pMyTriInfo->AssignedGroup[i] == pGroup) return TTRUE; + else if (pMyTriInfo->AssignedGroup[i]!=NULL) return TFALSE; + if ((pMyTriInfo->iFlag&GROUP_WITH_ANY)!=0) + { + // first to group with a group-with-anything triangle + // determines it's orientation. + // This is the only existing order dependency in the code!! + if ( pMyTriInfo->AssignedGroup[0] == NULL && + pMyTriInfo->AssignedGroup[1] == NULL && + pMyTriInfo->AssignedGroup[2] == NULL ) + { + pMyTriInfo->iFlag &= (~ORIENT_PRESERVING); + pMyTriInfo->iFlag |= (pGroup->bOrientPreservering ? ORIENT_PRESERVING : 0); + } + } + { + const tbool bOrient = (pMyTriInfo->iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + if (bOrient != pGroup->bOrientPreservering) return TFALSE; + } + + AddTriToGroup(pGroup, iMyTriIndex); + pMyTriInfo->AssignedGroup[i] = pGroup; + + { + const int neigh_indexL = pMyTriInfo->FaceNeighbors[i]; + const int neigh_indexR = pMyTriInfo->FaceNeighbors[i>0?(i-1):2]; + if (neigh_indexL>=0) + AssignRecur(piTriListIn, psTriInfos, neigh_indexL, pGroup); + if (neigh_indexR>=0) + AssignRecur(piTriListIn, psTriInfos, neigh_indexR, pGroup); + } + + + + return TTRUE; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////// + +static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2); +static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed); +static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriListIn[], const STriInfo pTriInfos[], const SMikkTSpaceContext * pContext, const int iVertexRepresentitive); + +static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[], + const int iNrActiveGroups, const int piTriListIn[], const float fThresCos, + const SMikkTSpaceContext * pContext) +{ + STSpace * pSubGroupTspace = NULL; + SSubGroup * pUniSubGroups = NULL; + int * pTmpMembers = NULL; + int iMaxNrFaces=0, iUniqueTspaces=0, g=0, i=0; + for (g=0; giNrFaces; i++) // triangles + { + const int f = pGroup->pFaceIndices[i]; // triangle number + int index=-1, iVertIndex=-1, iOF_1=-1, iMembers=0, j=0, l=0; + SSubGroup tmp_group; + tbool bFound; + SVec3 n, vOs, vOt; + if (pTriInfos[f].AssignedGroup[0]==pGroup) index=0; + else if (pTriInfos[f].AssignedGroup[1]==pGroup) index=1; + else if (pTriInfos[f].AssignedGroup[2]==pGroup) index=2; + assert(index>=0 && index<3); + + iVertIndex = piTriListIn[f*3+index]; + assert(iVertIndex==pGroup->iVertexRepresentitive); + + // is normalized already + n = GetNormal(pContext, iVertIndex); + + // project + vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); + vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); + if ( VNotZero(vOs) ) vOs = 
Normalize(vOs); + if ( VNotZero(vOt) ) vOt = Normalize(vOt); + + // original face number + iOF_1 = pTriInfos[f].iOrgFaceNumber; + + iMembers = 0; + for (j=0; jiNrFaces; j++) + { + const int t = pGroup->pFaceIndices[j]; // triangle number + const int iOF_2 = pTriInfos[t].iOrgFaceNumber; + + // project + SVec3 vOs2 = vsub(pTriInfos[t].vOs, vscale(vdot(n,pTriInfos[t].vOs), n)); + SVec3 vOt2 = vsub(pTriInfos[t].vOt, vscale(vdot(n,pTriInfos[t].vOt), n)); + if ( VNotZero(vOs2) ) vOs2 = Normalize(vOs2); + if ( VNotZero(vOt2) ) vOt2 = Normalize(vOt2); + + { + const tbool bAny = ( (pTriInfos[f].iFlag | pTriInfos[t].iFlag) & GROUP_WITH_ANY )!=0 ? TTRUE : TFALSE; + // make sure triangles which belong to the same quad are joined. + const tbool bSameOrgFace = iOF_1==iOF_2 ? TTRUE : TFALSE; + + const float fCosS = vdot(vOs,vOs2); + const float fCosT = vdot(vOt,vOt2); + + assert(f!=t || bSameOrgFace); // sanity check + if (bAny || bSameOrgFace || (fCosS>fThresCos && fCosT>fThresCos)) + pTmpMembers[iMembers++] = t; + } + } + + // sort pTmpMembers + tmp_group.iNrFaces = iMembers; + tmp_group.pTriMembers = pTmpMembers; + if (iMembers>1) + { + unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed? + QuickSort(pTmpMembers, 0, iMembers-1, uSeed); + } + + // look for an existing match + bFound = TFALSE; + l=0; + while (liVertexRepresentitive); + ++iUniqueSubGroups; + } + + // output tspace + { + const int iOffs = pTriInfos[f].iTSpacesOffs; + const int iVert = pTriInfos[f].vert_num[index]; + STSpace * pTS_out = &psTspace[iOffs+iVert]; + assert(pTS_out->iCounter<2); + assert(((pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0) == pGroup->bOrientPreservering); + if (pTS_out->iCounter==1) + { + *pTS_out = AvgTSpace(pTS_out, &pSubGroupTspace[l]); + pTS_out->iCounter = 2; // update counter + pTS_out->bOrient = pGroup->bOrientPreservering; + } + else + { + assert(pTS_out->iCounter==0); + *pTS_out = pSubGroupTspace[l]; + pTS_out->iCounter = 1; // update counter + pTS_out->bOrient = pGroup->bOrientPreservering; + } + } + } + + // clean up and offset iUniqueTspaces + for (s=0; s=0 && i<3); + + // project + index = piTriListIn[3*f+i]; + n = GetNormal(pContext, index); + vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); + vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); + if ( VNotZero(vOs) ) vOs = Normalize(vOs); + if ( VNotZero(vOt) ) vOt = Normalize(vOt); + + i2 = piTriListIn[3*f + (i<2?(i+1):0)]; + i1 = piTriListIn[3*f + i]; + i0 = piTriListIn[3*f + (i>0?(i-1):2)]; + + p0 = GetPosition(pContext, i0); + p1 = GetPosition(pContext, i1); + p2 = GetPosition(pContext, i2); + v1 = vsub(p0,p1); + v2 = vsub(p2,p1); + + // project + v1 = vsub(v1, vscale(vdot(n,v1),n)); if ( VNotZero(v1) ) v1 = Normalize(v1); + v2 = vsub(v2, vscale(vdot(n,v2),n)); if ( VNotZero(v2) ) v2 = Normalize(v2); + + // weight contribution by the angle + // between the two edge vectors + fCos = vdot(v1,v2); fCos=fCos>1?1:(fCos<(-1) ? 
(-1) : fCos); + fAngle = (float) acos(fCos); + fMagS = pTriInfos[f].fMagS; + fMagT = pTriInfos[f].fMagT; + + res.vOs=vadd(res.vOs, vscale(fAngle,vOs)); + res.vOt=vadd(res.vOt,vscale(fAngle,vOt)); + res.fMagS+=(fAngle*fMagS); + res.fMagT+=(fAngle*fMagT); + fAngleSum += fAngle; + } + } + + // normalize + if ( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs); + if ( VNotZero(res.vOt) ) res.vOt = Normalize(res.vOt); + if (fAngleSum>0) + { + res.fMagS /= fAngleSum; + res.fMagT /= fAngleSum; + } + + return res; +} + +static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2) +{ + tbool bStillSame=TTRUE; + int i=0; + if (pg1->iNrFaces!=pg2->iNrFaces) return TFALSE; + while (iiNrFaces && bStillSame) + { + bStillSame = pg1->pTriMembers[i]==pg2->pTriMembers[i] ? TTRUE : TFALSE; + if (bStillSame) ++i; + } + return bStillSame; +} + +static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed) +{ + int iL, iR, n, index, iMid, iTmp; + + // Random + unsigned int t=uSeed&31; + t=(uSeed<>(32-t)); + uSeed=uSeed+t+3; + // Random end + + iL=iLeft; iR=iRight; + n = (iR-iL)+1; + assert(n>=0); + index = (int) (uSeed%n); + + iMid=pSortBuffer[index + iL]; + + + do + { + while (pSortBuffer[iL] < iMid) + ++iL; + while (pSortBuffer[iR] > iMid) + --iR; + + if (iL <= iR) + { + iTmp = pSortBuffer[iL]; + pSortBuffer[iL] = pSortBuffer[iR]; + pSortBuffer[iR] = iTmp; + ++iL; --iR; + } + } + while (iL <= iR); + + if (iLeft < iR) + QuickSort(pSortBuffer, iLeft, iR, uSeed); + if (iL < iRight) + QuickSort(pSortBuffer, iL, iRight, uSeed); +} + +///////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////// + +static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int channel, unsigned int uSeed); +static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in); + +static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn) +{ + // build array of edges + unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed? 
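+	// Strategy: record every triangle edge as (lower index, higher index, face),
+	// sort the edge array so identical edges end up next to each other, then
+	// connect the two triangles sharing an edge via their FaceNeighbors entries.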
+ int iEntries=0, iCurStartIndex=-1, f=0, i=0; + for (f=0; f pSortBuffer[iRight].array[channel]) + { + sTmp = pSortBuffer[iLeft]; + pSortBuffer[iLeft] = pSortBuffer[iRight]; + pSortBuffer[iRight] = sTmp; + } + return; + } + + // Random + t=uSeed&31; + t=(uSeed<>(32-t)); + uSeed=uSeed+t+3; + // Random end + + iL = iLeft; + iR = iRight; + n = (iR-iL)+1; + assert(n>=0); + index = (int) (uSeed%n); + + iMid=pSortBuffer[index + iL].array[channel]; + + do + { + while (pSortBuffer[iL].array[channel] < iMid) + ++iL; + while (pSortBuffer[iR].array[channel] > iMid) + --iR; + + if (iL <= iR) + { + sTmp = pSortBuffer[iL]; + pSortBuffer[iL] = pSortBuffer[iR]; + pSortBuffer[iR] = sTmp; + ++iL; --iR; + } + } + while (iL <= iR); + + if (iLeft < iR) + QuickSortEdges(pSortBuffer, iLeft, iR, channel, uSeed); + if (iL < iRight) + QuickSortEdges(pSortBuffer, iL, iRight, channel, uSeed); +} + +// resolve ordering and edge number +static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in) +{ + *edgenum_out = -1; + + // test if first index is on the edge + if (indices[0]==i0_in || indices[0]==i1_in) + { + // test if second index is on the edge + if (indices[1]==i0_in || indices[1]==i1_in) + { + edgenum_out[0]=0; // first edge + i0_out[0]=indices[0]; + i1_out[0]=indices[1]; + } + else + { + edgenum_out[0]=2; // third edge + i0_out[0]=indices[2]; + i1_out[0]=indices[0]; + } + } + else + { + // only second and third index is on the edge + edgenum_out[0]=1; // second edge + i0_out[0]=indices[1]; + i1_out[0]=indices[2]; + } +} + + +///////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////// Degenerate triangles //////////////////////////////////// + +static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris) +{ + int iNextGoodTriangleSearchIndex=-1; + tbool bStillFindingGoodOnes; + + // locate quads with only one good triangle + int t=0; + while (t<(iTotTris-1)) + { + const int iFO_a = pTriInfos[t].iOrgFaceNumber; + const int iFO_b = pTriInfos[t+1].iOrgFaceNumber; + if (iFO_a==iFO_b) // this is a quad + { + const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + if ((bIsDeg_a^bIsDeg_b)!=0) + { + pTriInfos[t].iFlag |= QUAD_ONE_DEGEN_TRI; + pTriInfos[t+1].iFlag |= QUAD_ONE_DEGEN_TRI; + } + t += 2; + } + else + ++t; + } + + // reorder list so all degen triangles are moved to the back + // without reordering the good triangles + iNextGoodTriangleSearchIndex = 1; + t=0; + bStillFindingGoodOnes = TTRUE; + while (t (t+1)); + + // swap triangle t0 and t1 + if (!bJustADegenerate) + { + int i=0; + for (i=0; i<3; i++) + { + const int index = piTriList_out[t0*3+i]; + piTriList_out[t0*3+i] = piTriList_out[t1*3+i]; + piTriList_out[t1*3+i] = index; + } + { + const STriInfo tri_info = pTriInfos[t0]; + pTriInfos[t0] = pTriInfos[t1]; + pTriInfos[t1] = tri_info; + } + } + else + bStillFindingGoodOnes = TFALSE; // this is not supposed to happen + } + + if (bStillFindingGoodOnes) ++t; + } + + assert(bStillFindingGoodOnes); // code will still work. 
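+	// After the reorder above the first iNrTrianglesIn entries are exactly the
+	// good (non-degenerate) triangles; everything behind them is degenerate.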
+ assert(iNrTrianglesIn == t); +} + +static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris) +{ + int t=0, i=0; + // deal with degenerate triangles + // punishment for degenerate triangles is O(N^2) + for (t=iNrTrianglesIn; t http://image.diku.dk/projects/media/morten.mikkelsen.08.pdf + * Note that though the tangent spaces at the vertices are generated in an order-independent way, + * by this implementation, the interpolated tangent space is still affected by which diagonal is + * chosen to split each quad. A sensible solution is to have your tools pipeline always + * split quads by the shortest diagonal. This choice is order-independent and works with mirroring. + * If these have the same length then compare the diagonals defined by the texture coordinates. + * XNormal which is a tool for baking normal maps allows you to write your own tangent space plugin + * and also quad triangulator plugin. + */ + + +typedef int tbool; +typedef struct SMikkTSpaceContext SMikkTSpaceContext; + +typedef struct { + // Returns the number of faces (triangles/quads) on the mesh to be processed. + int (*m_getNumFaces)(const SMikkTSpaceContext * pContext); + + // Returns the number of vertices on face number iFace + // iFace is a number in the range {0, 1, ..., getNumFaces()-1} + int (*m_getNumVerticesOfFace)(const SMikkTSpaceContext * pContext, const int iFace); + + // returns the position/normal/texcoord of the referenced face of vertex number iVert. + // iVert is in the range {0,1,2} for triangles and {0,1,2,3} for quads. + void (*m_getPosition)(const SMikkTSpaceContext * pContext, float fvPosOut[], const int iFace, const int iVert); + void (*m_getNormal)(const SMikkTSpaceContext * pContext, float fvNormOut[], const int iFace, const int iVert); + void (*m_getTexCoord)(const SMikkTSpaceContext * pContext, float fvTexcOut[], const int iFace, const int iVert); + + // either (or both) of the two setTSpace callbacks can be set. + // The call-back m_setTSpaceBasic() is sufficient for basic normal mapping. + + // This function is used to return the tangent and fSign to the application. + // fvTangent is a unit length vector. + // For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level. + // bitangent = fSign * cross(vN, tangent); + // Note that the results are returned unindexed. It is possible to generate a new index list + // But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCRORRECT results. + // DO NOT! use an already existing index list. + void (*m_setTSpaceBasic)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fSign, const int iFace, const int iVert); + + // This function is used to return tangent space results to the application. + // fvTangent and fvBiTangent are unit length vectors and fMagS and fMagT are their + // true magnitudes which can be used for relief mapping effects. + // fvBiTangent is the "real" bitangent and thus may not be perpendicular to fvTangent. + // However, both are perpendicular to the vertex normal. + // For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level. + // fSign = bIsOrientationPreserving ? 1.0f : (-1.0f); + // bitangent = fSign * cross(vN, tangent); + // Note that the results are returned unindexed. 
It is possible to generate a new index list + // But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCRORRECT results. + // DO NOT! use an already existing index list. + void (*m_setTSpace)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fvBiTangent[], const float fMagS, const float fMagT, + const tbool bIsOrientationPreserving, const int iFace, const int iVert); +} SMikkTSpaceInterface; + +struct SMikkTSpaceContext +{ + SMikkTSpaceInterface * m_pInterface; // initialized with callback functions + void * m_pUserData; // pointer to client side mesh data etc. (passed as the first parameter with every interface call) +}; + +// these are both thread safe! +tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext); // Default (recommended) fAngularThreshold is 180 degrees (which means threshold disabled) +tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold); + + +// To avoid visual errors (distortions/unwanted hard edges in lighting), when using sampled normal maps, the +// normal map sampler must use the exact inverse of the pixel shader transformation. +// The most efficient transformation we can possibly do in the pixel shader is +// achieved by using, directly, the "unnormalized" interpolated tangent, bitangent and vertex normal: vT, vB and vN. +// pixel shader (fast transform out) +// vNout = normalize( vNt.x * vT + vNt.y * vB + vNt.z * vN ); +// where vNt is the tangent space normal. The normal map sampler must likewise use the +// interpolated and "unnormalized" tangent, bitangent and vertex normal to be compliant with the pixel shader. +// sampler does (exact inverse of pixel shader): +// float3 row0 = cross(vB, vN); +// float3 row1 = cross(vN, vT); +// float3 row2 = cross(vT, vB); +// float fSign = dot(vT, row0)<0 ? -1 : 1; +// vNt = normalize( fSign * float3(dot(vNout,row0), dot(vNout,row1), dot(vNout,row2)) ); +// where vNout is the sampled normal in some chosen 3D space. +// +// Should you choose to reconstruct the bitangent in the pixel shader instead +// of the vertex shader, as explained earlier, then be sure to do this in the normal map sampler also. +// Finally, beware of quad triangulations. If the normal map sampler doesn't use the same triangulation of +// quads as your renderer then problems will occur since the interpolated tangent spaces will differ +// eventhough the vertex level tangent spaces match. This can be solved either by triangulating before +// sampling/exporting or by using the order-independent choice of diagonal for splitting quads suggested earlier. +// However, this must be used both by the sampler and your tools/rendering pipeline. 
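+// Minimal usage sketch (not part of this header). The mesh type and the "ex*"
+// adapter functions below are hypothetical; only the SMikkTSpace* types and
+// genTangSpaceDefault() come from this interface.
+//
+//   typedef struct {
+//       int num_tris;
+//       const int * indices;   /* 3 ints per triangle */
+//       const float * pos;     /* 3 floats per vertex */
+//       const float * nrm;     /* 3 floats per vertex */
+//       const float * uv;      /* 2 floats per vertex */
+//       float * tangent4;      /* out: 4 floats per face-vertex (xyz + sign) */
+//   } ExampleMesh;
+//
+//   static int exGetNumFaces(const SMikkTSpaceContext * c)
+//   {   return ((const ExampleMesh *)c->m_pUserData)->num_tris;   }
+//   static int exGetNumVerts(const SMikkTSpaceContext * c, const int f)
+//   {   (void)c; (void)f; return 3;   }
+//   static void exGetPosition(const SMikkTSpaceContext * c, float out[], const int f, const int v)
+//   {   const ExampleMesh * m = (const ExampleMesh *)c->m_pUserData;
+//       const int i = m->indices[3*f+v];
+//       out[0]=m->pos[3*i+0]; out[1]=m->pos[3*i+1]; out[2]=m->pos[3*i+2];   }
+//   static void exGetNormal(const SMikkTSpaceContext * c, float out[], const int f, const int v)
+//   {   const ExampleMesh * m = (const ExampleMesh *)c->m_pUserData;
+//       const int i = m->indices[3*f+v];
+//       out[0]=m->nrm[3*i+0]; out[1]=m->nrm[3*i+1]; out[2]=m->nrm[3*i+2];   }
+//   static void exGetTexCoord(const SMikkTSpaceContext * c, float out[], const int f, const int v)
+//   {   const ExampleMesh * m = (const ExampleMesh *)c->m_pUserData;
+//       const int i = m->indices[3*f+v];
+//       out[0]=m->uv[2*i+0]; out[1]=m->uv[2*i+1];   }
+//   static void exSetTSpaceBasic(const SMikkTSpaceContext * c, const float tangent[],
+//                                const float sign, const int f, const int v)
+//   {   ExampleMesh * m = (ExampleMesh *)c->m_pUserData;
+//       float * dst = &m->tangent4[4*(3*f+v)];
+//       dst[0]=tangent[0]; dst[1]=tangent[1]; dst[2]=tangent[2]; dst[3]=sign;   }
+//
+//   ExampleMesh mesh;                  /* filled in by the application */
+//   SMikkTSpaceInterface iface = {0};  /* unset callbacks stay NULL */
+//   SMikkTSpaceContext ctx = {0};
+//   iface.m_getNumFaces = exGetNumFaces;
+//   iface.m_getNumVerticesOfFace = exGetNumVerts;
+//   iface.m_getPosition = exGetPosition;
+//   iface.m_getNormal = exGetNormal;
+//   iface.m_getTexCoord = exGetTexCoord;
+//   iface.m_setTSpaceBasic = exSetTSpaceBasic;  /* m_setTSpace left NULL */
+//   ctx.m_pInterface = &iface;
+//   ctx.m_pUserData = &mesh;
+//   genTangSpaceDefault(&ctx);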
+ +#ifdef __cplusplus +} +#endif + +#endif diff --git a/cpp/open3d/t/io/file_format/FileASSIMP.cpp b/cpp/open3d/t/io/file_format/FileASSIMP.cpp index 1ce61332bd9..8c2be3348e6 100644 --- a/cpp/open3d/t/io/file_format/FileASSIMP.cpp +++ b/cpp/open3d/t/io/file_format/FileASSIMP.cpp @@ -554,6 +554,7 @@ bool WriteTriangleMeshUsingASSIMP(const std::string& filename, int n_textures = 0; if (w_mesh.GetMaterial().HasAlbedoMap()) ++n_textures; if (w_mesh.GetMaterial().HasNormalMap()) ++n_textures; + if (w_mesh.GetMaterial().HasAOMap()) ++n_textures; if (w_mesh.GetMaterial().HasAORoughnessMetalMap()) { ++n_textures; } else if (w_mesh.GetMaterial().HasRoughnessMap() && @@ -581,6 +582,12 @@ bool WriteTriangleMeshUsingASSIMP(const std::string& filename, aiTextureType_BASE_COLOR, img); ++current_idx; } + if (w_mesh.GetMaterial().HasAOMap()) { + auto img = w_mesh.GetMaterial().GetAOMap(); + SetTextureMaterialProperty(ai_mat, ai_scene.get(), current_idx, + aiTextureType_LIGHTMAP, img); + ++current_idx; + } if (w_mesh.GetMaterial().HasAORoughnessMetalMap()) { auto img = w_mesh.GetMaterial().GetAORoughnessMetalMap(); SetTextureMaterialProperty(ai_mat, ai_scene.get(), current_idx, @@ -588,36 +595,30 @@ bool WriteTriangleMeshUsingASSIMP(const std::string& filename, ++current_idx; } else if (w_mesh.GetMaterial().HasRoughnessMap() && w_mesh.GetMaterial().HasMetallicMap()) { - auto rough = w_mesh.GetMaterial().GetRoughnessMap(); - auto metal = w_mesh.GetMaterial().GetMetallicMap(); - auto rows = rough.GetRows(); - auto cols = rough.GetCols(); - auto rough_metal = - geometry::Image(rows, cols, 4, core::Dtype::UInt8); - rough_metal.AsTensor() = - core::Tensor::Ones(rough_metal.AsTensor().GetShape(), - core::Dtype::UInt8) * - 255; - auto metal_channel = metal.AsTensor().GetItem( - {core::TensorKey::Slice(0, rows + 1, core::None), - core::TensorKey::Slice(0, cols + 1, core::None), - core::TensorKey::Index(0)}); - auto rough_channel = rough.AsTensor().GetItem( - {core::TensorKey::Slice(0, rows + 1, core::None), - core::TensorKey::Slice(0, cols + 1, core::None), - core::TensorKey::Index(0)}); - rough_metal.AsTensor().SetItem( - {core::TensorKey::Slice(0, rows + 1, core::None), - core::TensorKey::Slice(0, cols + 1, core::None), - core::TensorKey::Index(2)}, // metallic in blue - metal_channel); - rough_metal.AsTensor().SetItem( - {core::TensorKey::Slice(0, rows + 1, core::None), - core::TensorKey::Slice(0, cols + 1, core::None), - core::TensorKey::Index(1)}, // roughness in green - rough_channel); + auto rough = w_mesh.GetMaterial().GetRoughnessMap().AsTensor(); + auto metal = w_mesh.GetMaterial().GetMetallicMap().AsTensor(); + bool badshape = false; + if (rough.GetShape() != metal.GetShape()) { + utility::LogWarning( + "RoughnessMap (shape={}) and MetallicMap (shape={}) " + "must have the same shape. 
Not saving MetallicMap!", + rough.GetShape(), metal.GetShape()); + badshape = true; + } + auto rows = rough.GetShape(0); + auto cols = rough.GetShape(1); + auto rough_metal = core::Tensor::Full({rows, cols, 4}, 255, + core::Dtype::UInt8); + if (!badshape) { + rough_metal.Slice(2, 2, 3) = + metal.Slice(2, 0, 1); // blue channel is metal + } + rough_metal.Slice(2, 1, 2) = + rough.Slice(2, 0, 1); // green channel is roughness + + geometry::Image rough_metal_img(rough_metal); SetTextureMaterialProperty(ai_mat, ai_scene.get(), current_idx, - aiTextureType_UNKNOWN, rough_metal); + aiTextureType_UNKNOWN, rough_metal_img); ++current_idx; } else { if (w_mesh.GetMaterial().HasRoughnessMap()) { diff --git a/cpp/open3d/visualization/gui/GLFWWindowSystem.cpp b/cpp/open3d/visualization/gui/GLFWWindowSystem.cpp index 852f6e3db24..e6e9f0d472b 100644 --- a/cpp/open3d/visualization/gui/GLFWWindowSystem.cpp +++ b/cpp/open3d/visualization/gui/GLFWWindowSystem.cpp @@ -12,6 +12,7 @@ #include #include +#include "open3d/utility/Logging.h" #include "open3d/visualization/gui/Application.h" #include "open3d/visualization/gui/Events.h" #include "open3d/visualization/gui/MenuImgui.h" @@ -151,7 +152,7 @@ GLFWWindowSystem::OSWindow GLFWWindowSystem::CreateOSWindow(Window* o3d_window, const char* title, int flags) { glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); - glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2); + glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3); glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // NOTE: Setting alpha and stencil bits to match GLX standard default @@ -164,8 +165,8 @@ GLFWWindowSystem::OSWindow GLFWWindowSystem::CreateOSWindow(Window* o3d_window, #if __APPLE__ glfwWindowHint(GLFW_COCOA_RETINA_FRAMEBUFFER, GLFW_TRUE); #endif - bool visible = !(flags & FLAG_HIDDEN); - glfwWindowHint(GLFW_VISIBLE, visible ? GLFW_TRUE : GLFW_FALSE); + // bool visible = !(flags & FLAG_HIDDEN); + glfwWindowHint(GLFW_VISIBLE, GLFW_TRUE); glfwWindowHint(GLFW_FLOATING, ((flags & FLAG_TOPMOST) != 0 ? 
GLFW_TRUE : GLFW_FALSE)); @@ -183,6 +184,22 @@ GLFWWindowSystem::OSWindow GLFWWindowSystem::CreateOSWindow(Window* o3d_window, glfwSetDropCallback(glfw_window, DragDropCallback); glfwSetWindowCloseCallback(glfw_window, CloseCallback); + // Ensure window size is properly set on Wayland + if (glfwGetPlatform() == GLFW_PLATFORM_WAYLAND) { + glfwSetWindowSize(glfw_window, width, height); + glfwWaitEventsTimeout(0.1); + } + glfwShowWindow(glfw_window); + glfwMakeContextCurrent(glfw_window); + glfwSwapInterval(1); + //if (!visible) { + // glfwHideWindow(glfw_window); + //} else { + // glfwShowWindow(glfw_window); + //} + utility::LogInfo("[GLFW] Created window '{}' ({}x{})\n", title, width, + height); + return glfw_window; } @@ -216,12 +233,20 @@ bool GLFWWindowSystem::IsActiveWindow(OSWindow w) const { Point GLFWWindowSystem::GetWindowPos(OSWindow w) const { int x, y; + if (glfwGetPlatform() == GLFW_PLATFORM_WAYLAND) { + utility::LogDebug("[GLFW] getWindowPos() is not supported on Wayland."); + return Point(0, 0); + } glfwGetWindowPos((GLFWwindow*)w, &x, &y); return Point(x, y); } void GLFWWindowSystem::SetWindowPos(OSWindow w, int x, int y) { - glfwSetWindowPos((GLFWwindow*)w, x, y); + if (glfwGetPlatform() == GLFW_PLATFORM_WAYLAND) { + utility::LogDebug("[GLFW] setWindowPos() is not supported on Wayland."); + } else { + glfwSetWindowPos((GLFWwindow*)w, x, y); + } } Size GLFWWindowSystem::GetWindowSize(OSWindow w) const { diff --git a/cpp/open3d/visualization/rendering/Material.h b/cpp/open3d/visualization/rendering/Material.h index 2eeddcafc6a..68917a56e21 100644 --- a/cpp/open3d/visualization/rendering/Material.h +++ b/cpp/open3d/visualization/rendering/Material.h @@ -58,7 +58,7 @@ class Material { /// String reprentation for printing. std::string ToString() const; - /// Returns the texture map map + /// Returns the texture maps const TextureMaps &GetTextureMaps() const { return texture_maps_; } /// Get images (texture maps) of this Material. 
Throws exception if the diff --git a/cpp/open3d/visualization/visualizer/Visualizer.cpp b/cpp/open3d/visualization/visualizer/Visualizer.cpp index 85c87fb02f4..a055b59e86d 100644 --- a/cpp/open3d/visualization/visualizer/Visualizer.cpp +++ b/cpp/open3d/visualization/visualizer/Visualizer.cpp @@ -102,7 +102,9 @@ bool Visualizer::CreateVisualizerWindow( if (window_) { // window already created utility::LogDebug("[Visualizer] Reusing window."); UpdateWindowTitle(); - glfwSetWindowPos(window_, left, top); + if (glfwGetPlatform() != GLFW_PLATFORM_WAYLAND) { + glfwSetWindowPos(window_, left, top); + } glfwSetWindowSize(window_, width, height); #ifdef __APPLE__ glfwSetWindowSize(window_, @@ -141,7 +143,9 @@ bool Visualizer::CreateVisualizerWindow( utility::LogWarning("Failed to create window"); return false; } - glfwSetWindowPos(window_, left, top); + if (glfwGetPlatform() != GLFW_PLATFORM_WAYLAND) { + glfwSetWindowPos(window_, left, top); + } glfwSetWindowUserPointer(window_, this); #ifdef __APPLE__ @@ -480,13 +484,20 @@ bool Visualizer::HasGeometry() const { return !geometry_ptrs_.empty(); } void Visualizer::SetFullScreen(bool fullscreen) { if (!fullscreen) { + // Wayland ignores window position glfwSetWindowMonitor(window_, NULL, saved_window_pos_(0), saved_window_pos_(1), saved_window_size_(0), saved_window_size_(1), GLFW_DONT_CARE); } else { glfwGetWindowSize(window_, &saved_window_size_(0), &saved_window_size_(1)); - glfwGetWindowPos(window_, &saved_window_pos_(0), &saved_window_pos_(1)); + if (glfwGetPlatform() == GLFW_PLATFORM_WAYLAND) { + saved_window_pos_[0] = 0; + saved_window_pos_[1] = 0; + } else { + glfwGetWindowPos(window_, &saved_window_pos_(0), + &saved_window_pos_(1)); + } GLFWmonitor *monitor = glfwGetPrimaryMonitor(); if (const GLFWvidmode *mode = glfwGetVideoMode(monitor)) { glfwSetWindowMonitor(window_, monitor, 0, 0, mode->width, diff --git a/cpp/pybind/core/tensor.cpp b/cpp/pybind/core/tensor.cpp index 31b7381d402..2c2a240dc52 100644 --- a/cpp/pybind/core/tensor.cpp +++ b/cpp/pybind/core/tensor.cpp @@ -489,6 +489,27 @@ void pybind_core_tensor_definitions(py::module& m) { "interval.", "start"_a, "stop"_a, "step"_a = py::none(), "dtype"_a = py::none(), py::kw_only(), "device"_a = py::none()); + tensor.def_static( + "quasirandom", Tensor::Quasirandom, "n"_a = 16, "dims"_a = 2, + "dtype"_a = Float32, "device"_a = Device("CPU:0"), + R"(Generates a tensor containing points from a quasirandom sequence. + + This function creates a tensor of shape (n, dims) where each row is a point in + a low-discrepancy quasirandom sequence (specifically, the generalized golden + ratio based R_n sequence). The 2D variant is the plastic sequence. Such + sequences are commonly used in numerical integration, computer graphics and + sampling tasks where uniform coverage of the space is desired. See + https://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/ + for more details. + + Args: + n: Number of points to generate (default: 16). + dims: Number of dimensions for each point (default: 2). + dtype: Data type of the output tensor (default: Float32). + device: Device on which to allocate the tensor (default: CPU:0). 
+ + Returns: + A tensor of shape (n, dims) containing quasirandom points.)"); tensor.def( "append", @@ -764,6 +785,19 @@ tuple `output` tensor of shape {n,n} and `ipiv` tensor of shape {n}, where tensor.def("__matmul__", &Tensor::Matmul, "Computes matrix multiplication of a" " 2D tensor with another tensor of compatible shape."); + tensor.def("cross", &Tensor::Cross, "other"_a, "axis"_a = -1, R"( + Computes the (batched) cross product of the current tensor with another tensor + of compatible (i.e. broadcastable) shape, and the size of the specified axis + must be 3. If the axis is -1 (default), the last axis is used. + + Args: + other The second input tensor to compute the cross product with. + axis The axis along which to compute the cross product. Default is -1 (the + last axis). + + Returns: + A tensor containing the cross product of the two input tensors along the + specified axis.)"); tensor.def("lstsq", &Tensor::LeastSquares, "Solves the linear system AX = B with QR decomposition and " "returns X. A is a (m, n) matrix with m >= n.", diff --git a/cpp/pybind/t/geometry/trianglemesh.cpp b/cpp/pybind/t/geometry/trianglemesh.cpp index 20353b4848f..beeb096b609 100644 --- a/cpp/pybind/t/geometry/trianglemesh.cpp +++ b/cpp/pybind/t/geometry/trianglemesh.cpp @@ -1199,6 +1199,72 @@ Example:: np.testing.assert_allclose(metrics.cpu().numpy(), (0.1, 0.17, 50, 100), rtol=0.05) )"); + + triangle_mesh.def( + "compute_ambient_occlusion", &TriangleMesh::ComputeAmbientOcclusion, + "tex_width"_a = 256, "n_rays"_a = 32, + "max_hit_distance"_a = INFINITY, "update_material"_a = true, + R"(Computes an ambient occlusion texture for the mesh. + + This method generates an ambient occlusion map by casting rays from the surface + of the mesh and measuring occlusion. The mesh must have texture coordinates + ("texture_uvs" triangle attribute). The resulting texture is a single-channel + grayscale image with values in [0, 255], where 0 is fully occluded and 255 is + fully exposed. + + This function always uses the CPU device. + + Args: + tex_width (int): The width and height of the output texture in pixels (square). + n_rays (int): The number of rays to cast per texel for occlusion estimation. + max_hit_distance (float): The maximum distance for ray intersection. The + default is INFINITY, i.e. very far occlusions also count. + update_material (bool): If true, updates the mesh material with the computed + ambient occlusion texture. + + Returns: + The computed ambient occlusion texture as an Image (single channel, UInt8). + )"); + + triangle_mesh.def( + "compute_tangent_space", &TriangleMesh::ComputeTangentSpace, + "bake"_a = true, "tex_width"_a = 512, + R"(Computes tangent space for the triangle mesh with MikkTSpace. + The mesh must have vertex positions, vertex normals, and texture UVs + (triangle attribute 'texture_uvs'). The computed tangents will be added as + vertex attributes 'tangents' shape (N,4). This function works on the CPU and + will transfer data to the CPU if necessary. + + Args: + bake (bool): If true, the normals and tangents will also be baked to + textures and saved to the material. + tex_width (int): Baked texture size. Default 512. + )"); + + triangle_mesh.def("transform_normal_map", &TriangleMesh::TransformNormalMap, + "normal_map"_a, "to_tangent_space"_a = true, + "update_material"_a = false, + R"(Converts a normal map between world and tangent space. + + The conversion is performed for each pixel of the map. The mesh must + have vertex normals and tangents. 
The tangent space attributes can be + computed with `compute_tangent_space()`. + + Args: + normal_map (o3d.t.geometry.Image): The normal map to convert. + When converting to tangent space, this is the world-space normal map. + When converting to world space, this is the tangent-space normal map. + It is expected to have 3 channels with Float32 or Float64 data type, + with values in range [-1, 1], or UInt8 data type in the range [0, 255]. + to_tangent_space (bool): If true, converts from world to tangent space. + If false, converts from tangent to world space. + update_material (bool): If true and we are converting to tangent space, + the mesh material will be updated to contain the new normal map. + + Returns: + The converted normal map as an Image. This will have 3 channels, + UInt8 data type, with values in range [0, 255]. + )"); } } // namespace geometry diff --git a/cpp/tests/core/Tensor.cpp b/cpp/tests/core/Tensor.cpp index 3963a4200a6..bd57c534f70 100644 --- a/cpp/tests/core/Tensor.cpp +++ b/cpp/tests/core/Tensor.cpp @@ -267,6 +267,31 @@ TEST_P(TensorPermuteDevicesWithSYCL, Arange) { std::runtime_error); } +TEST_P(TensorPermuteDevicesWithSYCL, Quasirandom) { + core::Device device = GetParam(); + + // Basic functionality + core::Tensor qr_ref = core::Tensor::Init({{0.11803399, 0.035687387}, + {0.73606795, 0.5713748}, + {0.35410196, 0.10706216}, + {0.97213596, 0.64274955}, + {0.59016997, 0.17843693}, + {0.20820393, 0.7141243}, + {0.8262379, 0.24981171}, + {0.44427192, 0.7854991}}); + core::Tensor qr = core::Tensor::Quasirandom(8, 2, core::Float32, device); + EXPECT_EQ(qr.GetShape(), core::SizeVector({8, 2})); + EXPECT_EQ(qr.GetDtype(), core::Float32); + EXPECT_EQ(qr.GetDevice(), device); + EXPECT_TRUE(qr.To(core::Device("CPU:0")).AllClose(qr_ref)); + + // Test error conditions + EXPECT_ANY_THROW(core::Tensor::Quasirandom(0, 2)); // n <= 0 + EXPECT_ANY_THROW(core::Tensor::Quasirandom(10, 0)); // dims <= 0 + EXPECT_ANY_THROW( + core::Tensor::Quasirandom(10, 2, core::Int32)); // wrong dtype +} + TEST_P(TensorPermuteDevicesWithSYCL, Fill) { core::Device device = GetParam(); core::Tensor t(std::vector(2 * 3, 0), {2, 3}, core::Float32, device); @@ -1558,6 +1583,33 @@ TEST_P(TensorPermuteDevicesWithSYCL, Det) { EXPECT_ANY_THROW(core::Tensor::Ones({3, 4}, dtype, device).Det()); } +TEST_P(TensorPermuteDevicesWithSYCL, Cross) { + core::Device device = GetParam(); + core::Tensor a = core::Tensor::Init({{1, 2, 3}, {4, 5, 6}}, device); + core::Tensor b = + core::Tensor::Init({{7, 8, 9}, {10, 11, 12}}, device); + + core::Tensor c = a.Cross(b); // axis=-1 + core::Tensor c_ref = + core::Tensor::Init({{-6, 12, -6}, {-6, 12, -6}}, device); + EXPECT_TRUE(c.AllClose(c_ref)); + + // Test with axis=0 + c = a.T().Cross(b.T(), 0); + EXPECT_TRUE(c.AllClose(c_ref.T())); + + // Test with broadcast + a = core::Tensor::Init({1, 2, 3}, device); + b = core::Tensor::Init({{7, 8, 9}, {10, 11, 12}}, device); + c = a.Cross(b); + c_ref = core::Tensor::Init({{-6, 12, -6}, {-9, 18, -9}}, device); + EXPECT_TRUE(c.AllClose(c_ref)); + + // Test error conditions + core::Tensor bad_shape = core::Tensor::Ones({2, 2}, core::Float32, device); + EXPECT_ANY_THROW(a.Cross(bad_shape)); +} + TEST_P(TensorPermuteDevicesWithSYCL, ShallowCopyConstructor) { core::Device device = GetParam(); core::Tensor t({2, 3}, core::Float32, device); diff --git a/cpp/tests/t/geometry/TriangleMesh.cpp b/cpp/tests/t/geometry/TriangleMesh.cpp index cdcc3f40241..0b4f4547c71 100644 --- a/cpp/tests/t/geometry/TriangleMesh.cpp +++ 
b/cpp/tests/t/geometry/TriangleMesh.cpp @@ -11,6 +11,7 @@ #include #include "core/CoreTest.h" +#include "gmock/gmock.h" #include "open3d/core/Dtype.h" #include "open3d/core/EigenConverter.h" #include "open3d/core/SizeVector.h" @@ -1593,5 +1594,111 @@ TEST_P(TriangleMeshPermuteDevices, SamplePointsUniformly) { } } +TEST_P(TriangleMeshPermuteDevices, TangentSpace) { + using ::testing::ElementsAre; + core::Device device("CPU:0"); + + // MikkTSpace and UVAtlas are CPU only. + core::Device cpu_device("CPU:0"); + auto mesh = t::geometry::TriangleMesh::CreateTorus( + 1.0, 0.6, 30, 20, core::Float32, core::Int64, cpu_device); + mesh.ComputeVertexNormals(); + mesh.ComputeUVAtlas(); + int tex_size = 256; + + // 1. Compute Tangent space. + mesh.ComputeTangentSpace(/*bake=*/true, tex_size); + EXPECT_TRUE(mesh.HasVertexAttr("tangents")); + EXPECT_THAT(mesh.GetVertexAttr("tangents").GetShape(), ElementsAre(600, 4)); + EXPECT_TRUE(mesh.GetMaterial().HasTextureMap("tangents")); + EXPECT_THAT( + mesh.GetMaterial().GetTextureMap("tangents").AsTensor().GetShape(), + ElementsAre(tex_size, tex_size, 4)); + EXPECT_TRUE(mesh.GetMaterial().HasTextureMap("normals")); + EXPECT_THAT( + mesh.GetMaterial().GetTextureMap("normals").AsTensor().GetShape(), + ElementsAre(tex_size, tex_size, 3)); + + // Visual inspection. + t::io::WriteImage("baked_tangents.png", + mesh.GetMaterial() + .GetTextureMap("tangents") + .To(core::UInt8, false, 127.5, 127.5)); + t::io::WriteImage("baked_normals.png", + mesh.GetMaterial().GetTextureMap("normals").To( + core::UInt8, false, 127.5, 127.5)); + + // Create a dummy world-space normal map (Float32, [-1, 1]). + // A simple gradient from (-1,-1,-1) to (1,1,1) + core::Tensor world_map_t = core::Tensor::Empty({tex_size, tex_size, 3}, + core::Float32, cpu_device); + for (int i = 0; i < tex_size; ++i) { + for (int j = 0; j < tex_size; ++j) { + float u = static_cast(j) / (tex_size - 1); + float v = static_cast(i) / (tex_size - 1); + world_map_t[i][j][0] = 2.0f * u - 1.0f; + world_map_t[i][j][1] = 2.0f * v - 1.0f; + world_map_t[i][j][2] = 2.0f * (u + v) / 2.0f; // Z > 0 + } + } + world_map_t = + world_map_t / world_map_t.Norm({2}, true).Clip_(1e-6, INFINITY); + t::geometry::Image world_normal_map(world_map_t); + t::io::WriteImage("world_normal_map.png", + world_normal_map.To(core::UInt8, false, 127.5f, 127.5f)); + + // 2. World to Tangent space. + auto tangent_normal_map = mesh.TransformNormalMap(world_normal_map, + /*to_tangent_space=*/true, + /*update_material=*/true); + EXPECT_EQ(tangent_normal_map.GetDtype(), core::UInt8); + EXPECT_EQ(tangent_normal_map.GetChannels(), 3); + EXPECT_TRUE(mesh.GetMaterial().HasNormalMap()); + + t::io::WriteImage("tangent_normal_map.png", tangent_normal_map); + + // 3. Tangent to World space. + auto world_normal_map_restored = + mesh.TransformNormalMap(tangent_normal_map, + /*to_tangent_space=*/false); + + EXPECT_EQ(world_normal_map_restored.GetDtype(), core::UInt8); + EXPECT_EQ(world_normal_map_restored.GetChannels(), 3); + t::io::WriteImage("world_normal_map_restored.png", + world_normal_map_restored); + + // world_normal_map_restored is the same as world_normal_map for the region + // with valid UV coordinates. +} + +TEST_P(TriangleMeshPermuteDevices, ComputeAmbientOcclusion) { + using ::testing::ElementsAre; + core::Device device("CPU:0"); + + // UVAtlas is CPU only. 
+    core::Device cpu_device("CPU:0");
+    auto mesh = t::geometry::TriangleMesh::CreateTorus(
+            1.0, 0.6, 30, 20, core::Float32, core::Int64, cpu_device);
+    // auto box1 = t::geometry::TriangleMesh::CreateBox(1.f, 1.f, 1.f);
+    // auto mesh = box1.BooleanDifference(
+    //         box1.Clone().Translate(core::Tensor::Init({0.1, 0.1, 0.1})));
+    mesh.ComputeVertexNormals();
+    mesh.ComputeUVAtlas();
+
+    auto ao_map = mesh.ComputeAmbientOcclusion(
+            /*tex_width=*/256, /*n_rays=*/8, /*max_hit_distance=*/INFINITY,
+            /*update_material=*/true);
+
+    EXPECT_EQ(ao_map.GetDtype(), core::UInt8);
+    EXPECT_THAT(ao_map.AsTensor().GetShape(), ElementsAre(256, 256, 1));
+    EXPECT_TRUE(mesh.GetMaterial().HasAOMap());
+
+    t::io::WriteTriangleMesh("torus_ao.glb", mesh);
+    t::io::WriteImage("torus_ao_texture.png", mesh.GetMaterial().GetAOMap());
+    // Visual inspection.
+    // visualization::Draw({std::shared_ptr<t::geometry::TriangleMesh>(
+    //                             &mesh, [](t::geometry::TriangleMesh*) {})},
+    //                     "Mesh with AO texture");
+}
+
 }  // namespace tests
 }  // namespace open3d
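The two tests above also double as a reference for the texture-baking workflow added in this patch. Below is a condensed C++ sketch of the same call sequence outside the test harness; the `main()` wrapper, the umbrella include and the output file names are illustrative, while the method calls mirror those that appear in the diff.

```cpp
#include <cmath>

#include "open3d/Open3D.h"

using namespace open3d;

int main() {
    // MikkTSpace, UVAtlas and the AO baking path run on the CPU.
    core::Device cpu("CPU:0");
    auto mesh = t::geometry::TriangleMesh::CreateTorus(
            1.0, 0.6, 30, 20, core::Float32, core::Int64, cpu);
    mesh.ComputeVertexNormals();  // needed for tangents and AO rays
    mesh.ComputeUVAtlas();        // provides the "texture_uvs" attribute

    // Per-vertex MikkTSpace tangents, plus baked normal/tangent textures
    // stored in the mesh material.
    mesh.ComputeTangentSpace(/*bake=*/true, /*tex_width=*/512);

    // Bake a single-channel UInt8 ambient-occlusion texture and attach it to
    // the material; WriteTriangleMesh() then exports it via ASSIMP as an
    // aiTextureType_LIGHTMAP texture.
    auto ao = mesh.ComputeAmbientOcclusion(/*tex_width=*/256, /*n_rays=*/32,
                                           /*max_hit_distance=*/INFINITY,
                                           /*update_material=*/true);

    t::io::WriteImage("ao.png", ao);
    t::io::WriteTriangleMesh("mesh_with_ao.glb", mesh);
    return 0;
}
```

A world-space normal map can subsequently be converted into tangent space (and attached to the material) with `mesh.TransformNormalMap(normal_map, /*to_tangent_space=*/true, /*update_material=*/true)`, as exercised by the TangentSpace test above.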