71 changes: 71 additions & 0 deletions build_fixed.sh
@@ -0,0 +1,71 @@
#!/bin/bash

# Fixed build script for InfiniCore
# This script sets up the environment and builds with proper linker configuration

echo "Setting up InfiniCore build environment..."

# Initialize conda
eval "$(conda shell.bash hook)"

# Activate the infinicore-env environment
conda activate infinicore-env

# Set CUDA_HOME to the conda environment
export CUDA_HOME=$CONDA_PREFIX

# Clean up conflicting environment variables
unset CC
unset CXX
unset NVCC_PREPEND_FLAGS
unset NVCC_APPEND_FLAGS
unset CUDA_ROOT

# Use system tools
export PATH="/usr/bin:$PATH"

# Create a wrapper for ld that converts -m64 to -m elf_x86_64
mkdir -p /tmp/ld_wrapper
cat > /tmp/ld_wrapper/ld << 'EOF'
#!/bin/bash
# Convert -m64 to -m elf_x86_64 for system linker compatibility
args=()
skip_next=false
for arg in "$@"; do
if [ "$skip_next" = true ]; then
skip_next=false
continue
fi
if [ "$arg" = "-m64" ]; then
args+=("-m" "elf_x86_64")
elif [ "$arg" = "-fopenmp" ]; then
# Skip -fopenmp flag for linker, but add libgomp
args+=("-lgomp")
continue
elif [ "$arg" = "-m" ]; then
# Skip -m flag and its argument if it's elf_x86_64 (to avoid duplication)
skip_next=true
continue
else
args+=("$arg")
fi
done
# Add standard C++ library and other required libraries
args+=("-lstdc++" "-lm" "-lc" "-lgcc_s")
exec /usr/bin/ld "${args[@]}"
EOF
chmod +x /tmp/ld_wrapper/ld
export PATH="/tmp/ld_wrapper:$PATH"

echo "Environment setup complete!"
echo "CUDA_HOME: $CUDA_HOME"
echo "CONDA_PREFIX: $CONDA_PREFIX"

# Configure and build
echo "Configuring xmake..."
xmake f -c

echo "Building InfiniCore..."
xmake build

echo "Build completed!"
139 changes: 139 additions & 0 deletions example_memory_usage.py
@@ -0,0 +1,139 @@
#!/usr/bin/env python3
"""
Example script showing how to use InfiniCore memory statistics
to monitor memory usage during tensor operations.
"""

import sys
import os

# Add the current directory to Python path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

try:
    import infinicore
    print("✓ Successfully imported infinicore")
except ImportError as e:
    print(f"✗ Failed to import infinicore: {e}")
    print("Make sure to build the project first with: xmake build _infinicore")
    sys.exit(1)

def get_memory_summary():
    """Get a summary of current memory usage."""
    try:
        device_stats = infinicore.get_device_memory_stats()
        return {
            'allocations': device_stats.allocation[0].current,
            'allocated_bytes': device_stats.allocated_bytes[0].current,
            'active_blocks': device_stats.active[0].current,
            'device_allocations': device_stats.num_device_alloc,
            'device_deallocations': device_stats.num_device_free
        }
    except Exception as e:
        print(f"Warning: Could not get memory stats: {e}")
        return None

def print_memory_summary(title, stats):
    """Print a concise memory summary."""
    if stats is None:
        print(f"{title}: Unable to get memory statistics")
        return

    print(f"{title}:")
    print(f" Allocations: {stats['allocations']}")
    print(f" Allocated bytes: {stats['allocated_bytes']:,} bytes ({stats['allocated_bytes'] / 1024 / 1024:.2f} MB)")
    print(f" Active blocks: {stats['active_blocks']}")
    print(f" Device alloc/dealloc: {stats['device_allocations']}/{stats['device_deallocations']}")

def monitor_memory_usage():
    """Monitor memory usage during tensor operations."""
    print("=== InfiniCore Memory Usage Monitor ===\n")

    # Initial memory state
    initial_stats = get_memory_summary()
    print_memory_summary("Initial Memory State", initial_stats)

    try:
        # Create some tensors to demonstrate memory usage
        print("\n1. Creating tensors...")

        # Create a large tensor
        print(" Creating 1000x1000 float32 tensor...")
        tensor1 = infinicore.empty((1000, 1000), dtype=infinicore.float32)
        stats_after_tensor1 = get_memory_summary()
        print_memory_summary("After creating tensor1", stats_after_tensor1)

        # Create another tensor
        print("\n Creating 500x500 float32 tensor...")
        tensor2 = infinicore.empty((500, 500), dtype=infinicore.float32)
        stats_after_tensor2 = get_memory_summary()
        print_memory_summary("After creating tensor2", stats_after_tensor2)

        # Create a third tensor
        print("\n Creating 2000x2000 float32 tensor...")
        tensor3 = infinicore.empty((2000, 2000), dtype=infinicore.float32)
        stats_after_tensor3 = get_memory_summary()
        print_memory_summary("After creating tensor3", stats_after_tensor3)

        # Delete some tensors
        print("\n2. Deleting tensors...")
        del tensor1
        stats_after_del1 = get_memory_summary()
        print_memory_summary("After deleting tensor1", stats_after_del1)

        del tensor2
        stats_after_del2 = get_memory_summary()
        print_memory_summary("After deleting tensor2", stats_after_del2)

        # Final cleanup
        print("\n3. Final cleanup...")
        del tensor3
        final_stats = get_memory_summary()
        print_memory_summary("Final Memory State", final_stats)

        # Show memory difference
        if initial_stats and final_stats:
            print(f"\nMemory Usage Summary:")
            print(f" Net allocations: {final_stats['allocations'] - initial_stats['allocations']}")
            print(f" Net allocated bytes: {final_stats['allocated_bytes'] - initial_stats['allocated_bytes']:,} bytes")
            print(f" Net active blocks: {final_stats['active_blocks'] - initial_stats['active_blocks']}")

        print("\n✓ Memory monitoring completed successfully!")

    except Exception as e:
        print(f"✗ Error during memory monitoring: {e}")
        import traceback
        traceback.print_exc()

def demonstrate_stat_types():
    """Demonstrate different stat types and their usage."""
    print("\n=== Stat Types Demonstration ===\n")

    try:
        # Get device stats
        device_stats = infinicore.get_device_memory_stats()

        print("StatType.AGGREGATE statistics:")
        print(f" Allocation count: {device_stats.allocation[0].current}")
        print(f" Allocation peak: {device_stats.allocation[0].peak}")
        print(f" Allocation total: {device_stats.allocation[0].allocated}")
        print(f" Allocation freed: {device_stats.allocation[0].freed}")

        print(f"\nStatType.SMALL_POOL statistics:")
        print(f" Allocation count: {device_stats.allocation[1].current}")
        print(f" Allocation peak: {device_stats.allocation[1].peak}")

        print(f"\nStatType.LARGE_POOL statistics:")
        print(f" Allocation count: {device_stats.allocation[2].current}")
        print(f" Allocation peak: {device_stats.allocation[2].peak}")

        print("\n✓ Stat types demonstration completed!")

    except Exception as e:
        print(f"✗ Error during stat types demonstration: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    monitor_memory_usage()
    demonstrate_stat_types()
6 changes: 3 additions & 3 deletions include/infinicore/context/context.hpp
@@ -21,9 +21,9 @@ infiniopHandle_t getInfiniopHandle();
void syncStream();
void syncDevice();

std::shared_ptr<Memory> allocateMemory(size_t size);
std::shared_ptr<Memory> allocateHostMemory(size_t size);
std::shared_ptr<Memory> allocatePinnedHostMemory(size_t size);
std::shared_ptr<MemoryBlock> allocateMemory(size_t size);
std::shared_ptr<MemoryBlock> allocateHostMemory(size_t size);
std::shared_ptr<MemoryBlock> allocatePinnedHostMemory(size_t size);

void memcpyH2D(void *dst, const void *src, size_t size);
void memcpyD2H(void *dst, const void *src, size_t size);
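The only interface change in this hunk is the return type of the three allocation helpers. A minimal caller-side sketch of the new signatures; it assumes these free functions sit in an infinicore::context namespace (not visible in the hunk) and that MemoryBlock exposes data() as declared in memory/memory_block.hpp:

// Sketch only; namespace placement and header paths are assumptions.
#include "infinicore/context/context.hpp"
#include "infinicore/memory/memory_block.hpp"

#include <cstring>
#include <memory>
#include <vector>

void upload(const std::vector<float> &host) {
    const size_t bytes = host.size() * sizeof(float);

    // Callers now receive shared_ptr<MemoryBlock> instead of shared_ptr<Memory>.
    std::shared_ptr<infinicore::MemoryBlock> device_buf = infinicore::context::allocateMemory(bytes);
    std::shared_ptr<infinicore::MemoryBlock> staging = infinicore::context::allocatePinnedHostMemory(bytes);

    std::memcpy(staging->data(), host.data(), bytes);
    infinicore::context::memcpyH2D(device_buf->data(), staging->data(), bytes);
    infinicore::context::syncStream();
}

Since the blocks are still handed out through shared_ptr, existing call sites should mostly need only the type rename.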
31 changes: 3 additions & 28 deletions include/infinicore/memory.hpp
@@ -1,30 +1,5 @@
#pragma once

#include "device.hpp"

#include <cstddef>
#include <functional>

namespace infinicore {

class Memory {
public:
    using Deleter = std::function<void(std::byte *)>;

    Memory(std::byte *data, size_t size, Device device, Deleter deleter, bool pin_memory = false);
    ~Memory();

    std::byte *data();
    Device device() const;
    size_t size() const;
    bool is_pinned() const;

private:
    std::byte *data_;
    size_t size_;
    Device device_;
    Deleter deleter_;
    bool is_pinned_;
};

} // namespace infinicore
#include "memory/memory_block.hpp"
#include "memory/memory_pool.hpp"
#include "memory/memory_segment.hpp"
39 changes: 39 additions & 0 deletions include/infinicore/memory/memory_block.hpp
@@ -0,0 +1,39 @@
#pragma once

#include "../device.hpp"

#include <cstddef>
#include <functional>
#include <memory>

namespace infinicore {

class MemoryBlock {
public:
    using Deleter = std::function<void(std::byte *)>;

    MemoryBlock(std::byte *data, size_t size, Device device, Deleter deleter, bool pin_memory = false);
    ~MemoryBlock();

    // Copy constructor and copy assignment with reference counting
    MemoryBlock(const MemoryBlock &other);
    MemoryBlock &operator=(const MemoryBlock &other);

    // Move constructor and move assignment
    MemoryBlock(MemoryBlock &&other) noexcept;
    MemoryBlock &operator=(MemoryBlock &&other) noexcept;

    std::byte *data() const;
    Device device() const;
    size_t size() const;
    bool is_pinned() const;

private:
    std::byte *data_;
    size_t size_;
    Device device_;
    Deleter deleter_;
    bool is_pinned_;
};

} // namespace infinicore
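Because the header only declares the special member functions, a short sketch of the intended caller-visible behavior may help; the reference-counting semantics are inferred from the comments above, and the Device{} and deleter used here are purely illustrative assumptions:

// Sketch only. Assumes copying a MemoryBlock shares the underlying buffer via
// reference counting and that the deleter runs once the last owner goes away.
// Device{} is assumed default-constructible.
#include "infinicore/memory/memory_block.hpp"

#include <cstdlib>
#include <utility>

void memory_block_sketch() {
    auto *raw = static_cast<std::byte *>(std::malloc(1024));
    infinicore::MemoryBlock a(raw, 1024, infinicore::Device{},
                              [](std::byte *p) { std::free(p); });

    {
        infinicore::MemoryBlock b = a;            // copy: same buffer, count assumed to go 1 -> 2
        infinicore::MemoryBlock c = std::move(b); // move: ownership transferred, count unchanged
    }                                             // b and c destroyed: count assumed back to 1

    // a goes out of scope last; the deleter (std::free) is assumed to run here, exactly once.
}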
52 changes: 52 additions & 0 deletions include/infinicore/memory/memory_pool.hpp
@@ -0,0 +1,52 @@
#pragma once

#include <memory>
#include <unordered_map>
#include <mutex>
#include <atomic>
#include <cstddef>
#include <functional>

namespace infinicore {

struct MemoryInfo {
    std::byte* ptr;
    size_t size;
    std::atomic<int> ref_count;
    bool is_freed;

    MemoryInfo(std::byte* p, size_t s)
        : ptr(p), size(s), ref_count(1), is_freed(false) {}
};

class MemoryPool {
Collaborator review comment: Everything under the include directory is an externally exposed interface. Is this design meant to expose the memory pool interface to the outside? In what situation would a user or an upper-layer framework need to interact with the memory pool directly?

public:
    static MemoryPool& instance();

    // Register a memory allocation
    void registerMemory(std::byte* ptr, size_t size);

    // Increment reference count
    void addRef(std::byte* ptr);

    // Decrement reference count and potentially free memory
    void releaseMemory(std::byte* ptr, std::function<void(std::byte*)> actual_deleter);

    // Get reference count
    int getRefCount(std::byte* ptr) const;

    // Check if memory is registered
    bool isRegistered(std::byte* ptr) const;

    // Check if memory is already freed
    bool isFreed(std::byte* ptr) const;

private:
    MemoryPool() = default;
    ~MemoryPool() = default;

    mutable std::mutex mutex_;
    std::unordered_map<std::byte*, std::shared_ptr<MemoryInfo>> memory_map_;
};

} // namespace infinicore
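A usage sketch of the singleton declared above, following the lifecycle implied by the method comments (register, add references, release); whether releaseMemory actually invokes the deleter when the count reaches zero is an assumption, since the implementation is not part of this header:

// Sketch only; the freeing behavior and end state asserted here are assumptions.
#include "infinicore/memory/memory_pool.hpp"

#include <cassert>
#include <cstdlib>

void memory_pool_sketch() {
    auto &pool = infinicore::MemoryPool::instance();

    auto *buf = static_cast<std::byte *>(std::malloc(256));
    pool.registerMemory(buf, 256); // MemoryInfo starts with ref_count == 1
    pool.addRef(buf);              // e.g. a second MemoryBlock copy now shares buf
    assert(pool.getRefCount(buf) == 2);

    auto deleter = [](std::byte *p) { std::free(p); };
    pool.releaseMemory(buf, deleter); // 2 -> 1: assumed not freed yet
    pool.releaseMemory(buf, deleter); // 1 -> 0: deleter assumed to run here

    assert(!pool.isRegistered(buf) || pool.isFreed(buf)); // either unmapped or marked freed
}

This is also the kind of direct interaction the review comment above asks about; if only MemoryBlock is meant to drive the pool, the header could arguably live outside include/.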
Empty file.
2 changes: 1 addition & 1 deletion include/infinicore/tensor.hpp
@@ -32,7 +32,7 @@ struct TensorMetaData {

struct TensorData {
size_t offset;
std::shared_ptr<Memory> memory;
std::shared_ptr<MemoryBlock> memory;
};

struct TensorSliceParams {