diff --git a/apisix/balancer.lua b/apisix/balancer.lua index ffb797307cd3..50a0a01d9d86 100644 --- a/apisix/balancer.lua +++ b/apisix/balancer.lua @@ -203,11 +203,21 @@ local function pick_server(route, ctx) local nodes_count = #up_conf.nodes if nodes_count == 1 then - local node = up_conf.nodes[1] - ctx.balancer_ip = node.host - ctx.balancer_port = node.port - node.upstream_host = parse_server_for_upstream_host(node, ctx.upstream_scheme) - return node + -- For least_conn balancer, we still need to use the balancer even with single node + -- to track connection counts for future load balancing decisions + if up_conf.type == "least_conn" then + core.log.debug( + "single node with least_conn balancer", + "still using balancer for connection tracking" + ) + else + core.log.info("single node with ", up_conf.type, " balancer - skipping balancer") + local node = up_conf.nodes[1] + ctx.balancer_ip = node.host + ctx.balancer_port = node.port + node.upstream_host = parse_server_for_upstream_host(node, ctx.upstream_scheme) + return node + end end local version = ctx.upstream_version diff --git a/apisix/balancer/least_conn.lua b/apisix/balancer/least_conn.lua index 8923d1781d00..16d1b440f7f6 100644 --- a/apisix/balancer/least_conn.lua +++ b/apisix/balancer/least_conn.lua @@ -17,33 +17,146 @@ local core = require("apisix.core") local binaryHeap = require("binaryheap") +local dkjson = require("dkjson") local ipairs = ipairs local pairs = pairs - +local ngx = ngx +local ngx_shared = ngx.shared +local tostring = tostring local _M = {} +-- Shared dictionary to store connection counts across balancer recreations +local CONN_COUNT_DICT_NAME = "balancer-least-conn" +local conn_count_dict local function least_score(a, b) return a.score < b.score end +-- Get the connection count key for a specific upstream and server +local function get_conn_count_key(upstream, server) + local upstream_id = upstream.id + if not upstream_id then + -- Fallback to a hash of the upstream configuration using stable encoding + upstream_id = ngx.crc32_short(dkjson.encode(upstream)) + core.log.debug("generated upstream_id from hash: ", upstream_id) + end + local key = "conn_count:" .. tostring(upstream_id) .. ":" .. server + core.log.debug("generated connection count key: ", key) + return key +end + +-- Get the current connection count for a server from shared dict +local function get_server_conn_count(upstream, server) + local key = get_conn_count_key(upstream, server) + local count, err = conn_count_dict:get(key) + if err then + core.log.error("failed to get connection count for ", server, ": ", err) + return 0 + end + local result = count or 0 + core.log.debug("retrieved connection count for server ", server, ": ", result) + return result +end + +-- Increment the connection count for a server +local function incr_server_conn_count(upstream, server, delta) + local key = get_conn_count_key(upstream, server) + local new_count, err = conn_count_dict:incr(key, delta or 1, 0) + if not new_count then + core.log.error("failed to increment connection count for ", server, ": ", err) + return 0 + end + core.log.debug("incremented connection count for server ", server, " by ", delta or 1, + ", new count: ", new_count) + return new_count +end + +-- Clean up connection counts for servers that are no longer in the upstream +local function cleanup_stale_conn_counts(upstream, current_servers) + local upstream_id = upstream.id + if not upstream_id then + upstream_id = ngx.crc32_short(dkjson.encode(upstream)) + end + + local prefix = "conn_count:" .. 
tostring(upstream_id) .. ":"
+    core.log.debug("cleaning up stale connection counts with prefix: ", prefix)
+    local keys, err = conn_count_dict:get_keys(0) -- Get all keys
+    if err then
+        core.log.error("failed to get keys from shared dict: ", err)
+        return
+    end
+
+    for _, key in ipairs(keys or {}) do
+        if core.string.has_prefix(key, prefix) then
+            local server = key:sub(#prefix + 1)
+            if not current_servers[server] then
+                -- This server is no longer in the upstream, clean it up
+                local ok, delete_err = conn_count_dict:delete(key)
+                if not ok and delete_err then
+                    core.log.error("failed to delete stale connection count for server ",
+                                   server, ": ", delete_err)
+                else
+                    core.log.info("cleaned up stale connection count for server: ", server)
+                end
+            end
+        end
+    end
+end
 
 function _M.new(up_nodes, upstream)
+    if not conn_count_dict then
+        conn_count_dict = ngx_shared[CONN_COUNT_DICT_NAME]
+    end
+
+    -- Enable persistent counting only for WebSocket or when explicitly requested
+    -- This maintains backward compatibility with existing behavior
+    local use_persistent_counting = upstream.scheme == "websocket"
+        or upstream.persistent_conn_counting == true
+
+    if use_persistent_counting and not conn_count_dict then
+        core.log.warn("persistent counting requested but shared dict '",
+                      CONN_COUNT_DICT_NAME, "' not found, using traditional least_conn mode")
+        use_persistent_counting = false
+    elseif not use_persistent_counting and conn_count_dict then
+        core.log.debug("shared dict available but persistent counting not enabled for scheme: ",
+                       upstream.scheme or "http", ", using traditional least_conn mode")
+    end
+
     local servers_heap = binaryHeap.minUnique(least_score)
+
+    if use_persistent_counting then
+        -- Clean up stale connection counts for removed servers
+        cleanup_stale_conn_counts(upstream, up_nodes)
+    end
+
     for server, weight in pairs(up_nodes) do
-        local score = 1 / weight
+        local score
+        if use_persistent_counting then
+            -- True least connection mode: use persistent connection counts
+            local conn_count = get_server_conn_count(upstream, server)
+            score = (conn_count + 1) / weight
+            core.log.debug("initializing server ", server, " with persistent counting",
+                           " | weight: ", weight, " | conn_count: ", conn_count, " | score: ", score)
+        else
+            -- Traditional mode: use original weighted round-robin behavior
+            score = 1 / weight
+        end
+
         -- Note: the argument order of insert is different from others
         servers_heap:insert({
             server = server,
-            effect_weight = 1 / weight,
+            weight = weight,
+            effect_weight = 1 / weight, -- For backward compatibility
             score = score,
+            use_persistent_counting = use_persistent_counting,
         }, server)
     end
 
     return {
         upstream = upstream,
-        get = function (ctx)
+        get = function(ctx)
             local server, info, err
             if ctx.balancer_tried_servers then
                 local tried_server_list = {}
@@ -75,14 +188,40 @@ function _M.new(up_nodes, upstream)
                 return nil, err
             end
 
-            info.score = info.score + info.effect_weight
-            servers_heap:update(server, info)
+            if info.use_persistent_counting then
+                -- True least connection mode: update based on persistent connection counts
+                local current_conn_count = get_server_conn_count(upstream, server)
+                info.score = (current_conn_count + 1) / info.weight
+                servers_heap:update(server, info)
+                incr_server_conn_count(upstream, server, 1)
+            else
+                -- Traditional mode: use original weighted round-robin logic
+                info.score = info.score + info.effect_weight
+                servers_heap:update(server, info)
+            end
 
             return server
         end,
-        after_balance = function (ctx, 
before_retry) + after_balance = function(ctx, before_retry) local server = ctx.balancer_server local info = servers_heap:valueByPayload(server) - info.score = info.score - info.effect_weight + if not info then + core.log.error("server info not found for: ", server) + return + end + + if info.use_persistent_counting then + -- True least connection mode: update based on persistent connection counts + incr_server_conn_count(upstream, server, -1) + local current_conn_count = get_server_conn_count(upstream, server) + info.score = (current_conn_count + 1) / info.weight + if info.score < 0 then + -- Prevent negative scores + info.score = 0 + end + else + -- Traditional mode: use original weighted round-robin logic + info.score = info.score - info.effect_weight + end servers_heap:update(server, info) if not before_retry then @@ -100,7 +239,7 @@ function _M.new(up_nodes, upstream) ctx.balancer_tried_servers[server] = true end, - before_retry_next_priority = function (ctx) + before_retry_next_priority = function(ctx) if ctx.balancer_tried_servers then core.tablepool.release("balancer_tried_servers", ctx.balancer_tried_servers) ctx.balancer_tried_servers = nil @@ -109,5 +248,42 @@ function _M.new(up_nodes, upstream) } end +local function cleanup_all_conn_counts() + if not conn_count_dict then + conn_count_dict = ngx_shared[CONN_COUNT_DICT_NAME] + end + + if not conn_count_dict then + -- No shared dict available, nothing to cleanup + return + end + + local keys, err = conn_count_dict:get_keys(0) -- Get all keys + if err then + core.log.error("failed to get keys from shared dict during cleanup: ", err) + return + end + + local cleaned_count = 0 + for _, key in ipairs(keys or {}) do + if core.string.has_prefix(key, "conn_count:") then + local ok, delete_err = conn_count_dict:delete(key) + if not ok and delete_err then + core.log.warn("failed to delete connection count key during cleanup: ", + key, ", error: ", delete_err) + else + cleaned_count = cleaned_count + 1 + end + end + end + + if cleaned_count > 0 then + core.log.info("cleaned up ", cleaned_count, " connection count entries from shared dict") + end +end + +function _M.cleanup_all() + cleanup_all_conn_counts() +end return _M diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua index 35212eea7cf1..d47511f6e458 100644 --- a/apisix/cli/config.lua +++ b/apisix/cli/config.lua @@ -105,6 +105,7 @@ local _M = { ["prometheus-cache"] = "10m", ["standalone-config"] = "10m", ["status-report"] = "1m", + ["balancer-least-conn"] = "10m", } }, stream = { @@ -121,6 +122,7 @@ local _M = { ["worker-events-stream"] = "10m", ["tars-stream"] = "1m", ["upstream-healthcheck-stream"] = "10m", + ["balancer-least-conn"] = "10m", } }, main_configuration_snippet = "", @@ -168,6 +170,7 @@ local _M = { ["balancer-ewma"] = "10m", ["balancer-ewma-locks"] = "10m", ["balancer-ewma-last-touched-at"] = "10m", + ["balancer-least-conn"] = "10m", ["plugin-limit-req-redis-cluster-slot-lock"] = "1m", ["plugin-limit-count-redis-cluster-slot-lock"] = "1m", ["plugin-limit-conn-redis-cluster-slot-lock"] = "1m", diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua index 18b77fd3c590..05e28e65ddb3 100644 --- a/apisix/cli/ngx_tpl.lua +++ b/apisix/cli/ngx_tpl.lua @@ -76,6 +76,9 @@ lua { {% if status then %} lua_shared_dict status-report {* meta.lua_shared_dict["status-report"] *}; {% end %} + {% if enable_stream then %} + lua_shared_dict balancer-least-conn {* meta.lua_shared_dict["balancer-least-conn"] *}; + {% end %} lua_shared_dict nacos 10m; } @@ -290,6 +293,9 @@ http { 
lua_shared_dict balancer-ewma {* http.lua_shared_dict["balancer-ewma"] *}; lua_shared_dict balancer-ewma-locks {* http.lua_shared_dict["balancer-ewma-locks"] *}; lua_shared_dict balancer-ewma-last-touched-at {* http.lua_shared_dict["balancer-ewma-last-touched-at"] *}; + {% if not enable_stream then %} + lua_shared_dict balancer-least-conn {* http.lua_shared_dict["balancer-least-conn"] *}; + {% end %} lua_shared_dict etcd-cluster-health-check {* http.lua_shared_dict["etcd-cluster-health-check"] *}; # etcd health check # for discovery shared dict diff --git a/docs/en/latest/balancer-least-conn.md b/docs/en/latest/balancer-least-conn.md new file mode 100644 index 000000000000..1a17680c931d --- /dev/null +++ b/docs/en/latest/balancer-least-conn.md @@ -0,0 +1,477 @@ +--- +title: Least Connection Load Balancer +keywords: + - APISIX + - API Gateway + - Routing + - Least Connection + - Upstream +description: This document introduces the Least Connection Load Balancer (`least_conn`) in Apache APISIX, including its working principle, configuration methods, and use cases. +--- + + + +## Overview + +The `least_conn` load balancer in Apache APISIX provides two modes of operation: + +1. **Traditional Mode** (default): A weighted round-robin algorithm optimized for performance +2. **Persistent Connection Counting Mode**: True least-connection algorithm that maintains accurate connection counts across balancer recreations + +This algorithm is particularly effective for scenarios where request processing times vary significantly or when dealing with long-lived connections such as WebSocket connections, where the second mode provides significant benefits for load distribution after upstream scaling. + +## Algorithm Details + +### Core Principle + +#### Traditional Mode (Default) + +In traditional mode, the algorithm uses a weighted round-robin approach with dynamic scoring: + +- Initialize each server with score = `1 / weight` +- On connection: increment score by `1 / weight` +- On completion: decrement score by `1 / weight` + +This provides good performance for most use cases while maintaining backward compatibility. + +#### Persistent Connection Counting Mode + +When enabled, the algorithm maintains accurate connection counts for each upstream server in shared memory: + +- Tracks real connection counts across configuration reloads +- Survives upstream node scaling operations +- Provides true least-connection behavior for long-lived connections + +The algorithm uses a binary min-heap data structure to efficiently track and select servers with the lowest scores. + +### Score Calculation + +#### Traditional Mode + +```lua +-- Initialization +score = 1 / weight + +-- On connection +score = score + (1 / weight) + +-- On completion +score = score - (1 / weight) +``` + +#### Persistent Connection Counting Mode + +```lua +-- Initialization and updates +score = (connection_count + 1) / weight +``` + +Where: + +- `connection_count` - Current number of active connections to the server (persisted) +- `weight` - Server weight configuration value + +Servers with lower scores are preferred for new connections. In persistent mode, the `+1` represents the potential new connection being considered. 
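+
+As a rough illustration of the difference between the two formulas, the standalone Lua sketch below (not APISIX code; the server addresses, weights, and connection counts are made-up values) computes both scores for two hypothetical servers and picks the lower one in each mode:
+
+```lua
+-- Standalone sketch: compare the two scoring formulas on made-up data.
+local servers = {
+    { addr = "127.0.0.1:8080", weight = 2, conn_count = 10 },
+    { addr = "127.0.0.1:8081", weight = 1, conn_count = 3 },
+}
+
+-- Pick the server with the lowest score according to the given formula.
+local function pick(score_fn)
+    local best
+    for _, s in ipairs(servers) do
+        local score = score_fn(s)
+        if not best or score < best.score then
+            best = { addr = s.addr, score = score }
+        end
+    end
+    return best
+end
+
+-- Traditional mode: the initial score only reflects the configured weight.
+local traditional = pick(function(s) return 1 / s.weight end)
+
+-- Persistent mode: the score also reflects the live connection count.
+local persistent = pick(function(s) return (s.conn_count + 1) / s.weight end)
+
+print(("traditional pick: %s (score %.2f)"):format(traditional.addr, traditional.score))
+print(("persistent pick: %s (score %.2f)"):format(persistent.addr, persistent.score))
+-- traditional pick: 127.0.0.1:8080 (score 0.50)
+-- persistent pick: 127.0.0.1:8081 (score 4.00)
+```
+
+In this made-up example the heavier-weighted server wins under the traditional formula, while the persistent formula steers the new connection to the server that currently holds fewer connections relative to its weight.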
+ +### Connection State Management + +#### Traditional Mode + +- **Connection Start**: Score updated to `score + (1 / weight)` +- **Connection End**: Score updated to `score - (1 / weight)` +- **State**: Maintained only within current balancer instance +- **Heap Maintenance**: Binary heap automatically reorders servers by score + +#### Persistent Connection Counting Mode + +- **Connection Start**: Connection count incremented, score updated to `(new_count + 1) / weight` +- **Connection End**: Connection count decremented, score updated to `(new_count - 1) / weight` +- **Score Protection**: Prevents negative scores by setting minimum score to 0 +- **Heap Maintenance**: Binary heap automatically reorders servers by score + +##### Persistence Strategy + +Connection counts are stored in nginx shared dictionary with structured keys: + +``` +conn_count:{upstream_id}:{server_address} +``` + +This ensures connection state survives: + +- Upstream configuration changes +- Balancer instance recreation +- Worker process restarts +- Upstream node scaling operations +- Node additions/removals + +### Connection Tracking + +#### Persistent State Management + +The balancer uses nginx shared dictionary (`balancer-least-conn`) to maintain connection counts across: + +- Balancer instance recreations +- Upstream configuration changes +- Worker process restarts +- Node additions/removals + +#### Connection Count Keys + +Connection counts are stored using structured keys: + +``` +conn_count:{upstream_id}:{server_address} +``` + +Where: + +- `upstream_id` - Unique identifier for the upstream configuration +- `server_address` - Server address (e.g., "127.0.0.1:8080") + +#### Upstream ID Generation + +1. **Primary**: Uses `upstream.id` if available +2. **Fallback**: Generates CRC32 hash of stable JSON encoding of upstream configuration + +```lua +local upstream_id = upstream.id +if not upstream_id then + upstream_id = ngx.crc32_short(dkjson.encode(upstream)) +end +``` + +The implementation uses `dkjson.encode` instead of `core.json.encode` to ensure deterministic JSON serialization, which is crucial for generating consistent upstream IDs across different worker processes and configuration reloads. + +### Connection Lifecycle + +#### 1. Connection Establishment + +When a new request is routed: + +1. Select server with lowest score from the heap +2. Update server score to `(current_count + 1) / weight` +3. Increment connection count in shared dictionary +4. Update server position in the heap + +#### 2. Connection Completion + +When a request completes: + +1. Calculate new score as `(current_count - 1) / weight` +2. Ensure score is not negative (minimum 0) +3. Decrement connection count in shared dictionary +4. Update server position in the heap + +#### 3. Cleanup Process + +During balancer recreation: + +1. Identify current active servers +2. Remove connection counts for servers no longer in upstream +3. Preserve counts for existing servers + +### Data Structures + +#### Binary Heap + +- **Type**: Min-heap based on server scores +- **Purpose**: Efficient selection of server with lowest score +- **Operations**: O(log n) insertion, deletion, and updates + +#### Shared Dictionary + +- **Name**: `balancer-least-conn` +- **Size**: 10MB (configurable) +- **Scope**: Shared across all worker processes +- **Persistence**: Survives configuration reloads + +## Configuration + +### Automatic Setup + +The `balancer-least-conn` shared dictionary is automatically configured by APISIX with a default size of 10MB. 
No manual configuration is required. + +### Custom Configuration + +To customize the shared dictionary size, modify the `nginx_config.http.lua_shared_dict` section in your `conf/config.yaml`: + +```yaml +nginx_config: + http: + lua_shared_dict: + balancer-least-conn: 20m # Custom size (default: 10m) +``` + +### Upstream Configuration + +#### Traditional Mode (Default) + +```yaml +upstreams: + - id: 1 + type: least_conn + nodes: + "127.0.0.1:8080": 1 + "127.0.0.1:8081": 2 + "127.0.0.1:8082": 1 +``` + +#### Persistent Connection Counting Mode + +##### For WebSocket (Automatic) + +```yaml +upstreams: + - id: websocket_upstream + type: least_conn + scheme: websocket # Automatically enables persistent counting + nodes: + "127.0.0.1:8080": 1 + "127.0.0.1:8081": 1 + "127.0.0.1:8082": 1 +``` + +##### Manual Activation + +```yaml +upstreams: + - id: custom_upstream + type: least_conn + persistent_conn_counting: true # Explicitly enable persistent counting + nodes: + "127.0.0.1:8080": 1 + "127.0.0.1:8081": 1 + "127.0.0.1:8082": 1 +``` + +## Performance Characteristics + +### Time Complexity + +- **Server Selection**: O(1) - heap peek operation +- **Connection Update**: O(log n) - heap update operation +- **Cleanup**: O(k) where k is the number of stored keys + +### Memory Usage + +- **Per Server**: ~100 bytes (key + value + overhead) +- **Total**: Scales linearly with number of servers across all upstreams + +### Scalability + +- **Servers**: Efficiently handles hundreds of servers per upstream +- **Upstreams**: Supports multiple upstreams with isolated connection tracking +- **Requests**: Minimal per-request overhead + +## Use Cases + +### Traditional Mode + +#### Optimal Scenarios + +1. **High-throughput HTTP APIs**: Fast, short-lived connections +2. **Microservices**: Request/response patterns +3. **Standard web applications**: Regular HTTP traffic + +#### Advantages + +- Lower memory usage +- Better performance for short connections +- Simple configuration + +### Persistent Connection Counting Mode + +#### Optimal Scenarios + +1. **WebSocket Applications**: Long-lived connections benefit from accurate load distribution across scaling operations +2. **Server-Sent Events (SSE)**: Persistent streaming connections +3. **Long-polling**: Extended HTTP connections +4. **Variable Processing Times**: Requests with unpredictable duration +5. **Database Connection Pools**: Connection-oriented services + +#### Use After Node Scaling + +Particularly beneficial when: + +- Adding new upstream nodes to existing deployments +- Existing long connections remain on original nodes +- Need to balance load across all available nodes + +### Considerations + +1. **Short-lived Connections**: Traditional mode has lower overhead for very short requests +2. **Memory Usage**: Persistent mode requires shared memory for connection state +3. **Backward Compatibility**: Traditional mode maintains existing behavior + +## WebSocket Load Balancing Improvements + +### Problem Addressed + +Prior to this enhancement, when upstream nodes were scaled out (e.g., from 2 to 3 nodes), WebSocket load balancing experienced imbalanced distribution: + +- Existing WebSocket long connections remained on original nodes +- New connections were distributed across all nodes +- Result: Original nodes overloaded, new nodes underutilized + +### Solution + +The persistent connection counting mode specifically addresses this by: + +1. **Tracking Real Connections**: Maintains accurate connection counts in shared memory +2. 
**Surviving Scaling Events**: Connection counts persist through upstream configuration changes
+3. **Balancing New Connections**: New connections automatically route to less loaded nodes
+4. **Gradual Rebalancing**: As connections naturally terminate and reconnect, load evens out
+
+### Example Scenario
+
+**Before Enhancement:**
+
+```
+Initial: Node1(50 conn), Node2(50 conn)
+After scaling to 3 nodes: Node1(50 conn), Node2(50 conn), Node3(0 conn)
+New connections distributed: Node1(60 conn), Node2(60 conn), Node3(40 conn)
+```
+
+**With Persistent Counting:**
+
+```
+Initial: Node1(50 conn), Node2(50 conn)
+After scaling to 3 nodes: Node1(50 conn), Node2(50 conn), Node3(0 conn)
+New connections route to Node3 until balanced: Node1(50 conn), Node2(50 conn), Node3(50 conn)
+```
+
+## Monitoring and Debugging
+
+### Log Messages
+
+#### Debug Logs
+
+Enable debug logging to monitor balancer behavior:
+
+**Connection Count Operations**
+
+```
+generated connection count key: conn_count:upstream_123:127.0.0.1:8080
+retrieved connection count for server 127.0.0.1:8080: 5
+incremented connection count for server 127.0.0.1:8080 by 1, new count: 6
+```
+
+**Cleanup Operations**
+
+```
+cleaning up stale connection counts with prefix: conn_count:upstream_123:
+cleaned up stale connection count for server: 127.0.0.1:8082
+```
+
+#### Initialization
+
+```
+initializing server 127.0.0.1:8080 with persistent counting | weight: 1 | conn_count: 0 | score: 1
+```
+
+#### Errors
+
+```
+failed to increment connection count for 127.0.0.1:8080: no memory
+```
+
+### Shared Dictionary Monitoring
+
+Check shared dictionary usage:
+
+```lua
+local dict = ngx.shared["balancer-least-conn"]
+local free_space = dict:free_space()
+local capacity = dict:capacity()
+```
+
+## Error Handling
+
+### Missing Shared Dictionary
+
+If the shared dictionary is not available (which should not happen with the default configuration), the balancer logs a warning and falls back to traditional mode:
+
+```
+persistent counting requested but shared dict 'balancer-least-conn' not found, using traditional least_conn mode
+```
+
+### Memory Exhaustion
+
+When the shared dictionary runs out of memory:
+
+- Connection count updates will fail
+- Error messages will be logged
+- The balancer continues to function with potentially stale counts
+
+### Recovery Strategies
+
+1. **Increase Dictionary Size**: Allocate more memory
+2. **Cleanup Frequency**: Implement periodic cleanup of stale entries
+3. **Monitoring**: Set up alerts for dictionary usage
+
+## Best Practices
+
+### Configuration
+
+1. **Dictionary Size**: The default 10MB is sufficient for most deployments (connection counts are stored per server, not per connection)
+2. **Server Weights**: Use appropriate weights to reflect server capacity
+3. **Health Checks**: Combine with health checks for robust load balancing
+
+### Monitoring
+
+1. **Connection Counts**: Monitor for unexpected accumulation
+2. **Memory Usage**: Track shared dictionary utilization
+3. **Performance**: Measure request distribution effectiveness
+
+### Troubleshooting
+
+1. **Uneven Distribution**: Check for connection count accumulation
+2. **Memory Issues**: Monitor shared dictionary free space
+3. 
**Configuration**: Verify shared dictionary is properly configured + +## Migration and Compatibility + +### Backward Compatibility + +- Graceful degradation when shared dictionary is unavailable +- No breaking changes to existing API +- Maintains existing behavior patterns + +### Upgrade Considerations + +1. **Configuration**: Shared dictionary is automatically configured +2. **Memory**: Default allocation should be sufficient for most use cases +3. **Testing**: Validate load distribution in staging environment diff --git a/docs/en/latest/config.json b/docs/en/latest/config.json index bee266f04d63..19b550b91b7c 100644 --- a/docs/en/latest/config.json +++ b/docs/en/latest/config.json @@ -313,6 +313,10 @@ { "type": "doc", "id": "debug-mode" + }, + { + "type": "doc", + "id": "balancer-least-conn" } ] }, diff --git a/docs/zh/latest/balancer-least-conn.md b/docs/zh/latest/balancer-least-conn.md new file mode 100644 index 000000000000..e11bfccf47ef --- /dev/null +++ b/docs/zh/latest/balancer-least-conn.md @@ -0,0 +1,476 @@ +--- +title: 最少连接负载均衡器 +keywords: + - APISIX + - API 网关 + - 路由 + - 最小连接 + - 上游 +description: 本文介绍了 Apache APISIX 中的最少连接负载均衡器(`least_conn`),包括其工作原理、配置方法和使用场景。 +--- + + + +## 概述 + +Apache APISIX 中的 `least_conn` 负载均衡器提供两种操作模式: + +1. **传统模式**(默认):性能优化的加权轮询算法 +2. **持久化连接计数模式**:真正的最少连接算法,在负载均衡器重建过程中保持准确的连接计数 + +该算法特别适用于请求处理时间差异较大的场景,或处理长连接(如 WebSocket 连接)的情况,其中第二种模式在上游扩容后为负载分布提供显著优势。 + +## 算法详情 + +### 核心原理 + +#### 传统模式(默认) + +在传统模式下,算法使用带有动态评分的加权轮询方法: + +- 初始化每个服务器的分数 = `1 / weight` +- 连接时:分数增加 `1 / weight` +- 完成时:分数减少 `1 / weight` + +这为大多数用例提供了良好的性能,同时保持向后兼容性。 + +#### 持久化连接计数模式 + +当启用时,算法在共享内存中为每个上游服务器维护准确的连接计数: + +- 跨配置重载跟踪真实连接计数 +- 在上游节点扩容操作中保持状态 +- 为长连接提供真正的最少连接行为 + +该算法使用二进制最小堆数据结构来高效跟踪和选择得分最低的服务器。 + +### 得分计算 + +#### 传统模式 + +```lua +-- 初始化 +score = 1 / weight + +-- 连接时 +score = score + (1 / weight) + +-- 完成时 +score = score - (1 / weight) +``` + +#### 持久化连接计数模式 + +```lua +-- 初始化和更新 +score = (connection_count + 1) / weight +``` + +其中: + +- `connection_count` - 服务器当前活跃连接数(持久化) +- `weight` - 服务器权重配置值 + +得分较低的服务器优先获得新连接。在持久化模式下,`+1` 代表正在考虑的潜在新连接。 + +### 连接状态管理 + +#### 传统模式 + +- **连接开始**:分数更新为 `score + (1 / weight)` +- **连接结束**:分数更新为 `score - (1 / weight)` +- **状态**:仅在当前负载均衡器实例内维护 +- **堆维护**:二进制堆自动按得分重新排序服务器 + +#### 持久化连接计数模式 + +- **连接开始**:连接计数递增,得分更新为 `(new_count + 1) / weight` +- **连接结束**:连接计数递减,得分更新为 `(new_count - 1) / weight` +- **得分保护**:通过设置最小得分为 0 防止出现负分 +- **堆维护**:二进制堆自动按得分重新排序服务器 + +##### 持久化策略 + +连接计数存储在 nginx 共享字典中,使用结构化键: + +``` +conn_count:{upstream_id}:{server_address} +``` + +这确保连接状态在以下情况下保持: + +- 上游配置变更 +- 负载均衡器实例重建 +- 工作进程重启 +- 上游节点扩容操作 + +### 连接跟踪 + +#### 持久状态管理 + +负载均衡器使用 nginx 共享字典(`balancer-least-conn`)在以下情况下维护连接计数: + +- 负载均衡器实例重建 +- 上游配置变更 +- 工作进程重启 +- 节点添加/移除 + +#### 连接计数键 + +连接计数使用结构化键存储: + +``` +conn_count:{upstream_id}:{server_address} +``` + +其中: + +- `upstream_id` - 上游配置的唯一标识符 +- `server_address` - 服务器地址(例如:"127.0.0.1:8080") + +#### 上游 ID 生成 + +1. **主要方式**:如果可用,使用 `upstream.id` +2. **备用方式**:生成上游配置稳定 JSON 编码的 CRC32 哈希 + +```lua +local upstream_id = upstream.id +if not upstream_id then + upstream_id = ngx.crc32_short(dkjson.encode(upstream)) +end +``` + +实现使用 `dkjson.encode` 而不是 `core.json.encode` 来确保确定性的 JSON 序列化,这对于在不同工作进程和配置重载之间生成一致的上游 ID 至关重要。 + +### 连接生命周期 + +#### 1. 连接建立 + +当路由新请求时: + +1. 从堆中选择得分最低的服务器 +2. 将服务器得分更新为 `(current_count + 1) / weight` +3. 在共享字典中递增连接计数 +4. 更新堆中服务器的位置 + +#### 2. 连接完成 + +当请求完成时: + +1. 计算新得分为 `(current_count - 1) / weight` +2. 保证得分不为负(最小为 0) +3. 在共享字典中递减连接计数 +4. 更新堆中服务器的位置 + +#### 3. 清理过程 + +在负载均衡器重建期间: + +1. 识别当前活跃的服务器 +2. 
移除不再在上游中的服务器的连接计数 +3. 保留现有服务器的计数 + +### 数据结构 + +#### 二进制堆 + +- **类型**:基于服务器得分的最小堆 +- **目的**:高效选择得分最低的服务器 +- **操作**:O(log n) 插入、删除和更新 + +#### 共享字典 + +- **名称**:`balancer-least-conn` +- **大小**:10MB(可配置) +- **范围**:在所有工作进程间共享 +- **持久性**:在配置重载后保持 + +## 配置 + +### 自动设置 + +`balancer-least-conn` 共享字典由 APISIX 自动配置,默认大小为 10MB。无需手动配置。 + +### 自定义配置 + +要自定义共享字典大小,请修改 `conf/config.yaml` 中的 `nginx_config.http.lua_shared_dict` 部分: + +```yaml +nginx_config: + http: + lua_shared_dict: + balancer-least-conn: 20m # 自定义大小(默认:10m) +``` + +### 上游配置 + +#### 传统模式(默认) + +```yaml +upstreams: + - id: 1 + type: least_conn + nodes: + "127.0.0.1:8080": 1 + "127.0.0.1:8081": 2 + "127.0.0.1:8082": 1 +``` + +#### 持久化连接计数模式 + +##### WebSocket(自动启用) + +```yaml +upstreams: + - id: websocket_upstream + type: least_conn + scheme: websocket # 自动启用持久化计数 + nodes: + "127.0.0.1:8080": 1 + "127.0.0.1:8081": 1 + "127.0.0.1:8082": 1 +``` + +##### 手动激活 + +```yaml +upstreams: + - id: custom_upstream + type: least_conn + persistent_conn_counting: true # 显式启用持久化计数 + nodes: + "127.0.0.1:8080": 1 + "127.0.0.1:8081": 1 + "127.0.0.1:8082": 1 +``` + +## 性能特征 + +### 时间复杂度 + +- **服务器选择**:O(1) - 堆查看操作 +- **连接更新**:O(log n) - 堆更新操作 +- **清理**:O(k),其中 k 是存储键的数量 + +### 内存使用 + +- **每个服务器**:约 100 字节(键 + 值 + 开销) +- **总计**:与所有上游的服务器数量线性扩展 + +### 可扩展性 + +- **服务器**:高效处理每个上游数百个服务器 +- **上游**:支持多个上游,具有隔离的连接跟踪 +- **请求**:最小的每请求开销 + +## 使用场景 + +### 传统模式 + +#### 最佳场景 + +1. **高吞吐量 HTTP API**:快速、短连接 +2. **微服务**:请求/响应模式 +3. **标准 Web 应用**:常规 HTTP 流量 + +#### 优势 + +- 较低的内存使用 +- 短连接的更好性能 +- 简单配置 + +### 持久化连接计数模式 + +#### 最佳场景 + +1. **WebSocket 应用**:长连接在扩容操作中受益于准确的负载分布 +2. **服务器发送事件(SSE)**:持久流连接 +3. **长轮询**:扩展的 HTTP 连接 +4. **可变处理时间**:持续时间不可预测的请求 +5. **数据库连接池**:面向连接的服务 + +#### 节点扩容后的使用 + +特别适用于以下情况: + +- 向现有部署添加新的上游节点 +- 现有长连接保留在原始节点上 +- 需要在所有可用节点间平衡负载 + +### 注意事项 + +1. **短连接**:传统模式对于非常短的请求开销更低 +2. **内存使用**:持久化模式需要共享内存来存储连接状态 +3. **向后兼容性**:传统模式保持现有行为 + +## WebSocket 负载均衡改进 + +### 解决的问题 + +在此增强之前,当上游节点扩容(例如从 2 个节点扩展到 3 个节点)时,WebSocket 负载均衡会出现不平衡分布: + +- 现有 WebSocket 长连接保留在原始节点上 +- 新连接分布在所有节点上 +- 结果:原始节点过载,新节点利用不足 + +### 解决方案 + +持久化连接计数模式专门通过以下方式解决此问题: + +1. **跟踪真实连接**:在共享内存中维护准确的连接计数 +2. **在扩容事件中保持状态**:连接计数在上游配置更改中持续存在 +3. **平衡新连接**:新连接自动路由到负载较轻的节点 +4. 
**逐步重平衡**：随着连接自然终止和重连，负载逐渐平衡
+
+### 示例场景
+
+**增强前：**
+
+```
+初始：Node1(50连接)，Node2(50连接)
+扩容到3个节点后：Node1(50连接)，Node2(50连接)，Node3(0连接)
+新连接分布：Node1(60连接)，Node2(60连接)，Node3(40连接)
+```
+
+**使用持久化计数：**
+
+```
+初始：Node1(50连接)，Node2(50连接)
+扩容到3个节点后：Node1(50连接)，Node2(50连接)，Node3(0连接)
+新连接路由到Node3直到平衡：Node1(50连接)，Node2(50连接)，Node3(50连接)
+```
+
+## 监控和调试
+
+### 日志消息
+
+#### 调试日志
+
+启用调试日志来监控负载均衡器行为：
+
+**连接数操作**
+
+```
+generated connection count key: conn_count:upstream_123:127.0.0.1:8080
+retrieved connection count for server 127.0.0.1:8080: 5
+incremented connection count for server 127.0.0.1:8080 by 1, new count: 6
+```
+
+**清理操作**
+
+```
+cleaning up stale connection counts with prefix: conn_count:upstream_123:
+cleaned up stale connection count for server: 127.0.0.1:8082
+```
+
+#### 初始化
+
+```
+initializing server 127.0.0.1:8080 with persistent counting | weight: 1 | conn_count: 0 | score: 1
+```
+
+#### 错误
+
+```
+failed to increment connection count for 127.0.0.1:8080: no memory
+```
+
+### 共享字典监控
+
+检查共享字典使用情况：
+
+```lua
+local dict = ngx.shared["balancer-least-conn"]
+local free_space = dict:free_space()
+local capacity = dict:capacity()
+```
+
+## 错误处理
+
+### 缺少共享字典
+
+如果共享字典不可用（在默认配置下不应该发生），负载均衡器会记录一条警告并回退到传统模式：
+
+```
+persistent counting requested but shared dict 'balancer-least-conn' not found, using traditional least_conn mode
+```
+
+### 内存耗尽
+
+当共享字典内存不足时：
+
+- 连接计数更新将失败
+- 将记录错误日志
+- 负载均衡器继续运行，但可能使用过时的计数
+
+### 恢复策略
+
+1. **增加字典大小**：分配更多内存
+2. **清理频率**：实现过时条目的定期清理
+3. **监控**：为字典使用情况设置警报
+
+## 最佳实践
+
+### 配置
+
+1. **字典大小**：默认 10MB 对大多数情况足够（连接计数按服务器存储，而非按连接存储）
+2. **服务器权重**：使用适当的权重来反映服务器容量
+3. **健康检查**：与健康检查结合使用以实现稳健的负载均衡
+
+### 监控
+
+1. **连接计数**：监控意外的累积
+2. **内存使用**：跟踪共享字典利用率
+3. **性能**：测量请求分布的有效性
+
+### 故障排除
+
+1. **不均匀分布**：检查连接计数累积
+2. **内存问题**：监控共享字典可用空间
+3. **配置**：验证共享字典是否正确配置
+
+## 迁移和兼容性
+
+### 向后兼容性
+
+- 当共享字典不可用时优雅降级
+- 对现有 API 无破坏性更改
+- 保持现有行为模式
+
+### 升级注意事项
+
+1. **配置**：共享字典自动配置
+2. **内存**：默认分配对大多数用例应该足够
+3. **测试**：在测试环境中验证负载分布
diff --git a/docs/zh/latest/config.json b/docs/zh/latest/config.json
index c0489e6415de..3a40d5def9a9 100644
--- a/docs/zh/latest/config.json
+++ b/docs/zh/latest/config.json
@@ -268,6 +268,10 @@
       {
         "type": "doc",
         "id": "debug-mode"
+      },
+      {
+        "type": "doc",
+        "id": "balancer-least-conn"
       }
     ]
   },
diff --git a/t/APISIX.pm b/t/APISIX.pm
index 4ef30e506e71..ad2a452df4b9 100644
--- a/t/APISIX.pm
+++ b/t/APISIX.pm
@@ -599,6 +599,7 @@ _EOC_
     lua_shared_dict balancer-ewma 1m;
     lua_shared_dict balancer-ewma-locks 1m;
     lua_shared_dict balancer-ewma-last-touched-at 1m;
+    lua_shared_dict balancer-least-conn 10m;
     lua_shared_dict plugin-limit-req-redis-cluster-slot-lock 1m;
     lua_shared_dict plugin-limit-count-redis-cluster-slot-lock 1m;
     lua_shared_dict plugin-limit-conn-redis-cluster-slot-lock 1m;