pingyu
diff --git a/internal/client/client.go b/internal/client/client.go
Lines changed: 47 additions & 347 deletions
diff --git a/internal/client/client_async.go b/internal/client/client_async.go
Lines changed: 6 additions & 6 deletions
diff --git a/internal/client/client_batch.go b/internal/client/client_batch.go
Lines changed: 0 additions & 310 deletions
@@ -72,10 +72,10 @@ func (c *RPCClient) SendRequestAsync(ctx context.Context, addr string, req *tikv
 	}
 	tikvrpc.AttachContext(req, req.Context)
 
-	// TODO(zyguan): If the client created `WithGRPCDialOptions(grpc.WithBlock())`, `getConnArray` might be blocked for
+	// TODO(zyguan): If the client created `WithGRPCDialOptions(grpc.WithBlock())`, `getConnPool` might be blocked for
 	// a while when the corresponding conn array is uninitialized. However, since tidb won't set this option, we just
-	// keep `getConnArray` synchronous for now.
-	connArray, err := c.getConnArray(addr, true)
+	// keep `getConnPool` synchronous for now.
+	connPool, err := c.getConnPool(addr, true)
 	if err != nil {
 		cb.Invoke(nil, err)
 		return
@@ -113,7 +113,7 @@ func (c *RPCClient) SendRequestAsync(ctx context.Context, addr string, req *tikv
 		metrics.BatchRequestDurationDone.Observe(elapsed.Seconds())
 
 		// rpc metrics
-		connArray.updateRPCMetrics(req, resp, elapsed)
+		connPool.updateRPCMetrics(req, resp, elapsed)
 
 		// tracing
 		if spanRPC != nil {
@@ -131,15 +131,15 @@ func (c *RPCClient) SendRequestAsync(ctx context.Context, addr string, req *tikv
 			resp, err = c.option.codec.DecodeResponse(req, resp)
 		}
 
-		return resp, WrapErrConn(err, connArray)
+		return resp, WrapErrConn(err, connPool)
 	})
 
 	stop = context.AfterFunc(ctx, func() {
 		logutil.Logger(ctx).Debug("async send request cancelled (context done)", zap.String("to", addr), zap.Error(ctx.Err()))
 		entry.error(ctx.Err())
 	})
 
-	batchConn := connArray.batchConn
+	batchConn := connPool.batchConn
 	if val, err := util.EvalFailpoint("mockBatchCommandsChannelFullOnAsyncSend"); err == nil {
 		mockBatchCommandsChannelFullOnAsyncSend(ctx, batchConn, cb, val)
 	}
 
@@ -40,7 +40,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"math"
-	"runtime"
 	"runtime/trace"
 	"strings"
 	"sync"
@@ -264,159 +263,6 @@ type batchConnMetrics struct {
 	bestBatchSize prometheus.Observer
 }
 
-type batchConn struct {
-	// An atomic flag indicates whether the batch is idle or not.
-	// 0 for busy, others for idle.
-	idle uint32
-
-	// batchCommandsCh used for batch commands.
-	batchCommandsCh        chan *batchCommandsEntry
-	batchCommandsClients   []*batchCommandsClient
-	tikvTransportLayerLoad uint64
-	closed                 chan struct{}
-
-	reqBuilder *batchCommandsBuilder
-
-	// Notify rpcClient to check the idle flag
-	idleNotify *uint32
-	idleDetect *time.Timer
-
-	fetchMoreTimer *time.Timer
-
-	index uint32
-
-	metrics batchConnMetrics
-}
-
-func newBatchConn(connCount, maxBatchSize uint, idleNotify *uint32) *batchConn {
-	return &batchConn{
-		batchCommandsCh:        make(chan *batchCommandsEntry, maxBatchSize),
-		batchCommandsClients:   make([]*batchCommandsClient, 0, connCount),
-		tikvTransportLayerLoad: 0,
-		closed:                 make(chan struct{}),
-		reqBuilder:             newBatchCommandsBuilder(maxBatchSize),
-		idleNotify:             idleNotify,
-		idleDetect:             time.NewTimer(idleTimeout),
-	}
-}
-
-func (a *batchConn) initMetrics(target string) {
-	a.metrics.pendingRequests = metrics.TiKVBatchPendingRequests.WithLabelValues(target)
-	a.metrics.batchSize = metrics.TiKVBatchRequests.WithLabelValues(target)
-	a.metrics.sendLoopWaitHeadDur = metrics.TiKVBatchSendLoopDuration.WithLabelValues(target, "wait-head")
-	a.metrics.sendLoopWaitMoreDur = metrics.TiKVBatchSendLoopDuration.WithLabelValues(target, "wait-more")
-	a.metrics.sendLoopSendDur = metrics.TiKVBatchSendLoopDuration.WithLabelValues(target, "send")
-	a.metrics.recvLoopRecvDur = metrics.TiKVBatchRecvLoopDuration.WithLabelValues(target, "recv")
-	a.metrics.recvLoopProcessDur = metrics.TiKVBatchRecvLoopDuration.WithLabelValues(target, "process")
-	a.metrics.batchSendTailLat = metrics.TiKVBatchSendTailLatency.WithLabelValues(target)
-	a.metrics.batchRecvTailLat = metrics.TiKVBatchRecvTailLatency.WithLabelValues(target)
-	a.metrics.headArrivalInterval = metrics.TiKVBatchHeadArrivalInterval.WithLabelValues(target)
-	a.metrics.batchMoreRequests = metrics.TiKVBatchMoreRequests.WithLabelValues(target)
-	a.metrics.bestBatchSize = metrics.TiKVBatchBestSize.WithLabelValues(target)
-}
-
-func (a *batchConn) isIdle() bool {
-	return atomic.LoadUint32(&a.idle) != 0
-}
-
-// fetchAllPendingRequests fetches all pending requests from the channel.
-func (a *batchConn) fetchAllPendingRequests(maxBatchSize int) (headRecvTime time.Time, headArrivalInterval time.Duration) {
-	// Block on the first element.
-	latestReqStartTime := a.reqBuilder.latestReqStartTime
-	var headEntry *batchCommandsEntry
-	select {
-	case headEntry = <-a.batchCommandsCh:
-		if !a.idleDetect.Stop() {
-			<-a.idleDetect.C
-		}
-		a.idleDetect.Reset(idleTimeout)
-	case <-a.idleDetect.C:
-		a.idleDetect.Reset(idleTimeout)
-		atomic.AddUint32(&a.idle, 1)
-		atomic.CompareAndSwapUint32(a.idleNotify, 0, 1)
-		// This batchConn to be recycled
-		return time.Now(), 0
-	case <-a.closed:
-		return time.Now(), 0
-	}
-	if headEntry == nil {
-		return time.Now(), 0
-	}
-	headRecvTime = time.Now()
-	if headEntry.start.After(latestReqStartTime) && !latestReqStartTime.IsZero() {
-		headArrivalInterval = headEntry.start.Sub(latestReqStartTime)
-	}
-	a.reqBuilder.push(headEntry)
-
-	// This loop is for trying best to collect more requests.
-	for a.reqBuilder.len() < maxBatchSize {
-		select {
-		case entry := <-a.batchCommandsCh:
-			if entry == nil {
-				return
-			}
-			a.reqBuilder.push(entry)
-		default:
-			return
-		}
-	}
-	return
-}
-
-// fetchMorePendingRequests fetches more pending requests from the channel.
-func (a *batchConn) fetchMorePendingRequests(
-	maxBatchSize int,
-	batchWaitSize int,
-	maxWaitTime time.Duration,
-) {
-	// Try to collect `batchWaitSize` requests, or wait `maxWaitTime`.
-	if a.fetchMoreTimer == nil {
-		a.fetchMoreTimer = time.NewTimer(maxWaitTime)
-	} else {
-		a.fetchMoreTimer.Reset(maxWaitTime)
-	}
-	for a.reqBuilder.len() < batchWaitSize {
-		select {
-		case entry := <-a.batchCommandsCh:
-			if entry == nil {
-				if !a.fetchMoreTimer.Stop() {
-					<-a.fetchMoreTimer.C
-				}
-				return
-			}
-			a.reqBuilder.push(entry)
-		case <-a.fetchMoreTimer.C:
-			return
-		}
-	}
-	if !a.fetchMoreTimer.Stop() {
-		<-a.fetchMoreTimer.C
-	}
-
-	// Do an additional non-block try. Here we test the length with `maxBatchSize` instead
-	// of `batchWaitSize` because trying best to fetch more requests is necessary so that
-	// we can adjust the `batchWaitSize` dynamically.
-	yielded := false
-	for a.reqBuilder.len() < maxBatchSize {
-		select {
-		case entry := <-a.batchCommandsCh:
-			if entry == nil {
-				return
-			}
-			a.reqBuilder.push(entry)
-		default:
-			if yielded {
-				return
-			}
-			// yield once to batch more requests.
-			runtime.Gosched()
-			yielded = true
-		}
-	}
-}
-
-const idleTimeout = 3 * time.Minute
-
 var (
 	// presetBatchPolicies defines a set of [turboBatchOptions] as batch policies.
 	presetBatchPolicies = map[string]turboBatchOptions{
@@ -534,150 +380,6 @@ func (t *turboBatchTrigger) preferredBatchWaitSize(avgBatchWaitSize float64, def
 	return batchWaitSize
 }
 
-// BatchSendLoopPanicCounter is only used for testing.
-var BatchSendLoopPanicCounter int64 = 0
-
-var initBatchPolicyWarn sync.Once
-
-func (a *batchConn) batchSendLoop(cfg config.TiKVClient) {
-	defer func() {
-		if r := recover(); r != nil {
-			metrics.TiKVPanicCounter.WithLabelValues(metrics.LabelBatchSendLoop).Inc()
-			logutil.BgLogger().Error("batchSendLoop",
-				zap.Any("r", r),
-				zap.Stack("stack"))
-			atomic.AddInt64(&BatchSendLoopPanicCounter, 1)
-			logutil.BgLogger().Info("restart batchSendLoop", zap.Int64("count", atomic.LoadInt64(&BatchSendLoopPanicCounter)))
-			go a.batchSendLoop(cfg)
-		}
-	}()
-
-	trigger, ok := newTurboBatchTriggerFromPolicy(cfg.BatchPolicy)
-	if !ok {
-		initBatchPolicyWarn.Do(func() {
-			logutil.BgLogger().Warn("fallback to default batch policy due to invalid value", zap.String("value", cfg.BatchPolicy))
-		})
-	}
-	turboBatchWaitTime := trigger.turboWaitTime()
-
-	avgBatchWaitSize := float64(cfg.BatchWaitSize)
-	for {
-		sendLoopStartTime := time.Now()
-		a.reqBuilder.reset()
-
-		headRecvTime, headArrivalInterval := a.fetchAllPendingRequests(int(cfg.MaxBatchSize))
-		if a.reqBuilder.len() == 0 {
-			// the conn is closed or recycled.
-			return
-		}
-
-		// curl -X PUT -d 'return(true)' http://0.0.0.0:10080/fail/tikvclient/mockBlockOnBatchClient
-		if val, err := util.EvalFailpoint("mockBlockOnBatchClient"); err == nil {
-			if val.(bool) {
-				time.Sleep(1 * time.Hour)
-			}
-		}
-
-		if batchSize := a.reqBuilder.len(); batchSize < int(cfg.MaxBatchSize) {
-			if cfg.MaxBatchWaitTime > 0 && atomic.LoadUint64(&a.tikvTransportLayerLoad) > uint64(cfg.OverloadThreshold) {
-				// If the target TiKV is overload, wait a while to collect more requests.
-				metrics.TiKVBatchWaitOverLoad.Inc()
-				a.fetchMorePendingRequests(int(cfg.MaxBatchSize), int(cfg.BatchWaitSize), cfg.MaxBatchWaitTime)
-			} else if turboBatchWaitTime > 0 && headArrivalInterval > 0 && trigger.needFetchMore(headArrivalInterval) {
-				batchWaitSize := trigger.preferredBatchWaitSize(avgBatchWaitSize, int(cfg.BatchWaitSize))
-				a.fetchMorePendingRequests(int(cfg.MaxBatchSize), batchWaitSize, turboBatchWaitTime)
-				a.metrics.batchMoreRequests.Observe(float64(a.reqBuilder.len() - batchSize))
-			}
-		}
-		length := a.reqBuilder.len()
-		avgBatchWaitSize = 0.2*float64(length) + 0.8*avgBatchWaitSize
-		a.metrics.pendingRequests.Observe(float64(len(a.batchCommandsCh) + length))
-		a.metrics.bestBatchSize.Observe(avgBatchWaitSize)
-		a.metrics.headArrivalInterval.Observe(headArrivalInterval.Seconds())
-		a.metrics.sendLoopWaitHeadDur.Observe(headRecvTime.Sub(sendLoopStartTime).Seconds())
-		a.metrics.sendLoopWaitMoreDur.Observe(time.Since(sendLoopStartTime).Seconds())
-
-		a.getClientAndSend()
-
-		sendLoopEndTime := time.Now()
-		a.metrics.sendLoopSendDur.Observe(sendLoopEndTime.Sub(sendLoopStartTime).Seconds())
-		if dur := sendLoopEndTime.Sub(headRecvTime); dur > batchSendTailLatThreshold {
-			a.metrics.batchSendTailLat.Observe(dur.Seconds())
-		}
-	}
-}
-
-const (
-	SendFailedReasonNoAvailableLimit   = "concurrency limit exceeded"
-	SendFailedReasonTryLockForSendFail = "tryLockForSend fail"
-)
-
-func (a *batchConn) getClientAndSend() {
-	if val, err := util.EvalFailpoint("mockBatchClientSendDelay"); err == nil {
-		if timeout, ok := val.(int); ok && timeout > 0 {
-			time.Sleep(time.Duration(timeout * int(time.Millisecond)))
-		}
-	}
-
-	// Choose a connection by round-robbin.
-	var (
-		cli    *batchCommandsClient
-		target string
-	)
-	reasons := make([]string, 0)
-	hasHighPriorityTask := a.reqBuilder.hasHighPriorityTask()
-	for i := 0; i < len(a.batchCommandsClients); i++ {
-		a.index = (a.index + 1) % uint32(len(a.batchCommandsClients))
-		target = a.batchCommandsClients[a.index].target
-		// The lock protects the batchCommandsClient from been closed while it's in use.
-		c := a.batchCommandsClients[a.index]
-		if hasHighPriorityTask || c.available() > 0 {
-			if c.tryLockForSend() {
-				cli = c
-				break
-			} else {
-				reasons = append(reasons, SendFailedReasonTryLockForSendFail)
-			}
-		} else {
-			reasons = append(reasons, SendFailedReasonNoAvailableLimit)
-		}
-	}
-	if cli == nil {
-		logutil.BgLogger().Info("no available connections", zap.String("target", target), zap.Any("reasons", reasons))
-		metrics.TiKVNoAvailableConnectionCounter.Inc()
-		if config.GetGlobalConfig().TiKVClient.MaxConcurrencyRequestLimit == config.DefMaxConcurrencyRequestLimit {
-			// Only cancel requests when MaxConcurrencyRequestLimit feature is not enabled, to be compatible with the behavior of older versions.
-			// TODO: But when MaxConcurrencyRequestLimit feature is enabled, the requests won't be canceled and will wait until timeout.
-			// This behavior may not be reasonable, as the timeout is usually 40s or 60s, which is too long to retry in time.
-			a.reqBuilder.cancel(errors.New("no available connections"))
-		}
-		return
-	}
-	defer cli.unlockForSend()
-	available := cli.available()
-	reqSendTime := time.Now()
-	batch := 0
-	req, forwardingReqs := a.reqBuilder.buildWithLimit(available, func(id uint64, e *batchCommandsEntry) {
-		cli.batched.Store(id, e)
-		cli.sent.Add(1)
-		atomic.StoreInt64(&e.sendLat, int64(reqSendTime.Sub(e.start)))
-		if trace.IsEnabled() {
-			trace.Log(e.ctx, "rpc", "send")
-		}
-	})
-	if req != nil {
-		batch += len(req.RequestIds)
-		cli.send("", req)
-	}
-	for forwardedHost, req := range forwardingReqs {
-		batch += len(req.RequestIds)
-		cli.send(forwardedHost, req)
-	}
-	if batch > 0 {
-		a.metrics.batchSize.Observe(float64(batch))
-	}
-}
-
 type tryLock struct {
 	*sync.Cond
 	reCreating bool
@@ -1127,18 +829,6 @@ func (c *batchCommandsClient) initBatchClient(forwardedHost string) error {
 	return nil
 }
 
-func (a *batchConn) Close() {
-	// Close all batchRecvLoop.
-	for _, c := range a.batchCommandsClients {
-		// After connections are closed, `batchRecvLoop`s will check the flag.
-		atomic.StoreInt32(&c.closed, 1)
-	}
-	// Don't close(batchCommandsCh) because when Close() is called, someone maybe
-	// calling SendRequest and writing batchCommandsCh, if we close it here the
-	// writing goroutine will panic.
-	close(a.closed)
-}
-
 func sendBatchRequest(
 	ctx context.Context,
 	addr string,