@@ -27,6 +27,7 @@ import (
2727
2828 "github.com/alecthomas/units"
2929 "github.com/go-kit/log"
30+ "github.com/go-kit/log/level"
3031 "github.com/gogo/protobuf/proto"
3132 "github.com/golang/snappy"
3233 "github.com/pkg/errors"
@@ -40,19 +41,65 @@ import (
4041 "github.com/prometheus/prometheus/storage"
4142 "github.com/prometheus/prometheus/tsdb"
4243 "github.com/stretchr/testify/require"
44+ "google.golang.org/grpc/credentials/insecure"
45+ "google.golang.org/grpc/resolver"
4346
4447 "github.com/efficientgo/core/testutil"
4548
4649 "github.com/thanos-io/thanos/pkg/block/metadata"
50+ "github.com/thanos-io/thanos/pkg/component"
4751 "github.com/thanos-io/thanos/pkg/extkingpin"
4852 "github.com/thanos-io/thanos/pkg/logging"
4953 "github.com/thanos-io/thanos/pkg/runutil"
54+ grpcserver "github.com/thanos-io/thanos/pkg/server/grpc"
55+ "github.com/thanos-io/thanos/pkg/store"
5056 "github.com/thanos-io/thanos/pkg/store/labelpb"
5157 "github.com/thanos-io/thanos/pkg/store/storepb"
5258 "github.com/thanos-io/thanos/pkg/store/storepb/prompb"
5359 "github.com/thanos-io/thanos/pkg/tenancy"
5460)
5561
const (
	// dnsScheme is the URI scheme served by the fake resolver defined below.
	dnsScheme = "dns"
)
63+
64+ type dnsResolver struct {
65+ logger log.Logger
66+ target resolver.Target
67+ cc resolver.ClientConn
68+ addrStore map [string ][]string
69+ }
70+
71+ func (r * dnsResolver ) start () {
72+ addrStrs := r .addrStore [r .target .Endpoint ()]
73+ addrs := make ([]resolver.Address , len (addrStrs ))
74+ for i , s := range addrStrs {
75+ addrs [i ] = resolver.Address {Addr : s }
76+ }
77+ if err := r .cc .UpdateState (resolver.State {Addresses : addrs }); err != nil {
78+ level .Error (r .logger ).Log ("msg" , "failed to update state" , "err" , err )
79+ }
80+ }
81+
82+ func (* dnsResolver ) ResolveNow (_ resolver.ResolveNowOptions ) {}
83+
84+ func (* dnsResolver ) Close () {}
85+
86+ type dnsResolverBuilder struct {
87+ logger log.Logger
88+ addrStore map [string ][]string
89+ }
90+
91+ func (b * dnsResolverBuilder ) Build (target resolver.Target , cc resolver.ClientConn , opts resolver.BuildOptions ) (resolver.Resolver , error ) {
92+ r := & dnsResolver {
93+ logger : b .logger ,
94+ target : target ,
95+ cc : cc ,
96+ addrStore : b .addrStore ,
97+ }
98+ r .start ()
99+ return r , nil
100+ }
101+ func (* dnsResolverBuilder ) Scheme () string { return dnsScheme }
102+
56103type fakeTenantAppendable struct {
57104 f * fakeAppendable
58105}
@@ -191,6 +238,10 @@ func (g *fakePeersGroup) close(addr string) error {
191238 return nil
192239}
193240
241+ func (g * fakePeersGroup ) closeAll () error {
242+ return nil
243+ }
244+
194245func (g * fakePeersGroup ) getConnection (_ context.Context , addr string ) (WriteableStoreAsyncClient , error ) {
195246 c , ok := g .clients [addr ]
196247 if ! ok {
@@ -1735,3 +1786,92 @@ func TestHandlerFlippingHashrings(t *testing.T) {
17351786 cancel ()
17361787 wg .Wait ()
17371788}
1789+
1790+ func TestIngestorRestart (t * testing.T ) {
1791+ var err error
1792+ logger := log .NewLogfmtLogger (os .Stderr )
1793+ addr1 , addr2 , addr3 := "localhost:14090" , "localhost:14091" , "localhost:14092"
1794+ ing1 , ing2 := startIngestor (logger , addr1 , 0 ), startIngestor (logger , addr2 , 0 )
1795+ defer ing1 .Shutdown (err ) // srv1 is stable and will only be closed after the test ends
1796+
1797+ clientAddr := "ingestor.com"
1798+ dnsBuilder := & dnsResolverBuilder {
1799+ logger : logger ,
1800+ addrStore : map [string ][]string {clientAddr : {addr2 }},
1801+ }
1802+ resolver .Register (dnsBuilder )
1803+ dialOpts := []grpc.DialOption {
1804+ grpc .WithIdleTimeout (1 * time .Second ), // set idle timeout to 1s will re-establish the connection quickly
1805+ grpc .WithTransportCredentials (insecure .NewCredentials ()),
1806+ grpc .WithResolvers (resolver .Get (dnsScheme )),
1807+ }
1808+ client := NewHandler (logger , & Options {
1809+ MaxBackoff : 1 * time .Second ,
1810+ DialOpts : dialOpts ,
1811+ ReplicationFactor : 2 ,
1812+ ReceiverMode : RouterOnly ,
1813+ ForwardTimeout : 15 * time .Second ,
1814+ })
1815+ // one of the endpoints is DNS and wire up to different backend address on the fly
1816+ client .Hashring (& simpleHashring {addr1 , fmt .Sprintf ("%s:///%s" , dnsScheme , clientAddr )})
1817+ defer client .Close ()
1818+
1819+ ctx := context .TODO ()
1820+ data := & prompb.WriteRequest {
1821+ Timeseries : []prompb.TimeSeries {
1822+ {
1823+ Labels : labelpb .ZLabelsFromPromLabels (labels .FromStrings ("foo" , addr3 )),
1824+ Samples : []prompb.Sample {{Timestamp : time .Now ().Unix (), Value : 123 }},
1825+ },
1826+ },
1827+ }
1828+
1829+ err = client .handleRequest (ctx , 0 , "test" , data )
1830+ require .NoError (t , err )
1831+
1832+ // close srv2 to simulate ingestor down
1833+ ing2 .Shutdown (err )
1834+ ing3 := startIngestor (logger , addr3 , 2 * time .Second )
1835+ defer ing3 .Shutdown (err )
1836+ // bind the new backend to the same DNS
1837+ dnsBuilder .addrStore [clientAddr ] = []string {addr3 }
1838+
1839+ iter , errs := 10 , 0
1840+ for i := 0 ; i < iter ; i ++ {
1841+ err = client .handleRequest (ctx , 0 , "test" , data )
1842+ if err != nil {
1843+ require .Error (t , errUnavailable , err )
1844+ errs ++
1845+ } else {
1846+ break
1847+ }
1848+ time .Sleep (500 * time .Millisecond )
1849+ }
1850+ require .Greater (t , errs , 0 , "expected to have unavailable errors initially" )
1851+ require .Less (t , errs , iter , "expected to recover quickly after server restarts" )
1852+ }
1853+
1854+ type fakeStoreServer struct {
1855+ logger log.Logger
1856+ }
1857+
1858+ func (f * fakeStoreServer ) RemoteWrite (_ context.Context , in * storepb.WriteRequest ) (* storepb.WriteResponse , error ) {
1859+ level .Debug (f .logger ).Log ("msg" , "received remote write request" , "request" , in .String ())
1860+ return & storepb.WriteResponse {}, nil
1861+ }
1862+
1863+ func startIngestor (logger log.Logger , serverAddress string , delay time.Duration ) * grpcserver.Server {
1864+ h := & fakeStoreServer {logger : logger }
1865+ srv := grpcserver .TestServer (logger , component .Receive , serverAddress ,
1866+ grpcserver .WithServer (store .RegisterWritableStoreServer (h )),
1867+ )
1868+ go func () {
1869+ if delay > 0 {
1870+ time .Sleep (delay )
1871+ }
1872+ if err := srv .ListenAndServe (); err != nil {
1873+ level .Error (logger ).Log ("msg" , "server error" , "addr" , serverAddress , "err" , err )
1874+ }
1875+ }()
1876+ return srv
1877+ }
0 commit comments