Skip to content
This repository was archived by the owner on Jun 5, 2023. It is now read-only.

td-agent takes 2 minutes to stop if the InfluxDB IP is incorrect #281

@bhakta0007

Description

@bhakta0007
root@bhakta-edge1:/lib/systemd/system# dpkg -l | grep td-
ii  td-agent                              3.8.1-0                            amd64        Treasure Agent: A data collector for Treasure Data

On Ubuntu, `service td-agent stop` or `service td-agent restart` takes 2 minutes to stop the service when the InfluxDB host is configured with an incorrect IP address.

<match app.agent>
  @type influxdb
  host  192.168.31.31
  port  8086
  dbname at_logs
  user  fluentd
  password  maskedPassword@123
  measurement my_measurement
  use_ssl false
  time_precision ns
  tag_keys ["levelname", "host"]
  sequence_tag _seq
  <buffer>
    @type memory
    chunk_limit_records 1024
    flush_interval 30
    retry_wait 1.0
  </buffer>
</match>

An strace of the process shows that it is waiting to connect (captured right after starting td-agent with the invalid InfluxDB server config above).

write(2, "W, [2021-04-11T11:10:04.063194 #"..., 253) = 253
futex(0x7f28ea84b0d8, FUTEX_WAIT_PRIVATE, 0, {tv_sec=29, tv_nsec=999998676}) = -1 ETIMEDOUT (Connection timed out)
futex(0x7f28ea84b128, FUTEX_WAKE_PRIVATE, 1) = 0
clone(child_stack=0x7f28e8973f70, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f28e89749d0, tls=0x7f28e8974700, child_tidptr=0x7f28e89749d0) = 797
socket(AF_INET, SOCK_STREAM|SOCK_CLOEXEC, IPPROTO_TCP) = 10
futex(0x7f28ea84f068, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28ea84f010, FUTEX_WAKE_PRIVATE, 1) = 1
getpid()                                = 673
getpid()                                = 673
write(6, "!", 1)                        = 1
futex(0x7f28ea84f010, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28ea84f010, FUTEX_WAKE_PRIVATE, 1) = 0
connect(10, {sa_family=AF_INET, sin_port=htons(8086), sin_addr=inet_addr("192.168.31.31")}, 16) = -1 ECONNREFUSED (Connection refused)
close(10)                               = 0
futex(0x7f28e39f9cd8, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28e39f9d28, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28ea84f068, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28ea84f010, FUTEX_WAKE_PRIVATE, 1) = 1
getpid()                                = 673
write(2, "W, [2021-04-11T11:10:34.068623 #"..., 253) = 253
futex(0x7f28ea84b0d8, FUTEX_WAIT_PRIVATE, 0, {tv_sec=29, tv_nsec=999998803}) = ? ERESTART_RESTARTBLOCK (Interrupted by signal)
--- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=660, si_uid=113} ---
getpid()                                = 673
getpid()                                = 673
write(4, "!", 1)                        = 1
rt_sigreturn({mask=[]})                 = -1 EINTR (Interrupted system call)
futex(0x7f28ea84b128, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f28ea84b0dc, FUTEX_WAIT_PRIVATE, 0, {tv_sec=29, tv_nsec=200914940}) = -1 ETIMEDOUT (Connection timed out)
futex(0x7f28ea84b128, FUTEX_WAKE_PRIVATE, 1) = 0
clone(child_stack=0x7f28e8973f70, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f28e89749d0, tls=0x7f28e8974700, child_tidptr=0x7f28e89749d0) = 820
socket(AF_INET, SOCK_STREAM|SOCK_CLOEXEC, IPPROTO_TCP) = 10
futex(0x7f28ea84f06c, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28ea84f010, FUTEX_WAKE_PRIVATE, 1) = 1
getpid()                                = 673
getpid()                                = 673
write(6, "!", 1)                        = 1
futex(0x7f28ea84f010, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28ea84f010, FUTEX_WAKE_PRIVATE, 1) = 0
connect(10, {sa_family=AF_INET, sin_port=htons(8086), sin_addr=inet_addr("192.168.31.31")}, 16) = -1 ECONNREFUSED (Connection refused)
close(10)                               = 0
futex(0x7f28e38760d8, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28e3876128, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28ea84f06c, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f28ea84f010, FUTEX_WAKE_PRIVATE, 1) = 1
getpid()                                = 673
write(2, "W, [2021-04-11T11:11:04.073785 #"..., 253) = 253
futex(0x7f28ea84b0dc, FUTEX_WAIT_PRIVATE, 0, {tv_sec=29, tv_nsec=999998510}) = ?
+++ killed by SIGKILL +++

Finally, the process is killed with SIGKILL because of the

TimeoutStopSec=120
setting that is present in /lib/systemd/system/td-agent.service.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions