Deadlock in tail plugin during memory management #9557

@uristernik

Description

Bug Report

Describe the bug

The Fluent Bit pod deadlocks: CPU usage rises and no further logs are processed. I captured a core dump and examined it; the backtraces of all threads are below, followed by a short note on Thread 2:

Thread 6 (Thread 0xffffa83ea9c0 (LWP 23)):
#0  0x0000ffffad247c74 in __GI_epoll_pwait (epfd=69, events=0xffffa740c000, maxevents=256, timeout=-1, set=0x0) at ../sysdeps/unix/sysv/linux/epoll_pwait.c:40
#1  0x0000aaaae1331e90 in _mk_event_wait_2 (loop=0xffffa7416000, timeout=-1) at /src/fluent-bit/lib/monkey/mk_core/mk_event_epoll.c:444
#2  0x0000aaaae13322f4 in mk_event_wait (loop=0xffffa7416000) at /src/fluent-bit/lib/monkey/mk_core/mk_event.c:207
#3  0x0000aaaae132a9b0 in mk_server_worker_loop (server=0xffffaaa65700) at /src/fluent-bit/lib/monkey/mk_server/mk_server.c:506
#4  0x0000aaaae1321aa0 in mk_sched_launch_worker_loop (data=0xffffa8e00008) at /src/fluent-bit/lib/monkey/mk_server/mk_scheduler.c:417
#5  0x0000ffffad1dee30 in start_thread (arg=0xffffa9d69f77) at ./nptl/pthread_create.c:442
#6  0x0000ffffad247adc in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:79

Thread 5 (Thread 0xffffa8dfa9c0 (LWP 22)):
#0  0x0000ffffad2136b0 in __GI___clock_nanosleep (clock_id=<optimized out>, clock_id@entry=0, flags=flags@entry=0, req=req@entry=0xffffa8dfa108, rem=rem@entry=0xffffa8dfa108) at ../sysdeps/unix/sysv/linux/clock_nanosleep.c:48
#1  0x0000ffffad21861c in __GI___nanosleep (req=req@entry=0xffffa8dfa108, rem=rem@entry=0xffffa8dfa108) at ../sysdeps/unix/sysv/linux/nanosleep.c:25
#2  0x0000ffffad2184e8 in __sleep (seconds=0) at ../sysdeps/posix/sleep.c:55
#3  0x0000aaaae132e240 in mk_clock_worker_init (data=0xffffaaa65700) at /src/fluent-bit/lib/monkey/mk_server/mk_clock.c:124
#4  0x0000ffffad1dee30 in start_thread (arg=0xffffa9d69fa7) at ./nptl/pthread_create.c:442
#5  0x0000ffffad247adc in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:79

Thread 4 (Thread 0xffffa9d6a9c0 (LWP 21)):
#0  0x0000ffffad247c74 in __GI_epoll_pwait (epfd=63, events=0xffffaaaaf000, maxevents=8, timeout=-1, set=0x0) at ../sysdeps/unix/sysv/linux/epoll_pwait.c:40
#1  0x0000aaaae1331e90 in _mk_event_wait_2 (loop=0xffffa9e547a0, timeout=-1) at /src/fluent-bit/lib/monkey/mk_core/mk_event_epoll.c:444
#2  0x0000aaaae13322f4 in mk_event_wait (loop=0xffffa9e547a0) at /src/fluent-bit/lib/monkey/mk_core/mk_event.c:207
#3  0x0000aaaae1319124 in mk_lib_worker (data=0xffffa9e547b8) at /src/fluent-bit/lib/monkey/mk_server/mk_lib.c:154
#4  0x0000ffffad1dee30 in start_thread (arg=0xffffab9f9e57) at ./nptl/pthread_create.c:442
#5  0x0000ffffad247adc in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:79

Thread 3 (Thread 0xffffaa9fa9c0 (LWP 20)):
#0  0x0000ffffad247c74 in __GI_epoll_pwait (epfd=11, events=0xffffaaa1d000, maxevents=32, timeout=-1, set=0x0) at ../sysdeps/unix/sysv/linux/epoll_pwait.c:40
#1  0x0000aaaae1331e90 in _mk_event_wait_2 (loop=0xffffaaa0b018, timeout=-1) at /src/fluent-bit/lib/monkey/mk_core/mk_event_epoll.c:444
#2  0x0000aaaae13322f4 in mk_event_wait (loop=0xffffaaa0b018) at /src/fluent-bit/lib/monkey/mk_core/mk_event.c:207
#3  0x0000aaaae0e54c2c in log_worker_collector (data=0xffffaaa16000) at /src/fluent-bit/src/flb_log.c:131
#4  0x0000aaaae0e7d70c in step_callback (data=<optimized out>) at /src/fluent-bit/src/flb_worker.c:43
#5  0x0000ffffad1dee30 in start_thread (arg=0xffffab9f9e67) at ./nptl/pthread_create.c:442
#6  0x0000ffffad247adc in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:79

Thread 2 (Thread 0xffffab9fa9c0 (LWP 19)):
#0  phn_next_set (offset=40, next=0x0, phn=0xffffaac1ed80) at /src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/ph.h:76
#1  phn_merge_siblings (cmp=<optimized out>, offset=40, phn=<optimized out>) at /src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/ph.h:206
#2  ph_merge_children (cmp=<optimized out>, offset=40, phn=0xffffaac20f00) at /src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/ph.h:244
#3  ph_remove (cmp=<optimized out>, offset=40, phn=phn@entry=0xffffaac20f00, ph=ph@entry=0xffffaac03d50) at /src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/ph.h:401
#4  je_edata_heap_remove (ph=ph@entry=0xffffaac03d50, phn=phn@entry=0xffffaac20f00) at /src/fluent-bit/lib/jemalloc-5.3.0/src/edata.c:6
#5  0x0000aaaae0e2595c in je_eset_remove (eset=eset@entry=0xffffaac03ab0, edata=edata@entry=0xffffaac20f00) at /src/fluent-bit/lib/jemalloc-5.3.0/src/eset.c:126
#6  0x0000aaaae0e266a8 in extent_coalesce (forward=false, outer=0xffffaac20f00, inner=0xffffaac25680, ecache=<optimized out>, ehooks=0xffffaac000c0, pac=0xffffaac03a00, tsdn=0xffffab9fb110) at /src/fluent-bit/lib/jemalloc-5.3.0/src/extent.c:806
#7  extent_try_coalesce_impl (tsdn=tsdn@entry=0xffffab9fb110, pac=pac@entry=0xffffaac03a00, ehooks=ehooks@entry=0xffffaac000c0, ecache=ecache@entry=0xffffaac03a38, edata=edata@entry=0xffffaac25680, coalesced=coalesced@entry=0xffffab9f9a4f) at /src/fluent-bit/lib/jemalloc-5.3.0/src/extent.c:855
#8  0x0000aaaae0e2803c in extent_try_coalesce_large (coalesced=0xffffab9f9a4f, edata=0xffffaac25680, ecache=0xffffaac03a38, ehooks=0xffffaac000c0, pac=0xffffaac03a00, tsdn=0xffffab9fb110) at /src/fluent-bit/lib/jemalloc-5.3.0/src/extent.c:884
#9  je_extent_record (tsdn=tsdn@entry=0xffffab9fb110, pac=pac@entry=0xffffaac03a00, ehooks=ehooks@entry=0xffffaac000c0, ecache=ecache@entry=0xffffaac03a38, edata=edata@entry=0xffffaac25680) at /src/fluent-bit/lib/jemalloc-5.3.0/src/extent.c:937
#10 0x0000aaaae0e281b4 in je_ecache_dalloc (tsdn=tsdn@entry=0xffffab9fb110, pac=pac@entry=0xffffaac03a00, ehooks=ehooks@entry=0xffffaac000c0, ecache=ecache@entry=0xffffaac03a38, edata=edata@entry=0xffffaac25680) at /src/fluent-bit/lib/jemalloc-5.3.0/src/extent.c:147
#11 0x0000aaaae0e33db8 in pac_dalloc_impl (tsdn=0xffffab9fb110, self=0xffffaac03a00, edata=0xffffaac25680, deferred_work_generated=0xffffab9f9adf) at /src/fluent-bit/lib/jemalloc-5.3.0/src/pac.c:277
#12 0x0000aaaae0e319a8 in large_dalloc_finish_impl (edata=<optimized out>, arena=0xffffaac01040, tsdn=0xffffab9fb110) at /src/fluent-bit/lib/jemalloc-5.3.0/src/large.c:253
#13 je_large_dalloc (tsdn=0xffffab9fb110, edata=<optimized out>) at /src/fluent-bit/lib/jemalloc-5.3.0/src/large.c:273
#14 0x0000aaaae0df2a34 in arena_dalloc_large (slow_path=false, szind=<optimized out>, tcache=0xffffab9fb478, ptr=<optimized out>, tsdn=0xffffab9fb110) at /src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/arena_inlines_b.h:297
#15 arena_dalloc (slow_path=false, caller_alloc_ctx=<synthetic pointer>, tcache=0xffffab9fb478, ptr=<optimized out>, tsdn=0xffffab9fb110) at /src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/arena_inlines_b.h:334
#16 idalloctm (slow_path=false, is_internal=false, alloc_ctx=<synthetic pointer>, tcache=0xffffab9fb478, ptr=<optimized out>, tsdn=0xffffab9fb110) at /src/fluent-bit/lib/jemalloc-5.3.0/include/jemalloc/internal/jemalloc_internal_inlines_c.h:120
#17 ifree (slow_path=false, tcache=0xffffab9fb478, ptr=<optimized out>, tsd=0xffffab9fb110) at /src/fluent-bit/lib/jemalloc-5.3.0/src/jemalloc.c:2887
#18 je_free_default (ptr=<optimized out>) at /src/fluent-bit/lib/jemalloc-5.3.0/src/jemalloc.c:3014
#19 0x0000aaaae0df3138 in free (ptr=ptr@entry=0xffffa687c900) at /src/fluent-bit/lib/jemalloc-5.3.0/src/jemalloc.c:3162
#20 0x0000aaaae0e630a8 in flb_free (ptr=0xffffa687c900) at /src/fluent-bit/include/fluent-bit/flb_mem.h:127
#21 flb_filter_do (ic=ic@entry=0xffffaaa3c660, data=0xffffa6a353c0, bytes=<optimized out>, out_data=out_data@entry=0xffffab9f9d50, out_bytes=out_bytes@entry=0xffffab9f9d58, tag=tag@entry=0xffffaabfc680 "kube.var.log.containers.<redacted>.log", tag_len=tag_len@entry=148, config=0xffffabe3edc0) at /src/fluent-bit/src/flb_filter.c:190
#22 0x0000aaaae0e60714 in input_chunk_append_raw (in=0xffffabea1400, event_type=0, n_records=<optimized out>, tag=0xffffaabfc680 "kube.var.log.containers.<redacted>.log", tag_len=<optimized out>, buf=0xffffa6a353c0, buf_size=47308) at /src/fluent-bit/src/flb_input_chunk.c:1588
#23 0x0000aaaae0e60f34 in flb_input_chunk_append_raw (in=in@entry=0xffffabea1400, event_type=event_type@entry=0, records=records@entry=4, tag=tag@entry=0xffffaabfc680 "kube.var.log.containers.<redacted>.log", tag_len=tag_len@entry=148, buf=<optimized out>, buf_size=<optimized out>) at /src/fluent-bit/src/flb_input_chunk.c:1929
#24 0x0000aaaae0ea13c8 in input_log_append (ins=0xffffabea1400, processor_starting_stage=processor_starting_stage@entry=0, records=4, tag=0xffffaabfc680 "kube.var.log.containers.<redacted>.log", tag_len=148, buf=0xffffa6a353c0, buf_size=47308) at /src/fluent-bit/src/flb_input_log.c:71
#25 0x0000aaaae0ea1464 in flb_input_log_append (ins=<optimized out>, tag=<optimized out>, tag_len=<optimized out>, buf=<optimized out>, buf_size=<optimized out>) at /src/fluent-bit/src/flb_input_log.c:90
#26 0x0000aaaae0ed363c in ml_stream_buffer_flush (file=0xffffaaab9980, ctx=0xffffaaa32400) at /src/fluent-bit/plugins/in_tail/tail_file.c:412
#27 process_content (bytes=<synthetic pointer>, file=0xffffaaab9980) at /src/fluent-bit/plugins/in_tail/tail_file.c:630
#28 flb_tail_file_chunk (file=file@entry=0xffffaaab9980) at /src/fluent-bit/plugins/in_tail/tail_file.c:1555
#29 0x0000aaaae0ece538 in in_tail_collect_pending (ins=<optimized out>, config=<optimized out>, in_context=0xffffaaa32400) at /src/fluent-bit/plugins/in_tail/tail.c:109
#30 0x0000aaaae0e5e20c in flb_input_collector_fd (fd=fd@entry=25, config=config@entry=0xffffabe3edc0) at /src/fluent-bit/src/flb_input.c:1970
#31 0x0000aaaae0e7472c in flb_engine_handle_event (config=0xffffabe3edc0, mask=<optimized out>, fd=25) at /src/fluent-bit/src/flb_engine.c:575
#32 flb_engine_start (config=config@entry=0xffffabe3edc0) at /src/fluent-bit/src/flb_engine.c:941
#33 0x0000aaaae0e53c94 in flb_lib_worker (data=0xffffabe183a0) at /src/fluent-bit/src/flb_lib.c:674
#34 0x0000ffffad1dee30 in start_thread (arg=0xffffc0932877) at ./nptl/pthread_create.c:442
#35 0x0000ffffad247adc in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:79

Thread 1 (Thread 0xffffada87020 (LWP 18)):
#0  0x0000ffffad2136b0 in __GI___clock_nanosleep (clock_id=<optimized out>, clock_id@entry=0, flags=flags@entry=0, req=req@entry=0xffffc0932a08, rem=rem@entry=0xffffc0932a08) at ../sysdeps/unix/sysv/linux/clock_nanosleep.c:48
#1  0x0000ffffad21861c in __GI___nanosleep (req=req@entry=0xffffc0932a08, rem=rem@entry=0xffffc0932a08) at ../sysdeps/unix/sysv/linux/nanosleep.c:25
#2  0x0000ffffad2184e8 in __sleep (seconds=0, seconds@entry=1) at ../sysdeps/posix/sleep.c:55
#3  0x0000aaaae0de6ee0 in flb_main (argc=<optimized out>, argv=<optimized out>) at /src/fluent-bit/src/fluent-bit.c:1388
#4  0x0000ffffad187740 in __libc_start_call_main (main=main@entry=0xaaaae0de5640 <main>, argc=argc@entry=4, argv=argv@entry=0xffffc0932cc8) at ../sysdeps/nptl/libc_start_call_main.h:58
#5  0x0000ffffad187818 in __libc_start_main_impl (main=0xaaaae0de5640 <main>, argc=4, argv=0xffffc0932cc8, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=<optimized out>) at ../csu/libc-start.c:360
#6  0x0000aaaae0de56f0 in _start ()

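For reference, Thread 2 is the only thread doing real work in this dump; the other threads are parked in epoll_pwait or sleep. Frames #21 and #20 show flb_filter_do() releasing a buffer via flb_free(), which forwards to free() and from there descends into the bundled jemalloc 5.3.0 (frames #19 down to #0). Below is a rough C sketch of the top of that call path; release_previous_buffer() and its parameters are hypothetical names for illustration only, while flb_free() and flb_filter_do() come from the backtrace:

/* Frame #20: flb_free() (include/fluent-bit/flb_mem.h:127) forwards to
 * free(), which is provided by the bundled jemalloc 5.3.0 build. */
#include <stdlib.h>

static inline void flb_free(void *ptr)
{
    free(ptr);  /* frames #19-#10: je_free_default() -> je_large_dalloc() -> je_ecache_dalloc() */
}

/* Hypothetical helper, for illustration only: roughly what flb_filter_do()
 * (frame #21) does when a filter hands back a modified buffer - the previous
 * intermediate buffer is released with flb_free(). In this dump that release
 * never returns; frames #9-#0 are inside jemalloc's extent coalescing and
 * pairing-heap maintenance, ending in phn_next_set(). */
static void release_previous_buffer(void **buf, size_t *size,
                                    void *new_buf, size_t new_size)
{
    if (new_buf != *buf) {
        flb_free(*buf);          /* Thread 2 is stuck below this call */
        *buf  = new_buf;
        *size = new_size;
    }
}

Given that CPU rises while logs stop flowing, this looks to me more like the pairing-heap walk spinning than the thread blocking on a lock, but I cannot confirm that from the dump alone.
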
To Reproduce

I am not able to reproduce it at the moment; this doesn't happen frequently.

Expected behavior

No deadlocks. The only workaround at the moment is to monitor for pods that hit their CPU limit and kill them.

Screenshots

Your Environment

  • Version used: Built from master branch commit 41b90b7
  • Configuration: Attaching tail config below
  • Environment name and version (e.g. Kubernetes? What version?): Kubernetes EKS v1.29.8
  • Server type and version: -
  • Operating System and version: Amazon Linux
  • Filters and plugins: Attaching full config below

Additional context

Full config:

[SERVICE]
    Daemon Off
    Flush 10
    Log_Level warn
    Parsers_File /fluent-bit/etc/parsers.conf
    Parsers_File /fluent-bit/etc/conf/custom_parsers.conf
    HTTP_Server On
    HTTP_Listen 0.0.0.0
    HTTP_Port 2020
    Health_Check On

[INPUT]
    Name tail
    multiline.parser docker, cri
    Path /var/log/containers/*.log
    Tag kube.*
    Skip_Long_Lines On
    Refresh_Interval 1
    DB /tmp/fluent/flb-kube.db
    DB.locking true

[FILTER]
    Name kubernetes
    Match kube.*
    Merge_Log On
    Keep_Log On
    K8S-Logging.Parser Off
    K8S-Logging.Exclude On
    Buffer_Size 0
    Merge_Log_Key parsed_message
    Kube_URL https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}
    tls.vhost kubernetes.default.svc

[FILTER]
    Name grep
    Match *
    Logical_Op or
    Exclude log should not be processed by cluster autoscaler \(no node group config\)
    Exclude log _logKeep":false

[FILTER]
    Name lua
    Match *
    Script /fluent-bit/scripts/filter_annotations.lua
    Call filter_annotations

[FILTER]
    Name record_modifier
    Match *
    Record cluster_name ${CLUSTER_NAME}
    Record environment ${env}
    Record aws_account_id ${aws_account_id}
    Record aws_account_name ${aws_account_name}
    Record aws_account_region ${aws_account_region}


# This parses logs that were not parsed by the multiline.parser parsers
[FILTER]
    Name parser
    Alias klog_parser
    Match kube.var.log.containers.*
    Key_Name log
    Parser klog_parser
    Reserve_Data On
    Preserve_Key On


# This parses logs that were not parsed by the multiline.parser parsers
[FILTER]
    Name parser
    Alias coredns_parser
    Match kube.var.log.containers.coredns*
    Key_Name log
    Parser coredns_parser
    Reserve_Data On
    Preserve_Key On


# We lift parsed_message because the modify plugin does not support nested fields. We want to make sure that we end up with only msg and not message
[FILTER]
    Name nest
    Match *
    Operation lift
    Wildcard ^(level|msg|message)$
    Nested_under parsed_message
    Add_prefix p_

[FILTER]
    Name modify
    Match *
    Condition Key_does_not_exist $p_msg
    Rename p_message p_msg

[FILTER]
    Name modify
    Match *
    Condition Key_does_not_exist $p_time
    Rename time p_time

# If we didn't match any of the above, we send the raw log as msg
[FILTER]
    Name modify
    Match *
    Condition Key_does_not_exist $p_msg
    Rename log p_msg

[FILTER]
    Name nest
    Match *
    Operation nest
    Wildcard p_*
    Nest_under parsed_message
    Remove_prefix p_

[FILTER]
    Name record_modifier
    Match *
    Allowlist_key parsed_message
    Allowlist_key kubernetes
    Allowlist_key cluster_name
    Allowlist_key environment
    Allowlist_key aws_account_id
    Allowlist_key aws_account_name
    Allowlist_key aws_account_region
    Uuid_key log_id

[OUTPUT]
