Skip to content

Commit 4ba956f

Browse files
authored
feat: websocket max heap size configuration (#1538)
* fix: set max process heap size to 500MB instead of 8GB
* feat: set websocket transport max heap size. WEBSOCKET_MAX_HEAP_SIZE can be used to configure it.
1 parent 380b882 commit 4ba956f

File tree

6 files changed

+25
-4
lines changed

6 files changed

+25
-4
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ If you're using the default tenant, the URL is `ws://realtime-dev.localhost:4000
169169
| CONNECT_PARTITION_SLOTS | number | Number of dynamic supervisor partitions used by the Connect, ReplicationConnect processes |
170170
| METRICS_CLEANER_SCHEDULE_TIMER_IN_MS | number | Time in ms to run the Metric Cleaner task |
171171
| METRICS_RPC_TIMEOUT_IN_MS | number | Time in ms to wait for RPC call to fetch Metric per node |
172+
| WEBSOCKET_MAX_HEAP_SIZE | number | Maximum heap size, in bytes, for each WebSocket transport process. If a process reaches the limit it is brutally killed. Defaults to 50000000 (50MB). |
172173
| REQUEST_ID_BAGGAGE_KEY | string | OTEL Baggage key to be used as request id |
173174
| OTEL_SDK_DISABLED | boolean | Disable OpenTelemetry tracing completely when 'true' |
174175
| OTEL_TRACES_EXPORTER | string | Possible values: `otlp` or `none`. See the [opentelemetry-erlang OS environment documentation](https://github.com/open-telemetry/opentelemetry-erlang/tree/v1.4.0/apps#os-environment) for more details on how to configure the traces exporter. |

config/runtime.exs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ janitor_children_timeout = Env.get_integer("JANITOR_CHILDREN_TIMEOUT", :timer.se
6868
janitor_schedule_timer = Env.get_integer("JANITOR_SCHEDULE_TIMER_IN_MS", :timer.hours(4))
6969
platform = if System.get_env("AWS_EXECUTION_ENV") == "AWS_ECS_FARGATE", do: :aws, else: :fly
7070
broadcast_pool_size = Env.get_integer("BROADCAST_POOL_SIZE", 10)
71+
# WEBSOCKET_MAX_HEAP_SIZE is given in bytes (default 50_000_000). It is divided by the VM word
# size because the value ends up in Process.flag(:max_heap_size, ...), which is expressed in words.
websocket_max_heap_size = div(Env.get_integer("WEBSOCKET_MAX_HEAP_SIZE", 50_000_000), :erlang.system_info(:wordsize))
7172

7273
no_channel_timeout_in_ms =
7374
if config_env() == :test,
@@ -107,6 +108,7 @@ config :realtime, Realtime.Repo,
107108
ssl: ssl_opts
108109

109110
config :realtime,
111+
websocket_max_heap_size: websocket_max_heap_size,
110112
migration_partition_slots: migration_partition_slots,
111113
connect_partition_slots: connect_partition_slots,
112114
rebalance_check_interval_in_ms: rebalance_check_interval_in_ms,

lib/realtime_web/channels/user_socket.ex

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
defmodule RealtimeWeb.UserSocket do
2+
# This is defined up here before `use Phoenix.Socket` is called so that we can define `Phoenix.Socket.init/1`
3+
# It has to be overridden because we need to set the `max_heap_size` flag from the transport process context
4+
@impl true
5+
def init(state) when is_tuple(state) do
6+
# Process.flag/2 only affects the calling process, so the limit has to be set here,
# while running inside the transport process.
Process.flag(:max_heap_size, max_heap_size())
7+
# Hand over to the regular Phoenix.Socket initialisation with the state untouched.
Phoenix.Socket.__init__(state)
8+
end
9+
210
use Phoenix.Socket
311
use Realtime.Logs
412

@@ -122,4 +130,6 @@ defmodule RealtimeWeb.UserSocket do
122130
_ -> @default_log_level
123131
end
124132
end
133+
134+
# Reads the transport heap limit from the :realtime application env at call time.
# Application.fetch_env!/2 raises if :websocket_max_heap_size has not been configured.
defp max_heap_size(), do: Application.fetch_env!(:realtime, :websocket_max_heap_size)
125135
end

mix.exs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ defmodule Realtime.MixProject do
44
def project do
55
[
66
app: :realtime,
7-
version: "2.50.2",
7+
version: "2.51.0",
88
elixir: "~> 1.17.3",
99
elixirc_paths: elixirc_paths(Mix.env()),
1010
start_permanent: Mix.env() == :prod,

rel/vm.args.eex

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@
1010
## Tweak GC to run more often
1111
##-env ERL_FULLSWEEP_AFTER 10
1212

13-
## Limit process heap for all procs to 1000 MB
14-
+hmax 1000000000
13+
## Limit process heap for all procs to 500 MB. +hmax takes the size in words, so the byte count is divided by the word size
14+
+hmax <%= div(500_000_000, :erlang.system_info(:wordsize)) %>
1515

1616
## Set distribution buffer busy limit (default is 1024)
1717
+zdbbl 100000
1818

1919
## Disable Busy Wait
2020
+sbwt none
2121
+sbwtdio none
22-
+sbwtdcpu none
22+
+sbwtdcpu none

test/realtime_web/channels/realtime_channel_test.exs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ defmodule RealtimeWeb.RealtimeChannelTest do
2828

2929
setup :rls_context
3030

31+
# Connects a socket and checks the :max_heap_size flag reported for its transport process.
test "max heap size is set", %{tenant: tenant} do
32+
jwt = Generators.generate_jwt_token(tenant)
33+
{:ok, %Socket{} = socket} = connect(UserSocket, %{}, conn_opts(tenant, jwt))
34+
35+
# NOTE(review): 6_250_000 words looks like the 50_000_000-byte default divided by an 8-byte
# word, i.e. this assumes a 64-bit VM and no WEBSOCKET_MAX_HEAP_SIZE override in the test env.
assert Process.info(socket.transport_pid, :max_heap_size) ==
36+
{:max_heap_size, %{error_logger: true, include_shared_binaries: false, kill: true, size: 6_250_000}}
37+
end
38+
3139
describe "broadcast" do
3240
@describetag policies: [:authenticated_all_topic_read]
3341

0 commit comments

Comments
 (0)