# SPDX-FileCopyrightText: 2024 MTS PJSC
# SPDX-License-Identifier: Apache-2.0

import textwrap
from typing import Literal

from pydantic import BaseModel, ByteSize, Field, model_validator


class ConsumerSettings(BaseModel):
    """Data.Rentgen consumer-specific settings.

    These options are passed directly to
    `AIOKafkaConsumer <https://aiokafka.readthedocs.io/en/stable/api.html#aiokafka.AIOKafkaConsumer>`_.

    Examples
    --------

    .. code-block:: bash

        DATA_RENTGEN__CONSUMER__TOPICS_LIST=["input.runs"]
        DATA_RENTGEN__CONSUMER__GROUP_ID=data-rentgen
        DATA_RENTGEN__CONSUMER__FETCH_MAX_WAIT_MS=5000
        DATA_RENTGEN__CONSUMER__MAX_PARTITION_FETCH_BYTES=5MiB
    """

    topics_list: list[str] = Field(
        default=["input.runs"],
        description="List of Kafka topics to subscribe to. Mutually exclusive with :obj:`~topics_pattern`.",
    )
    topics_pattern: str | None = Field(
        default=None,
        description="Regex pattern of topics to subscribe to. Mutually exclusive with :obj:`~topics_list`.",
    )

    @model_validator(mode="after")
    def _check_topics(self):
        if not self.topics_list and not self.topics_pattern:
            raise ValueError("Input should contain either 'topics_list' or 'topics_pattern', but both are empty")
        if self.topics_list and self.topics_pattern:
            raise ValueError("Input should contain either 'topics_list' or 'topics_pattern', but both are set")
        return self

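    # Illustrative behavior of the validator above (a sketch; the values are hypothetical):
    #   ConsumerSettings(topics_list=[], topics_pattern=r"input\..*")  # OK: pattern only
    #   ConsumerSettings(topics_list=[], topics_pattern=None)          # raises: both are empty
    #   ConsumerSettings(topics_pattern=r"input\..*")                  # raises: both are set,
    #                                                                  # topics_list defaults to ["input.runs"]
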
    group_id: str | None = Field(
        default="data-rentgen",
        description=textwrap.dedent(
            """
            Name of the consumer group to join for dynamic partition assignment (if enabled),
            and to use for fetching and committing offsets.
            If ``None``, auto-partition assignment (via group coordinator) and offset commits are disabled.
            """,
        ),
    )

    # Defaults are copied from FastStream: https://github.com/airtai/faststream/blob/0.5.33/faststream/kafka/fastapi/fastapi.py#L618
    # but only the consumer-related options.
    max_records: int | None = Field(
        default=None,
        description=textwrap.dedent(
            """
            Number of messages to consume as one batch.
            ``None`` means no limit is applied.
            """,
        ),
    )
    fetch_max_bytes: ByteSize = Field(
        default=ByteSize(50 * 1024 * 1024),
        description="The maximum amount of data the server should return for a fetch request.",
    )
    fetch_min_bytes: ByteSize = Field(
        default=ByteSize(1),
        description=textwrap.dedent(
            """
            Minimum amount of data the server should
            return for a fetch request, otherwise wait up to
            :obj:`~fetch_max_wait_ms` for more data to accumulate.
            """,
        ),
    )
    fetch_max_wait_ms: int = Field(
        default=500,
        description=textwrap.dedent(
            """
            The maximum amount of time in milliseconds
            the server will block before answering the fetch request if
            there isn't sufficient data to immediately satisfy the
            requirement given by :obj:`~fetch_min_bytes`.
            """,
        ),
    )
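    # Note (illustrative): with the defaults above (fetch_min_bytes=1, fetch_max_wait_ms=500),
    # the broker responds as soon as any data is available, waiting at most 500 ms
    # when there is no data to return.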
    max_partition_fetch_bytes: ByteSize = Field(
        default=ByteSize(1024 * 1024),
        description=textwrap.dedent(
            """
            The maximum amount of data
            per-partition the server will return. The maximum total memory
            used for a request ``= #partitions * max_partition_fetch_bytes``.

            This size must be at least as large as the maximum message size
            the server allows or else it is possible for the producer to
            send messages larger than the consumer can fetch. If that
            happens, the consumer can get stuck trying to fetch a large
            message on a certain partition.
            """,
        ),
    )
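    # Rough sizing example (assuming 10 assigned partitions, a hypothetical figure):
    # a single fetch request may hold up to 10 * 1 MiB = 10 MiB in memory with the
    # default max_partition_fetch_bytes, per the formula in the description above.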
    batch_timeout_ms: int = Field(
        default=200,
        description=textwrap.dedent(
            """
            Milliseconds spent waiting if data is not available in the buffer.
            If 0, returns immediately with any records that are currently available
            in the buffer, or an empty batch if there are none.
            """,
        ),
    )
    auto_offset_reset: Literal["latest", "earliest", "none"] = Field(
        default="latest",
        description=textwrap.dedent(
            """
            A policy for resetting offsets on ``OffsetOutOfRangeError`` errors:

            * ``earliest`` will move to the oldest available message
            * ``latest`` will move to the most recent
            * ``none`` will raise an exception so you can handle this case
            """,
        ),
    )
    max_poll_interval_ms: int = Field(
        default=5 * 60 * 1000,
        description=textwrap.dedent(
            """
            Maximum allowed time between calls to consume messages in batches.
            If this interval is exceeded, the consumer is considered failed and the group will
            rebalance in order to reassign the partitions to another consumer
            group member.
            If API methods block waiting for messages, that time
            does not count against this timeout.
            """,
        ),
    )
    session_timeout_ms: int = Field(
        default=10 * 1000,
        description=textwrap.dedent(
            """
            Client group session and failure detection
            timeout. The consumer sends periodic heartbeats
            (``heartbeat.interval.ms``) to indicate its liveness to the broker.

            If no heartbeats are received by the broker for a group member within
            the session timeout, the broker will remove the consumer from the
            group and trigger a rebalance.

            The allowed range is configured with the **broker** configuration properties
            ``group.min.session.timeout.ms`` and ``group.max.session.timeout.ms``.
            """,
        ),
    )
    heartbeat_interval_ms: int = Field(
        default=3 * 1000,
        description=textwrap.dedent(
            """
            The expected time in milliseconds
            between heartbeats to the consumer coordinator when using
            Kafka's group management feature. Heartbeats are used to ensure
            that the consumer's session stays active and to facilitate
            rebalancing when new consumers join or leave the group.

            The value must be set lower than :obj:`~session_timeout_ms`, but typically
            should be set no higher than 1/3 of that value. It can be
            adjusted even lower to control the expected time for normal
            rebalances.
            """,
        ),
    )
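    # Sanity check on the defaults above: heartbeat_interval_ms=3000 is below one third
    # of session_timeout_ms=10000 (10000 / 3 ≈ 3333 ms), matching the 1/3 guideline
    # in the description.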
    consumer_timeout_ms: int = Field(
        default=200,
        description=textwrap.dedent(
            """
            Maximum wait timeout for the background fetching routine.
            Mostly defines how quickly the consumer notices a rebalance and
            requests new data for newly assigned partitions.
            """,
        ),
    )
    isolation_level: Literal["read_uncommitted", "read_committed"] = Field(
        default="read_uncommitted",
        description=textwrap.dedent(
            """
            Controls how to read messages written
            transactionally.

            * ``read_committed`` - batch consumer will only return
              transactional messages which have been committed.

            * ``read_uncommitted`` (the default) - batch consumer will
              return all messages, even transactional messages which have been
              aborted.

            Non-transactional messages will be returned unconditionally in
            either mode.

            Messages will always be returned in offset order. Hence, in
            ``read_committed`` mode, the batch consumer will only return
            messages up to the last stable offset (LSO), which is the one less
            than the offset of the first open transaction. In particular, any
            messages appearing after messages belonging to ongoing transactions
            will be withheld until the relevant transaction has been completed.
            As a result, ``read_committed`` consumers will not be able to read up
            to the high watermark when there are in-flight transactions.
            Further, in ``read_committed`` mode, the ``seek_to_end`` method will
            return the LSO.
            """,
        ),
    )
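

# A minimal usage sketch (an illustration, not part of this module). It assumes that,
# aside from the topic selection fields and the batch options (max_records,
# batch_timeout_ms) consumed elsewhere, the remaining fields map 1:1 onto
# AIOKafkaConsumer keyword arguments; "localhost:9092" is a placeholder broker address:
#
#   from aiokafka import AIOKafkaConsumer
#
#   settings = ConsumerSettings(topics_list=["input.runs"])
#   kwargs = settings.model_dump(
#       exclude={"topics_list", "topics_pattern", "max_records", "batch_timeout_ms"},
#   )
#   consumer = AIOKafkaConsumer(
#       *settings.topics_list,
#       bootstrap_servers="localhost:9092",  # assumption: broker address configured elsewhere
#       **kwargs,
#   )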