From f5365ec3644714630f9f2cb19975f4b79b1a323d Mon Sep 17 00:00:00 2001 From: Joao Amaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 14 May 2026 16:23:23 -0300 Subject: [PATCH] fix(bots): skip bot upsert when nothing changed to stop team-strip + reindex loop on boot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `BotResource.initialize()` runs `UserUtil.addOrUpdateBotUser(user)` for every bot on every OM boot. The in-memory `User` built by `UserUtil.user(...)` does not have the `teams` field populated, so the PUT path through `userRepository.createOrUpdate -> UserUpdater.entitySpecificUpdate` runs `updateTeams(original, updated)` with `original.teams = [Organization]` (or the bot's real stored teams) and `updated.teams = null`. `updateTeams` then executes `deleteTo(user, HAS, TEAM) + assignTeams(null)`, which strips every stored team membership the bot had, bumps the user version, and triggers an Elasticsearch reindex of both the user and each affected team. With several bots this fires on every restart and produces the reindex storm plus "Circular dependency detected in team hierarchy for team: Organization" warnings in the boot logs. In one production deployment this added almost 3 minutes to every boot. Short-circuit when the incoming bot has no real change vs. the persisted row: compare `description`, `displayName`, and `roles`. If they all match, return the original user and skip the PUT entirely — no `UserUpdater`, no team strip, no version bump, no reindex. Two adjustments to make the guard actually fire: - `retrieveWithAuthMechanism` now also loads `"roles"` (was loading only `"authenticationMechanism"`); `description` and `displayName` are scalar JSON-column fields and were already populated by the base read. - Compare `roles` via `listOrEmpty(...)` on both sides because the database-loaded original returns an empty list while the freshly built in-memory user returns null, and `Objects.equals(null, [])` is false. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openmetadata/service/util/UserUtil.java | 19 ++- .../service/util/UserUtilBotTest.java | 146 ++++++++++++++++++ 2 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 openmetadata-service/src/test/java/org/openmetadata/service/util/UserUtilBotTest.java diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/UserUtil.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/UserUtil.java index 0a95da7448e1..774b10e05a8e 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/UserUtil.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/UserUtil.java @@ -332,6 +332,19 @@ public static boolean assignTeamsFromClaim(User user, List teamNames) { */ public static User addOrUpdateBotUser(User user) { User originalUser = retrieveWithAuthMechanism(user); + // Short-circuit when the incoming bot user has no real change vs. what's already in the + // database. Without this guard every OM boot calls into addOrUpdateUser -> + // userRepository.createOrUpdate, and UserUpdater.entitySpecificUpdate then runs + // updateTeams/updateRoles/etc. with the incoming `user.getTeams() == null`, which strips + // the bot's stored team relationships, bumps the version, and triggers an Elasticsearch + // reindex of the bot user (and any team membership change ripples into the team_search + // index too). With many bots this is a reindex storm on every restart. + if (originalUser != null + && Objects.equals(listOrEmpty(originalUser.getRoles()), listOrEmpty(user.getRoles())) + && Objects.equals(originalUser.getDescription(), user.getDescription()) + && Objects.equals(originalUser.getDisplayName(), user.getDisplayName())) { + return originalUser; + } AuthenticationMechanism authMechanism = originalUser != null ? originalUser.getAuthenticationMechanism() : null; // the user did not have an auth mechanism and auth config is present @@ -362,8 +375,12 @@ private static User retrieveWithAuthMechanism(User user) { EntityRepository userRepository = (UserRepository) Entity.getEntityRepository(Entity.USER); try { + // Include "roles" so the no-op short-circuit in addOrUpdateBotUser can compare it + // against the incoming user. description and displayName are scalar fields stored in + // the entity JSON column, so they're already populated by the base read without an + // extra field fetch. return userRepository.getByName( - null, user.getName(), new Fields(Set.of("authenticationMechanism"))); + null, user.getName(), new Fields(Set.of("authenticationMechanism", "roles"))); } catch (EntityNotFoundException e) { LOG.debug("Bot entity: {} does not exists.", user); return null; diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/util/UserUtilBotTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/util/UserUtilBotTest.java new file mode 100644 index 000000000000..472ff5a1e5a5 --- /dev/null +++ b/openmetadata-service/src/test/java/org/openmetadata/service/util/UserUtilBotTest.java @@ -0,0 +1,146 @@ +/* + * Copyright 2026 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.openmetadata.service.util; + +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.mockito.MockedStatic; +import org.openmetadata.schema.entity.teams.User; +import org.openmetadata.schema.type.EntityReference; +import org.openmetadata.service.Entity; +import org.openmetadata.service.jdbi3.UserRepository; + +/** + * Unit coverage for the boot-time bot-team-strip loop. + * + *

{@code BotResource.initialize()} calls {@link UserUtil#addOrUpdateBotUser(User)} for every + * bot on every OM boot. The in-memory User built by {@code UserUtil.user(...)} does not have + * the {@code teams} field populated, so without the short-circuit guard the call falls through + * to {@code userRepository.createOrUpdate}, which runs {@code UserUpdater.updateTeams} with + * {@code updated.teams == null} and wipes the bot's stored team relationships, bumps the + * version, and triggers an Elasticsearch reindex on every boot. + */ +class UserUtilBotTest { + + @Test + void addOrUpdateBotUserShortCircuitsWhenNothingChanged() { + UserRepository userRepository = mock(UserRepository.class); + + User stored = + new User() + .withId(UUID.randomUUID()) + .withName("ingestion-bot") + .withFullyQualifiedName("ingestion-bot") + .withDisplayName("ingestion-bot") + .withDescription(null) + .withIsBot(true) + .withRoles(new ArrayList<>()) + // Pre-populate authMechanism so that if the short-circuit guard regresses, the + // fall-through path doesn't immediately blow up in JWTTokenGenerator (which + // isn't initialized in a pure unit test). With the bug present, the test still + // proceeds into addOrUpdateUser and the `never()` verify below fires the + // regression signal. + .withAuthenticationMechanism( + new org.openmetadata.schema.entity.teams.AuthenticationMechanism() + .withAuthType(org.openmetadata.schema.entity.teams.AuthenticationMechanism.AuthType.JWT)); + User incoming = + new User() + .withId(UUID.randomUUID()) + .withName("ingestion-bot") + .withFullyQualifiedName("ingestion-bot") + .withDisplayName("ingestion-bot") + .withDescription(null) + .withIsBot(true) + .withRoles(null); + + when(userRepository.getByName(any(), eq("ingestion-bot"), any())).thenReturn(stored); + + try (MockedStatic entityStatic = mockStatic(Entity.class)) { + entityStatic.when(() -> Entity.getEntityRepository(Entity.USER)).thenReturn(userRepository); + User result = UserUtil.addOrUpdateBotUser(incoming); + assertSame( + stored, + result, + "Short-circuit must return the original user without going through the PUT path"); + } + + // The whole point of the fix: never hit createOrUpdate when nothing changed. + verify(userRepository, never()).createOrUpdate(any(), any(), any()); + } + + @Test + void addOrUpdateBotUserGoesThroughUpsertWhenDisplayNameChanged() { + UserRepository userRepository = mock(UserRepository.class); + + User stored = + new User() + .withId(UUID.randomUUID()) + .withName("ingestion-bot") + .withFullyQualifiedName("ingestion-bot") + .withDisplayName("Old Display Name") + .withIsBot(true) + // Provide an authMechanism on the persisted row so the upsert path doesn't try + // to generate a fresh JWT via JWTTokenGenerator (which isn't initialized in a + // pure unit test). + .withAuthenticationMechanism( + new org.openmetadata.schema.entity.teams.AuthenticationMechanism() + .withAuthType(org.openmetadata.schema.entity.teams.AuthenticationMechanism.AuthType.JWT)); + User incoming = + new User() + .withId(UUID.randomUUID()) + .withName("ingestion-bot") + .withFullyQualifiedName("ingestion-bot") + .withDisplayName("New Display Name") + .withIsBot(true); + + when(userRepository.getByName(any(), eq("ingestion-bot"), any())).thenReturn(stored); + // Returning null from createOrUpdate path is fine: addOrUpdateUser wraps exceptions but + // we only care that the method was invoked (i.e. the short-circuit did NOT fire). + when(userRepository.findByNameOrNull(any(), any())).thenReturn(null); + + try (MockedStatic entityStatic = mockStatic(Entity.class)) { + entityStatic.when(() -> Entity.getEntityRepository(Entity.USER)).thenReturn(userRepository); + try { + User result = UserUtil.addOrUpdateBotUser(incoming); + assertNotEquals( + stored, + result, + "When fields differ the upsert path must run and produce a different User"); + } catch (RuntimeException ignored) { + // The downstream createOrUpdate call may throw against the mock; the assertion we + // care about is that the short-circuit guard did NOT fire, which we verify below. + } + } + + verify(userRepository).createOrUpdate(any(), any(User.class), any()); + } + + @SuppressWarnings("unused") + private static List roleRef(String name) { + List refs = new ArrayList<>(); + refs.add(new EntityReference().withId(UUID.randomUUID()).withName(name).withType("role")); + return refs; + } +}