Skip to content

Commit f3f7bd3

Browse files
authored
[hive] Fix hive catalog lock may encounter deadlock. (apache#6783)
1 parent d7a6a8c commit f3f7bd3

File tree

1 file changed

+43
-23
lines changed

1 file changed

+43
-23
lines changed

paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalogLock.java

Lines changed: 43 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -33,10 +33,11 @@
3333
import org.apache.hadoop.hive.metastore.api.LockState;
3434
import org.apache.hadoop.hive.metastore.api.LockType;
3535
import org.apache.thrift.TException;
36+
import org.slf4j.Logger;
37+
import org.slf4j.LoggerFactory;
3638

3739
import java.net.InetAddress;
3840
import java.net.UnknownHostException;
39-
import java.time.Duration;
4041
import java.util.Collections;
4142
import java.util.concurrent.Callable;
4243

@@ -46,6 +47,8 @@
4647
/** Hive {@link CatalogLock}. */
4748
public class HiveCatalogLock implements CatalogLock {
4849

50+
private static final Logger LOG = LoggerFactory.getLogger(HiveCatalogLock.class);
51+
4952
static final String LOCK_IDENTIFIER = "hive";
5053

5154
private final ClientPool<IMetaStoreClient, TException> clients;
@@ -77,43 +80,60 @@ private long lock(String database, String table)
7780
new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, database);
7881
lockComponent.setTablename(table);
7982
lockComponent.unsetOperationType();
83+
84+
long startMs = System.currentTimeMillis();
8085
final LockRequest lockRequest =
8186
new LockRequest(
8287
Collections.singletonList(lockComponent),
8388
System.getProperty("user.name"),
8489
InetAddress.getLocalHost().getHostName());
8590
LockResponse lockResponse = clients.run(client -> client.lock(lockRequest));
91+
long lockId = lockResponse.getLockid();
8692

8793
long nextSleep = 50;
88-
long startRetry = System.currentTimeMillis();
89-
while (lockResponse.getState() == LockState.WAITING) {
90-
nextSleep *= 2;
91-
if (nextSleep > checkMaxSleep) {
92-
nextSleep = checkMaxSleep;
93-
}
94-
Thread.sleep(nextSleep);
9594

96-
final LockResponse tempLockResponse = lockResponse;
97-
lockResponse = clients.run(client -> client.checkLock(tempLockResponse.getLockid()));
98-
if (System.currentTimeMillis() - startRetry > acquireTimeout) {
99-
break;
95+
try {
96+
while (lockResponse.getState() == LockState.WAITING) {
97+
long elapsed = System.currentTimeMillis() - startMs;
98+
if (elapsed >= acquireTimeout) {
99+
break;
100+
}
101+
102+
nextSleep = Math.min(nextSleep * 2, checkMaxSleep);
103+
Thread.sleep(nextSleep);
104+
105+
lockResponse = clients.run(client -> client.checkLock(lockId));
106+
}
107+
} finally {
108+
if (lockResponse.getState() != LockState.ACQUIRED) {
109+
// unlock if not acquired
110+
unlock(lockId);
100111
}
101112
}
102-
long retryDuration = System.currentTimeMillis() - startRetry;
103113

104-
if (lockResponse.getState() != LockState.ACQUIRED) {
105-
if (lockResponse.getState() == LockState.WAITING) {
106-
final LockResponse tempLockResponse = lockResponse;
107-
clients.execute(client -> client.unlock(tempLockResponse.getLockid()));
108-
}
109-
throw new RuntimeException(
110-
"Acquire lock failed with time: " + Duration.ofMillis(retryDuration));
114+
LockState lockState = lockResponse.getState();
115+
long duration = System.currentTimeMillis() - startMs;
116+
String msg =
117+
String.format(
118+
"for table %s.%s (lockId=%d) after %dms. Final lock state: %s",
119+
database, table, lockId, duration, lockState);
120+
LOG.info("Acquire lock {}", msg);
121+
if (lockState == LockState.ACQUIRED) {
122+
return lockId;
111123
}
112-
return lockResponse.getLockid();
124+
125+
throw new RuntimeException("Acquire lock failed " + msg);
113126
}
114127

115-
private void unlock(long lockId) throws TException, InterruptedException {
116-
clients.execute(client -> client.unlock(lockId));
128+
private void unlock(long lockId) {
129+
if (lockId <= 0) {
130+
return;
131+
}
132+
try {
133+
clients.execute(client -> client.unlock(lockId));
134+
} catch (Exception e) {
135+
LOG.warn("Unlock failed for lockId={}", lockId, e);
136+
}
117137
}
118138

119139
@Override

0 commit comments

Comments
 (0)