Skip to content

Commit d3947e9

Browse files
authored
Merge pull request #2348 from digma-ai/down-engine-between-retries
Retry the docker engine command if the error is "pod already exists". Closes #2276
2 parents e10d896 + 0455dca commit d3947e9

File tree

1 file changed

+40
-11
lines changed
  • ide-common/src/main/kotlin/org/digma/intellij/plugin/docker

1 file changed

+40
-11
lines changed

ide-common/src/main/kotlin/org/digma/intellij/plugin/docker/Engine.kt

Lines changed: 40 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,9 @@ internal class Engine {
1818

1919
private val streamExecutor = Executors.newFixedThreadPool(2)
2020

21-
private val engineLock = ReentrantLock(true)
21+
//executeCommandLock must be used only in executeCommand, which is not recursive; other methods here may call each other,
22+
// especially when executeCommandWithRetry runs a doBetweenRetries action
23+
private val executeCommandLock = ReentrantLock(true)
2224

2325
//this message is used to identify timeout of the process
2426
private val timeoutMessage = "process exited with timeout"
@@ -36,7 +38,21 @@ internal class Engine {
3638
.withRedirectErrorStream(true)
3739
.toProcessBuilder()
3840

39-
return executeCommandWithRetry(project, "up", composeFile, processBuilder, reportToPosthog = true, ignoreNonRealErrors = true)
41+
//if up failed, run down before retrying; in many cases this fixes the problem
42+
val actionBetweenRetry = Runnable {
43+
remove(project, composeFile, dockerComposeCmd, reportToPosthog = false)
44+
}
45+
46+
47+
return executeCommandWithRetry(
48+
project,
49+
"up",
50+
composeFile,
51+
processBuilder,
52+
reportToPosthog = true,
53+
ignoreNonRealErrors = true,
54+
doBetweenRetries = actionBetweenRetry
55+
)
4056
}
4157

4258

@@ -67,7 +83,12 @@ internal class Engine {
6783
.withRedirectErrorStream(true)
6884
.toProcessBuilder()
6985

70-
return executeCommandWithRetry(project, "up", composeFile, processBuilder)
86+
//if up failed, run down before retrying; in many cases this fixes the problem
87+
val actionBetweenRetry = Runnable {
88+
remove(project, composeFile, dockerComposeCmd, reportToPosthog = false)
89+
}
90+
91+
return executeCommandWithRetry(project, "up", composeFile, processBuilder, doBetweenRetries = actionBetweenRetry)
7192

7293
}
7394

@@ -88,7 +109,7 @@ internal class Engine {
88109
}
89110

90111

91-
fun remove(project: Project, composeFile: File, dockerComposeCmd: List<String>): String {
112+
fun remove(project: Project, composeFile: File, dockerComposeCmd: List<String>, reportToPosthog: Boolean = true): String {
92113

93114
Log.log(logger::info, "starting uninstall")
94115

@@ -102,7 +123,7 @@ internal class Engine {
102123
.withRedirectErrorStream(true)
103124
.toProcessBuilder()
104125

105-
return executeCommandWithRetry(project, "down", composeFile, processBuilder)
126+
return executeCommandWithRetry(project, "down", composeFile, processBuilder, reportToPosthog = reportToPosthog)
106127

107128
}
108129

@@ -114,6 +135,7 @@ internal class Engine {
114135
processBuilder: ProcessBuilder,
115136
reportToPosthog: Boolean = true,
116137
ignoreNonRealErrors: Boolean = false,
138+
doBetweenRetries: Runnable? = null
117139
): String {
118140

119141
//try 3 times in case of failure
@@ -133,12 +155,18 @@ internal class Engine {
133155
)
134156
)
135157
}
158+
136159
Log.log(logger::info, "docker command {} failed with exit value {}, retrying..", name, exitValue)
160+
161+
doBetweenRetries?.let {
162+
Log.log(logger::info, "executing doBetweenRetries action before next retry")
163+
it.run()
164+
}
137165
}
138166

139167
//last chance
140168
Log.log(logger::info, "executing command {}, last chance after 3 failures", name)
141-
return executeCommand(project, "down", composeFile, processBuilder)
169+
return executeCommand(project, name, composeFile, processBuilder)
142170
}
143171

144172
private fun shouldExit(exitValue: String): Boolean {
@@ -154,8 +182,9 @@ internal class Engine {
154182
private fun isRetryTriggerExitValue(exitValue: String): Boolean {
155183

156184
//"process exited with timeout" is the message set in buildExitValue
157-
return exitValue.startsWith(timeoutMessage) ||
158-
exitValue.contains("unexpected EOF")
185+
return exitValue.startsWith(timeoutMessage, true) ||
186+
exitValue.contains("unexpected EOF", true) ||
187+
exitValue.contains("pod already exists", true)
159188

160189
}
161190

@@ -170,7 +199,7 @@ internal class Engine {
170199
): String {
171200

172201
try {
173-
engineLock.lock()
202+
executeCommandLock.lock()
174203

175204
Log.log(logger::info, "executing {}, compose file {}, command {}", name, composeFile, processBuilder.command())
176205

@@ -265,8 +294,8 @@ internal class Engine {
265294
Log.warnWithException(logger, e, "error running docker command {}", processBuilder.command())
266295
return e.message ?: e.toString()
267296
} finally {
268-
if (engineLock.isHeldByCurrentThread) {
269-
engineLock.unlock()
297+
if (executeCommandLock.isHeldByCurrentThread) {
298+
executeCommandLock.unlock()
270299
}
271300
}
272301
}

0 commit comments

Comments
 (0)