Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 33 additions & 47 deletions jenkins/L0_MergeRequest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import groovy.transform.Field
import groovy.json.JsonOutput
import com.nvidia.bloom.KubernetesManager
import com.nvidia.bloom.Constants
import com.nvidia.bloom.Logger
import com.nvidia.bloom.JobBuilder
import org.jenkinsci.plugins.workflow.cps.CpsThread
import org.jsoup.Jsoup
import org.jenkinsci.plugins.pipeline.modeldefinition.Utils as jUtils
Expand Down Expand Up @@ -514,9 +516,9 @@ def getGithubMRChangedFile(pipeline, githubPrApiUrl, function, filePath="") {
def result = null
def pageId = 0
withCredentials([
string(
credentialsId: 'github-token-trtllm-ci',
variable: 'GITHUB_API_TOKEN'
usernamePassword(
credentialsId: 'github-cred-trtllm-ci',
passwordVariable: 'GITHUB_API_TOKEN'
),
]) {
while(true) {
Expand Down Expand Up @@ -939,36 +941,7 @@ def getCommonParameters()
]
}

/**
 * Trigger a downstream job and return its final result as a string.
 *
 * When a Jenkins URL is available (either passed in, or taken from
 * env.JENKINS_URL because env.localJobCredentials is set) the job is started
 * through triggerRemoteJob; otherwise it is started through the build step.
 *
 * @param jobName     name of the job to trigger
 * @param parameters  job parameters; converted via trtllm_utils before use
 * @param jenkinsUrl  base URL of the Jenkins instance to trigger on ("" = decide from env)
 * @param credentials credentials id handed to CredentialsAuth for the remote trigger
 * @return the build result reported by the triggered job
 */
def triggerJob(jobName, parameters, jenkinsUrl = "", credentials = "")
{
    // No explicit target given: use this instance's URL when local job credentials are configured.
    if (jenkinsUrl == "" && env.localJobCredentials) {
        jenkinsUrl = env.JENKINS_URL
        credentials = env.localJobCredentials
    }

    // Still no URL: run the job through the regular build step.
    if (jenkinsUrl == "") {
        def localBuild = build(
            job: jobName,
            parameters: trtllm_utils.toBuildParameters(parameters),
            // The result is returned to the caller instead of being propagated here.
            propagate: false,
        )
        echo "Triggered job: ${localBuild.absoluteUrl}"
        return localBuild.result
    }

    // Turn the resolved job name into a ".../job/<name>" style path and drop the leading '/'.
    // NOTE(review): assumes resolveFullJobName returns a path starting with '/' - confirm.
    def remoteJobPath = trtllm_utils.resolveFullJobName(jobName).replace('/', '/job/').substring(1)
    def remoteBuild = triggerRemoteJob(
        job: "${jenkinsUrl}${remoteJobPath}/",
        auth: CredentialsAuth(credentials: credentials),
        parameters: trtllm_utils.toRemoteBuildParameters(parameters),
        pollInterval: 60,
        abortTriggeredJob: true,
    )
    return remoteBuild.getBuildResult().toString()
}

def launchJob(jobName, reuseBuild, enableFailFast, globalVars, platform="x86_64", additionalParameters = [:]) {
def launchJob(pipeline, jobName, reuseBuild, enableFailFast, globalVars, platform="x86_64", additionalParameters = [:]) {
def parameters = getCommonParameters()
String globalVarsJson = writeJSON returnText: true, json: globalVars
parameters += [
Expand Down Expand Up @@ -998,13 +971,26 @@ def launchJob(jobName, reuseBuild, enableFailFast, globalVars, platform="x86_64"
parameters['reuseArtifactPath'] = "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${reuseBuild}"
}

if (jobName.startsWith("/")) {
jobName = jobName.substring(1)
} else {
def pos = env.JOB_NAME.lastIndexOf("/")
if (pos != -1) {
jobDir = env.JOB_NAME.substring(0, pos + 1)
} else {
jobDir = ""
}
jobName = "${jobDir}${jobName}"
}

echo "Trigger ${jobName} job, params: ${parameters}"

def status = triggerJob(jobName, parameters)
if (status != "SUCCESS") {
def logger = new Logger(pipeline)
def (jenkinsURL, buildStatus) = JobBuilder.build(pipeline, logger, jobName, parameters, 1, false)
if (buildStatus != "SUCCESS") {
error "Downstream job did not succeed"
}
return status
return buildStatus
}

def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
Expand All @@ -1023,9 +1009,9 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
'wheelDockerImagePy310': globalVars["LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE"],
'wheelDockerImagePy312': globalVars["LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE"],
]
launchJob("/LLM/helpers/Build-x86_64", reuseBuild, enableFailFast, globalVars, "x86_64", additionalParameters)
launchJob(pipeline, "/LLM/helpers/Build-x86_64", reuseBuild, enableFailFast, globalVars, "x86_64", additionalParameters)
}
def testStageName = "[Test-x86_64-Single-GPU] ${env.localJobCredentials ? "Remote Run" : "Run"}"
def testStageName = "[Test-x86_64-Single-GPU] Remote Run"
def singleGpuTestFailed = false
stage(testStageName) {
if (X86_TEST_CHOICE == STAGE_CHOICE_SKIP) {
Expand All @@ -1041,7 +1027,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
'wheelDockerImagePy312': globalVars["LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE"],
]

launchJob("L0_Test-x86_64-Single-GPU", false, enableFailFast, globalVars, "x86_64", additionalParameters)
launchJob(pipeline, "L0_Test-x86_64-Single-GPU", false, enableFailFast, globalVars, "x86_64", additionalParameters)
} catch (InterruptedException e) {
throw e
} catch (Exception e) {
Expand Down Expand Up @@ -1082,7 +1068,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
}
}

testStageName = "[Test-x86_64-Multi-GPU] ${env.localJobCredentials ? "Remote Run" : "Run"}"
testStageName = "[Test-x86_64-Multi-GPU] Remote Run"
stage(testStageName) {
if (X86_TEST_CHOICE == STAGE_CHOICE_SKIP) {
echo "x86_64 test job is skipped due to Jenkins configuration"
Expand All @@ -1097,7 +1083,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
'wheelDockerImagePy312': globalVars["LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE"],
]

launchJob("L0_Test-x86_64-Multi-GPU", false, enableFailFast, globalVars, "x86_64", additionalParameters)
launchJob(pipeline, "L0_Test-x86_64-Multi-GPU", false, enableFailFast, globalVars, "x86_64", additionalParameters)

} catch (InterruptedException e) {
throw e
Expand All @@ -1119,7 +1105,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
script {
def jenkinsUrl = ""
def credentials = ""
def testStageName = "[Test-SBSA-Single-GPU] ${env.localJobCredentials ? "Remote Run" : "Run"}"
def testStageName = "[Test-SBSA-Single-GPU] Remote Run"
def singleGpuTestFailed = false

if (testFilter[(ONLY_ONE_GROUP_CHANGED)] == "Docs") {
Expand All @@ -1131,7 +1117,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
def additionalParameters = [
"dockerImage": globalVars["LLM_SBSA_DOCKER_IMAGE"],
]
launchJob("/LLM/helpers/Build-SBSA", reuseBuild, enableFailFast, globalVars, "SBSA", additionalParameters)
launchJob(pipeline, "/LLM/helpers/Build-SBSA", reuseBuild, enableFailFast, globalVars, "SBSA", additionalParameters)
}
stage(testStageName) {
if (SBSA_TEST_CHOICE == STAGE_CHOICE_SKIP) {
Expand All @@ -1145,7 +1131,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
"dockerImage": globalVars["LLM_SBSA_DOCKER_IMAGE"],
]

launchJob("L0_Test-SBSA-Single-GPU", false, enableFailFast, globalVars, "SBSA", additionalParameters)
launchJob(pipeline, "L0_Test-SBSA-Single-GPU", false, enableFailFast, globalVars, "SBSA", additionalParameters)
} catch (InterruptedException e) {
throw e
} catch (Exception e) {
Expand Down Expand Up @@ -1186,7 +1172,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
}
}

testStageName = "[Test-SBSA-Multi-GPU] ${env.localJobCredentials ? "Remote Run" : "Run"}"
testStageName = "[Test-SBSA-Multi-GPU] Remote Run"
stage(testStageName) {
if (SBSA_TEST_CHOICE == STAGE_CHOICE_SKIP) {
echo "SBSA test job is skipped due to Jenkins configuration"
Expand All @@ -1199,7 +1185,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
"dockerImage": globalVars["LLM_SBSA_DOCKER_IMAGE"],
]

launchJob("L0_Test-SBSA-Multi-GPU", false, enableFailFast, globalVars, "SBSA", additionalParameters)
launchJob(pipeline, "L0_Test-SBSA-Multi-GPU", false, enableFailFast, globalVars, "SBSA", additionalParameters)

} catch (InterruptedException e) {
throw e
Expand Down Expand Up @@ -1234,7 +1220,7 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
'runSanityCheck': env.JOB_NAME ==~ /.*PostMerge.*/ ? true : false,
]

launchJob("/LLM/helpers/BuildDockerImages", false, enableFailFast, globalVars, "x86_64", additionalParameters)
launchJob(pipeline, "/LLM/helpers/BuildDockerImages", false, enableFailFast, globalVars, "x86_64", additionalParameters)
}
}
}
Expand Down