Skip to content

Commit eed50fa

Browse files
committed
Check if log available for Spark job container
Signed-off-by: Wei Zhang <[email protected]>
1 parent c72e10a commit eed50fa

File tree

1 file changed

+54
-36
lines changed
  • Utils/hdinsight-node-common/src/com/microsoft/azure/hdinsight/spark/common

1 file changed

+54
-36
lines changed

Utils/hdinsight-node-common/src/com/microsoft/azure/hdinsight/spark/common/SparkBatchJob.java

Lines changed: 54 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@
2626
import com.gargoylesoftware.htmlunit.Cache;
2727
import com.gargoylesoftware.htmlunit.ScriptException;
2828
import com.gargoylesoftware.htmlunit.WebClient;
29+
import com.gargoylesoftware.htmlunit.html.DomElement;
30+
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
2931
import com.gargoylesoftware.htmlunit.html.HtmlPage;
3032
import com.gargoylesoftware.htmlunit.html.HtmlTableBody;
3133
import com.microsoft.azure.hdinsight.common.MessageInfoType;
@@ -42,6 +44,7 @@
4244
import rx.Subscriber;
4345

4446
import java.io.IOException;
47+
import java.net.MalformedURLException;
4548
import java.net.URI;
4649
import java.net.UnknownServiceException;
4750
import java.util.AbstractMap.SimpleImmutableEntry;
@@ -452,53 +455,36 @@ public Observable<AppAttempt> getSparkJobYarnCurrentAppAttempt() {
452455
* @return The string pair Observable of Host and Container Id
453456
*/
454457
public Observable<SimpleImmutableEntry<String, String>> getSparkJobYarnContainersObservable(@NotNull AppAttempt appAttempt) {
455-
return Observable.create((Subscriber<? super HtmlPage> ob) -> {
456-
String getYarnUiAppAttemptsURL = getConnectUri()
457-
.resolve("/yarnui/hn/cluster/appattempt/")
458-
.resolve(appAttempt.getAppAttemptId())
459-
.toString();
460-
461-
462-
final WebClient HTTP_WEB_CLIENT = new WebClient(BrowserVersion.CHROME);
463-
HTTP_WEB_CLIENT.setCache(globalCache);
464-
465-
if (getSubmission().getCredentialsProvider() != null) {
466-
HTTP_WEB_CLIENT.setCredentialsProvider(getSubmission().getCredentialsProvider());
467-
}
468-
469-
try {
470-
ob.onNext(HTTP_WEB_CLIENT.getPage(getYarnUiAppAttemptsURL));
471-
} catch (IOException e) {
472-
log().warn("get Spark job Yarn attempts detail IO Error", e);
473-
ob.onError(e);
474-
} catch (ScriptException ignored) {
475-
log().debug("get Spark job Yarn attempts detail browser rendering Error", ignored);
476-
} finally {
477-
ob.onCompleted();
478-
}
479-
})
458+
return Observable.just(appAttempt)
459+
.map(attempt -> getConnectUri()
460+
.resolve("/yarnui/hn/cluster/appattempt/")
461+
.resolve(attempt.getAppAttemptId())
462+
.toString())
463+
.flatMap(this::loadPageByBrowserObservable)
480464
.retry(getRetriesMax())
481-
.delay(3, TimeUnit.SECONDS) // Workaround to waiting for the page loading finished
482465
.repeatWhen(ob -> ob.delay(getDelaySeconds(), TimeUnit.SECONDS))
483466
.takeUntil(this::isSparkJobYarnAppAttemptNotJustLaunched)
484467
.filter(this::isSparkJobYarnAppAttemptNotJustLaunched)
485468
.flatMap(htmlPage -> {
486-
// Get the container table by XPath
487-
HtmlTableBody containerBody = htmlPage.getFirstByXPath("//*[@id=\"containers\"]/tbody");
488-
489-
return Observable
490-
.from(containerBody
491-
.getRows()
492-
.stream()
469+
// Get the container table by XPath
470+
HtmlTableBody containerBody = htmlPage.getFirstByXPath("//*[@id=\"containers\"]/tbody");
471+
472+
return Observable
473+
.from(containerBody.getRows())
474+
.flatMap(row -> Observable.just(getConnectUri())
475+
.map(baseUri -> baseUri.resolve(((HtmlAnchor) row.getCell(3).getFirstChild())
476+
.getHrefAttribute())
477+
.toString())
478+
.flatMap(this::loadPageByBrowserObservable)
479+
.filter(this::isSparkJobYarnContainerLogAvailable)
480+
.map(page -> row))
493481
.map(row -> {
494482
String hostUrl = row.getCell(1).getTextContent().trim();
495483
String host = URI.create(hostUrl).getHost();
496484
String containerId = row.getCell(0).getTextContent().trim();
497485

498486
return new SimpleImmutableEntry<>(host, containerId);
499-
})
500-
.collect(Collectors.toList())
501-
);
487+
});
502488
});
503489
}
504490

@@ -526,6 +512,38 @@ private Boolean isSparkJobYarnAppAttemptNotJustLaunched(@NotNull HtmlPage htmlPa
526512
.orElse(false);
527513
}
528514

515+
private Boolean isSparkJobYarnContainerLogAvailable(@NotNull HtmlPage htmlPage) {
516+
Optional<DomElement> firstContent = Optional.ofNullable(
517+
htmlPage.getFirstByXPath("//*[@id=\"layout\"]/tbody/tr/td[2]"));
518+
519+
return firstContent.map(DomElement::getTextContent)
520+
.map(line -> !line.trim()
521+
.toLowerCase()
522+
.contains("no logs available"))
523+
.orElse(false);
524+
}
525+
526+
private Observable<HtmlPage> loadPageByBrowserObservable(String url) {
527+
final WebClient HTTP_WEB_CLIENT = new WebClient(BrowserVersion.CHROME);
528+
HTTP_WEB_CLIENT.setCache(globalCache);
529+
530+
if (getSubmission().getCredentialsProvider() != null) {
531+
HTTP_WEB_CLIENT.setCredentialsProvider(getSubmission().getCredentialsProvider());
532+
}
533+
534+
return Observable.create(ob -> {
535+
try {
536+
ob.onNext(HTTP_WEB_CLIENT.getPage(url));
537+
} catch (ScriptException ignored) {
538+
log().debug("get Spark job Yarn attempts detail browser rendering Error", ignored);
539+
} catch (IOException e) {
540+
ob.onError(e);
541+
} finally {
542+
ob.onCompleted();
543+
}
544+
});
545+
}
546+
529547
/**
530548
* Get Spark Job driver log URL with retries
531549
*

0 commit comments

Comments
 (0)