Skip to content

Commit 037c8af

Browse files
committed
Refactor: Renamed to SemaphoreProxy
Avoids confusion over the use of "MultiArticle". The main use case for this interface is to hide the actual call to Semaphore so that it can be mocked easily.
1 parent 5e51ef6 commit 037c8af

File tree

7 files changed

+26
-27
lines changed

7 files changed

+26
-27
lines changed
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,10 @@
99

1010
/**
1111
* Provides an abstraction over the call to Semaphore that sends a document containing multiple articles and
12-
* then receives a document containing a classification for each article.
13-
*
14-
* This will get a new name soon now that its scope is more than just classifying a set of articles.
12+
* then receives a document containing a classification for each article. Main use case is to enable easy mocking of
13+
* the calls to Semaphore for testing purposes.
1514
*/
16-
public interface MultiArticleClassifier extends Closeable {
15+
public interface SemaphoreProxy extends Closeable {
1716

1817
byte[] classifyDocument(byte[] content, String uri);
1918

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@
1515
* Not threadsafe, as the config of the underlying client is modified. This is fine in the context of the connector,
1616
* where it is only used by a single Spark data writer.
1717
*/
18-
class SemaphoreMultiArticleClassifier implements MultiArticleClassifier {
18+
class SemaphoreProxyImpl implements SemaphoreProxy {
1919

2020
private final ClassificationClient classificationClient;
2121
private long totalDurationOfCalls;
2222

23-
public SemaphoreMultiArticleClassifier(ClassificationConfiguration config) {
23+
public SemaphoreProxyImpl(ClassificationConfiguration config) {
2424
this.classificationClient = new ClassificationClient();
2525
config.setMultiArticle(true);
2626
classificationClient.setClassificationConfiguration(config);

marklogic-spark-api/src/main/java/com/marklogic/spark/core/classifier/SemaphoreTextClassifier.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,15 @@ class SemaphoreTextClassifier implements TextClassifier {
3434

3535
static final Logger SEMAPHORE_LOGGER = LoggerFactory.getLogger("com.marklogic.semaphore.classifier");
3636

37-
private final MultiArticleClassifier multiArticleClassifier;
37+
private final SemaphoreProxy semaphoreProxy;
3838
private final DOMHelper domHelper;
3939
private final Transformer transformer;
4040
private final String encoding;
4141
private final XPathExpression articleExpression;
4242
private final int batchSize;
4343

44-
SemaphoreTextClassifier(MultiArticleClassifier multiArticleClassifier, String encoding, int batchSize) {
45-
this.multiArticleClassifier = multiArticleClassifier;
44+
SemaphoreTextClassifier(SemaphoreProxy semaphoreProxy, String encoding, int batchSize) {
45+
this.semaphoreProxy = semaphoreProxy;
4646
this.domHelper = new DOMHelper(null);
4747
this.transformer = newTransformer();
4848
this.encoding = encoding;
@@ -53,7 +53,7 @@ class SemaphoreTextClassifier implements TextClassifier {
5353
@Override
5454
public void classifyDocument(DocumentInputs inputs) {
5555
byte[] content = inputs.getContentAsBytes();
56-
byte[] classification = multiArticleClassifier.classifyDocument(content, inputs.getInitialUri());
56+
byte[] classification = semaphoreProxy.classifyDocument(content, inputs.getInitialUri());
5757
inputs.setDocumentClassification(classification);
5858
}
5959

@@ -82,7 +82,7 @@ private void classifyBatch(List<ClassifiableContent> contentBatch) {
8282

8383
Document structuredDocument;
8484
try {
85-
structuredDocument = multiArticleClassifier.classifyArticles(documentBytes);
85+
structuredDocument = semaphoreProxy.classifyArticles(documentBytes);
8686
} catch (Exception e) {
8787
throw new ConnectorException(String.format("Unable to classify content, cause: %s", e.getMessage()), e);
8888
}
@@ -96,7 +96,7 @@ private void classifyBatch(List<ClassifiableContent> contentBatch) {
9696

9797
@Override
9898
public void close() throws IOException {
99-
IOUtils.closeQuietly(multiArticleClassifier);
99+
IOUtils.closeQuietly(semaphoreProxy);
100100
}
101101

102102
private Document buildMultiArticleRequest(List<ClassifiableContent> classifiableContents) {

marklogic-spark-api/src/main/java/com/marklogic/spark/core/classifier/TextClassifierFactory.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,17 @@ public abstract class TextClassifierFactory {
2424
private static final String MOCK_CLASSIFIER_OPTION = "spark.marklogic.testing.mockClassifierResponse";
2525

2626
public static TextClassifier newTextClassifier(Context context) {
27-
MultiArticleClassifier multiArticleClassifier = null;
27+
SemaphoreProxy semaphoreProxy = null;
2828
final String host = context.getStringOption(Options.WRITE_CLASSIFIER_HOST);
2929

3030
if (context.hasOption(MOCK_CLASSIFIER_OPTION)) {
3131
String mockResponse = context.getStringOption(MOCK_CLASSIFIER_OPTION);
3232
assert mockResponse != null;
33-
multiArticleClassifier = new MockTextClassifier(mockResponse);
33+
semaphoreProxy = new MockSemaphoreProxy(mockResponse);
3434
} else if (host != null && !host.trim().isEmpty()) {
3535
try {
3636
ClassificationConfiguration config = buildClassificationConfiguration(context);
37-
multiArticleClassifier = new SemaphoreMultiArticleClassifier(config);
37+
semaphoreProxy = new SemaphoreProxyImpl(config);
3838
} catch (ConnectorException ex) {
3939
throw ex;
4040
} catch (Exception e) {
@@ -43,11 +43,11 @@ public static TextClassifier newTextClassifier(Context context) {
4343
}
4444
}
4545

46-
if (multiArticleClassifier != null) {
46+
if (semaphoreProxy != null) {
4747
// We may need a dedicated encoding for this
4848
String encoding = context.getStringOption(Options.READ_FILES_ENCODING, "UTF-8");
4949
int batchSize = context.getIntOption(Options.WRITE_CLASSIFIER_BATCH_SIZE, 20, 1);
50-
return new SemaphoreTextClassifier(multiArticleClassifier, encoding, batchSize);
50+
return new SemaphoreTextClassifier(semaphoreProxy, encoding, batchSize);
5151
}
5252
return null;
5353
}
@@ -133,15 +133,15 @@ private TextClassifierFactory() {
133133

134134
// Sonar doesn't like static assignments in this class, but this class is only used as a mock for testing.
135135
@SuppressWarnings("java:S2696")
136-
public static class MockTextClassifier implements MultiArticleClassifier {
136+
public static class MockSemaphoreProxy implements SemaphoreProxy {
137137

138138
private final Document mockResponse;
139139
private static int timesInvoked;
140140
private static boolean wasClosed;
141141

142142
// Sonar doesn't like this static assignment, but it's fine in a class that's only used as a mock.
143143
@SuppressWarnings("java:S3010")
144-
private MockTextClassifier(String mockResponse) {
144+
private MockSemaphoreProxy(String mockResponse) {
145145
this.mockResponse = new DOMHelper(null).parseXmlString(mockResponse, null);
146146
timesInvoked = 0;
147147
}

tests/src/test/java/com/marklogic/spark/writer/classifier/AddClassificationToJsonTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ void chunkAndAddClassificationToJsonInOriginalJsonDoc() {
2626
.mode(SaveMode.Append)
2727
.save();
2828

29-
assertTrue(TextClassifierFactory.MockTextClassifier.isClosed());
29+
assertTrue(TextClassifierFactory.MockSemaphoreProxy.isClosed());
3030

3131
JsonNode doc = readJsonDocument("/split-test.json");
3232
assertTrue(doc.get("classification").has("STRUCTUREDDOCUMENT"));
@@ -47,7 +47,7 @@ void sidecarChunksAddClassificationToJson() {
4747
.mode(SaveMode.Append)
4848
.save();
4949

50-
assertTrue(TextClassifierFactory.MockTextClassifier.isClosed());
50+
assertTrue(TextClassifierFactory.MockSemaphoreProxy.isClosed());
5151

5252
JsonNode doc = readJsonDocument("/split-test.json");
5353
assertTrue(doc.get("classification").has("STRUCTUREDDOCUMENT"));
@@ -82,7 +82,7 @@ void classifyJsonContentsWithoutChunking() {
8282
.mode(SaveMode.Append)
8383
.save();
8484

85-
assertTrue(TextClassifierFactory.MockTextClassifier.isClosed());
85+
assertTrue(TextClassifierFactory.MockSemaphoreProxy.isClosed());
8686

8787
JsonNode doc = readJsonDocument("/split-test.json");
8888
assertTrue(doc.get("classification").has("STRUCTUREDDOCUMENT"));
@@ -98,7 +98,7 @@ void chunkAndAddClassificationOnlyToChunksInOriginalDoc() {
9898
.mode(SaveMode.Append)
9999
.save();
100100

101-
assertTrue(TextClassifierFactory.MockTextClassifier.isClosed());
101+
assertTrue(TextClassifierFactory.MockSemaphoreProxy.isClosed());
102102

103103
JsonNode doc = readJsonDocument("/split-test.json");
104104
assertEquals(4, doc.get("chunks").size(), "Expecting 4 chunks based on max chunk size of 500.");

tests/src/test/java/com/marklogic/spark/writer/classifier/AddClassificationToXmlTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ void chunkAndAddClassificationToXmlInOriginalDoc() {
2525
.mode(SaveMode.Append)
2626
.save();
2727

28-
assertTrue(TextClassifierFactory.MockTextClassifier.isClosed());
28+
assertTrue(TextClassifierFactory.MockSemaphoreProxy.isClosed());
2929

3030
XmlNode doc = readXmlDocument("/split-test.xml");
3131
doc.assertElementExists("Expecting each chunk to have a 'model:classification' child element",
@@ -44,7 +44,7 @@ void sidecarChunksAddClassificationToXml() {
4444
.mode(SaveMode.Append)
4545
.save();
4646

47-
assertTrue(TextClassifierFactory.MockTextClassifier.isClosed());
47+
assertTrue(TextClassifierFactory.MockSemaphoreProxy.isClosed());
4848

4949
XmlNode doc = readXmlDocument("/split-test.xml");
5050
doc.assertElementExists("Expecting the root of the document to have a 'model:classification' child element",
@@ -75,7 +75,7 @@ void classifyXmlContentsWithoutChunking() {
7575
.mode(SaveMode.Append)
7676
.save();
7777

78-
assertTrue(TextClassifierFactory.MockTextClassifier.isClosed());
78+
assertTrue(TextClassifierFactory.MockSemaphoreProxy.isClosed());
7979

8080
XmlNode doc = readXmlDocument("/split-test.xml");
8181
doc.assertElementExists("Expecting the root of the document to have a 'model:classification' child element",

tests/src/test/java/com/marklogic/spark/writer/classifier/ClassifyExtractedTextTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ void withBatchSize() {
7373
.mode(SaveMode.Append)
7474
.save();
7575

76-
assertEquals(3, TextClassifierFactory.MockTextClassifier.getTimesInvoked(), "The mock classifier should " +
76+
assertEquals(3, TextClassifierFactory.MockSemaphoreProxy.getTimesInvoked(), "The mock classifier should " +
7777
"have been invoked 3 times - with 10, 10, and then 8 articles.");
7878
}
7979

0 commit comments

Comments
 (0)