Skip to content

Commit bbb803c

Browse files
committed
rolled in review comments
1 parent 42ca956 commit bbb803c

File tree

4 files changed

+360
-190
lines changed

4 files changed

+360
-190
lines changed

javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/CloudFormationHelper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ public static void deployCloudFormationStack(String stackName) {
100100
}
101101
}).join();
102102
} else {
103-
logger.info("{} stack already exists", CFN_TEMPLATE);
103+
logger.info("{} stack already exists", stackName);
104104
}
105105
}
106106

javav2/example_code/entityresolution/src/main/java/com/example/entity/scenario/EntityResScenario.java

Lines changed: 89 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66

77
import software.amazon.awssdk.core.exception.SdkException;
8+
import software.amazon.awssdk.services.cloudformation.model.CloudFormationException;
89
import software.amazon.awssdk.services.entityresolution.model.AccessDeniedException;
910
import software.amazon.awssdk.services.entityresolution.model.ConflictException;
1011
import software.amazon.awssdk.services.entityresolution.model.CreateSchemaMappingResponse;
@@ -17,6 +18,7 @@
1718
import software.amazon.awssdk.services.entityresolution.model.ResourceNotFoundException;
1819
import software.amazon.awssdk.services.entityresolution.model.ThrottlingException;
1920
import software.amazon.awssdk.services.entityresolution.model.ValidationException;
21+
import software.amazon.awssdk.services.s3.model.S3Exception;
2022

2123
import java.util.Map;
2224
import java.util.Scanner;
@@ -115,18 +117,18 @@ private static void runScenario() throws InterruptedException {
115117
Entity Resolution service.
116118
*/
117119
String json = """
118-
{"id":"1","name":"Alice Johnson","email":"alice.johnson@example.com"}
119-
{"id":"2","name":"Bob Smith","email":"bob.smith@example.com"}
120-
{"id":"3","name":"Charlie Black","email":"charlie.black@example.com"}
120+
{"id":"1","name":"Jane Doe","email":"jane.doe@example.com"}
121+
{"id":"2","name":"John Doe","email":"john.doe@example.com"}
122+
{"id":"3","name":"Jorge Souza","email":"jorge_souza@example.com"}
121123
""";
122124
logger.info("Upload the following JSON objects to the {} S3 bucket.", glueBucketName);
123125
logger.info(json);
124126
String csv = """
125127
id,name,email,phone
126-
1,Alice B. Johnson,alice.johnson@example.com,746-876-9846
127-
2,Bob Smith Jr.,bob.smith@example.com,987-654-3210
128-
3,Charlie Black,charlie.black@company.com,345-567-1234
129-
7,Jane E. Doe,jane_doe@company.com,111-222-3333
128+
1,Jane B. Doe,jane.doe@example.com,555-876-9846
129+
2,John Doe Jr.,john.doe@example.com,555-654-3210
130+
3,María García,maría_garcia@company.com,555-567-1234
131+
4,Mary Major,mary_major@company.com,555-222-3333
130132
""";
131133
logger.info("Upload the following CSV data to the {} S3 bucket.", glueBucketName);
132134
logger.info(csv);
@@ -158,7 +160,7 @@ private static void runScenario() throws InterruptedException {
158160
and uses machine learning to link related entities, enabling a
159161
consolidated, accurate view for improved data quality and decision-making.
160162
161-
In this example, the schema mapping lines up with the fields in the JSON ans CSV objects. That is,
163+
In this example, the schema mapping lines up with the fields in the JSON and CSV objects. That is,
162164
it contains these fields: id, name, and email.
163165
""");
164166
try {
@@ -328,8 +330,8 @@ private static void runScenario() throws InterruptedException {
328330
You cannot view the result of the workflow that is in a running state.
329331
In order to view the results, you need to wait for the workflow that we started in step 3 to complete.
330332
331-
If you choose not to wait, you cannot view the results or delete the workflow. You would have to
332-
perform both tasks manually in the AWS Management Console.
333+
If you choose not to wait, you cannot view the results. You can perform
334+
this task manually in the AWS Management Console.
333335
334336
This can take up to 30 mins (y/n).
335337
""");
@@ -343,27 +345,26 @@ private static void runScenario() throws InterruptedException {
343345
logger.info("Number of match ids: {}", metrics.matchIDs());
344346
logger.info("Number of records not processed: {}", metrics.recordsNotProcessed());
345347
logger.info("Number of total records processed: {}", metrics.totalRecordsProcessed());
348+
logger.info("The following represents the actual output data generated by the Entity Resolution workflow based on the JSON and CSV input data. The output data is stored in the {} bucket.", glueBucketName);
346349
logger.info("""
347-
348-
The output of the machinelearning-based matching job is a CSV file in the S3 bucket. The following is a sample of the output:
349-
350+
350351
------------------------------------------------------------------------------ ----------------- ---- ------------------ --------------------------- -------------- ---------- ---------------------------------------------------\s
351352
InputSourceARN ConfidenceLevel id name email phone RecordId MatchID \s
352353
------------------------------------------------------------------------------ ----------------- ---- ------------------ --------------------------- -------------- ---------- ---------------------------------------------------\s
353-
arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 7 Jane E. Doe jane_doe@company.com 111-222-3333 7 036298535ed6471ebfc358fc76e1f51200006472446402560 \s
354-
arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 0.90523 2 Bob Smith Jr. bob.smith@example.com 987-654-3210 2 6ae2d360d6594089837eafc31b20f31600003506806140928 \s
355-
arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/jsongluetable 0.90523 2 Bob Smith bob.smith@example.com 2 6ae2d360d6594089837eafc31b20f31600003506806140928 \s
356-
arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 0.89398956 1 Alice B. Johnson alice.johnson@example.com 746-876-9846 1 34a5075b289247efa1847ab292ed677400009137438953472 \s
357-
arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/jsongluetable 0.89398956 1 Alice Johnson alice.johnson@example.com 1 34a5075b289247efa1847ab292ed677400009137438953472 \s
358-
arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 0.605295 3 Charlie Black charlie.black@company.com 345-567-1234 3 92c8ef3f68b34948a3af998d700ed02700002146028888064 \s
359-
arn:aws:glue:region:xxxxxxxxxxxx:table/entity_resolution_db/jsongluetable 0.605295 3 Charlie Black charlie.black@example.com 3 92c8ef3f68b34948a3af998d700ed02700002146028888064 \s
360-
361-
Note that each of the last 3 pairs of records are considered a match even though the 'name' or 'email' differ between the records;
362-
For example 'Bob Smith Jr.' compared to 'Bob Smith'.
363-
The confidence level is a value between 0 and 1, where 1 indicates a perfect match. In the last pair of matched records,
364-
the confidence level is lower for the differing email addresses.
365-
354+
355+
arn:aws:glue:us-east-1:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable Mary Major mary_major@company.com, 555-222-3333 4 ec05e7a55a0d4319b86da0a65286118f000040 \s
356+
arn:aws:glue:us-east-1:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 0.605295 3 María García maría_garcia@company.com 555-567-1234 3 201ed8241ec04f9aa7fcfd962220580500001369367187456 \s
357+
arn:aws:glue:us-east-1:xxxxxxxxxxxx:table/entity_resolution_db/jsongluetable 1 Jane Doe jane.doe@example.com 1 895c3a439dc44a298663d52c08635e1a0000434359738368 \s
358+
arn:aws:glue:us-east-1:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 1 Jane B.Doe jane.doe@example.com 1 69c2b2190c60427c8f5a2daa7ce5d45b00001463856467968 \s
359+
arn:aws:glue:us-east-1:xxxxxxxxxxxx:table/entity_resolution_db/jsongluetable 0.8914204 2 John Doe john.doe@example.com 2 fbeda81b4c72429382c064b20cd592ff00001386547056640 \s
360+
arn:aws:glue:us-east-1:xxxxxxxxxxxx:table/entity_resolution_db/csvgluetable 0.8914204 2 John Doe Jr. john.doe@example.com 555-654-3210 2 fbeda81b4c72429382c064b20cd592ff00001386547056640 \s
361+
362+
Note that the last 2 records are considered a match even though the 'name' differs between the records.
363+
For example 'John Doe Jr.' compared to 'John Doe'.
364+
The confidence level is a value between 0 and 1, where 1 indicates a perfect match.
365+
366366
""");
367+
367368
} catch (CompletionException ce) {
368369
Throwable cause = ce.getCause();
369370
if (cause instanceof ResourceNotFoundException) {
@@ -373,49 +374,64 @@ private static void runScenario() throws InterruptedException {
373374
}
374375
return;
375376
}
377+
}
376378

377-
logger.info("Do you want to delete the resources, including the workflow? (y/n)");
378-
String delAns = scanner.nextLine().trim();
379-
if (delAns.equalsIgnoreCase("y")) {
380-
try {
381-
actions.deleteMatchingWorkflowAsync(workflowName).join();
382-
logger.info("Workflow deleted successfully!");
383-
} catch (CompletionException ce) {
384-
Throwable cause = ce.getCause();
385-
logger.info("Failed to delete workflow: " + (cause != null ? cause.getMessage() : ce.getMessage()));
386-
return;
387-
}
379+
waitForInputToContinue(scanner);
380+
logger.info(DASHES);
388381

389-
try {
390-
// Delete both schema mappings.
391-
actions.deleteSchemaMappingAsync(jsonSchemaMappingName).join();
392-
actions.deleteSchemaMappingAsync(csvSchemaMappingName).join();
393-
logger.info("Both schema mappings were deleted successfully!");
394-
} catch (RuntimeException e) {
395-
logger.error("Error deleting schema mapping: {}", e.getMessage());
396-
return;
397-
}
382+
logger.info(DASHES);
383+
logger.info("9. Do you want to delete the resources, including the workflow? (y/n)");
384+
logger.info("""
385+
You cannot delete the workflow that is in a running state.
386+
In order to delete the workflow, you need to wait for the workflow to complete.
387+
388+
You can delete the workflow manually in the AWS Management Console at a later time.
389+
390+
If you already waited for the workflow to complete in the previous step,
391+
the workflow is completed and you can delete it.
392+
393+
If the workflow is not completed, this can take up to 30 mins (y/n).
394+
""");
395+
String delAns = scanner.nextLine().trim();
396+
if (delAns.equalsIgnoreCase("y")) {
397+
try {
398+
countdownWithWorkflowCheck(actions, 1800, jobId, workflowName);
399+
actions.deleteMatchingWorkflowAsync(workflowName).join();
400+
logger.info("Workflow deleted successfully!");
401+
} catch (CompletionException ce) {
402+
logger.info("Error deleting the workflow: {} ", ce.getMessage());
403+
return;
404+
}
398405

399-
waitForInputToContinue(scanner);
400-
logger.info(DASHES);
401-
logger.info("""
402-
Now we delete the CloudFormation stack, which deletes
403-
the resources that were created at the beginning
404-
""");
405-
waitForInputToContinue(scanner);
406-
logger.info(DASHES);
407-
try {
408-
deleteResources();
409-
} catch (CompletionException ce) {
410-
Throwable cause = ce.getCause();
411-
logger.error("Failed to delete Glue Table: {}", cause != null ? cause.getMessage() : ce.getMessage());
412-
return;
413-
}
406+
try {
407+
// Delete both schema mappings.
408+
actions.deleteSchemaMappingAsync(jsonSchemaMappingName).join();
409+
actions.deleteSchemaMappingAsync(csvSchemaMappingName).join();
410+
logger.info("Both schema mappings were deleted successfully!");
411+
} catch (CompletionException ce) {
412+
logger.error("Error deleting schema mapping: {}", ce.getMessage());
413+
return;
414+
}
414415

415-
} else {
416-
logger.info("You can delete the Workflow later in the AWS Management console.");
416+
waitForInputToContinue(scanner);
417+
logger.info(DASHES);
418+
logger.info("""
419+
Now we delete the CloudFormation stack, which deletes
420+
the resources that were created at the beginning of this scenario.
421+
""");
422+
waitForInputToContinue(scanner);
423+
logger.info(DASHES);
424+
try {
425+
deleteCloudFormationStack();
426+
} catch (RuntimeException e) {
427+
logger.error("Failed to delete the stack: {}", e.getMessage());
428+
return;
417429
}
430+
431+
} else {
432+
logger.info("You can delete the AWS resources in the AWS Management Console.");
418433
}
434+
419435
waitForInputToContinue(scanner);
420436
logger.info(DASHES);
421437

@@ -472,10 +488,16 @@ public static void countdownWithWorkflowCheck(EntityResActions actions, int tota
472488
}
473489
}
474490

475-
private static void deleteResources() {
476-
CloudFormationHelper.emptyS3Bucket(glueBucketName);
477-
CloudFormationHelper.destroyCloudFormationStack(STACK_NAME);
478-
logger.info("Resources deleted successfully!");
491+
private static void deleteCloudFormationStack() {
492+
try {
493+
CloudFormationHelper.emptyS3Bucket(glueBucketName);
494+
CloudFormationHelper.destroyCloudFormationStack(STACK_NAME);
495+
logger.info("Resources deleted successfully!");
496+
} catch (CloudFormationException e) {
497+
throw new RuntimeException("Failed to delete CloudFormation stack: " + e.getMessage(), e);
498+
} catch (S3Exception e) {
499+
throw new RuntimeException("Failed to empty S3 bucket: " + e.getMessage(), e);
500+
}
479501
}
480502
}
481503
// snippet-end:[entityres.java2_scenario.main]

0 commit comments

Comments
 (0)