|
1032 | 1032 | "min":1,
|
1033 | 1033 | "pattern":"^[a-zA-Z0-9](-*[a-zA-Z0-9])*"
|
1034 | 1034 | },
|
| 1035 | + "AugmentedManifestsDocumentTypeFormat":{ |
| 1036 | + "type":"string", |
| 1037 | + "enum":[ |
| 1038 | + "PLAIN_TEXT_DOCUMENT", |
| 1039 | + "SEMI_STRUCTURED_DOCUMENT" |
| 1040 | + ] |
| 1041 | + }, |
1035 | 1042 | "AugmentedManifestsListItem":{
|
1036 | 1043 | "type":"structure",
|
1037 | 1044 | "required":[
|
|
1046 | 1053 | "AttributeNames":{
|
1047 | 1054 | "shape":"AttributeNamesList",
|
1048 | 1055 | "documentation":"<p>The JSON attribute that contains the annotations for your training documents. The number of attribute names that you specify depends on whether your augmented manifest file is the output of a single labeling job or a chained labeling job.</p> <p>If your file is the output of a single labeling job, specify the LabelAttributeName key that was used when the job was created in Ground Truth.</p> <p>If your file is the output of a chained labeling job, specify the LabelAttributeName key for one or more jobs in the chain. Each LabelAttributeName key provides the annotations from an individual job.</p>"
|
| 1056 | + }, |
| 1057 | + "AnnotationDataS3Uri":{ |
| 1058 | + "shape":"S3Uri", |
| 1059 | + "documentation":"<p>The S3 prefix to the annotation files that are referred to in the augmented manifest file.</p>" |
| 1060 | + }, |
| 1061 | + "SourceDocumentsS3Uri":{ |
| 1062 | + "shape":"S3Uri", |
| 1063 | + "documentation":"<p>The S3 prefix to the source files (PDFs) that are referred to in the augmented manifest file.</p>" |
| 1064 | + }, |
| 1065 | + "DocumentType":{ |
| 1066 | + "shape":"AugmentedManifestsDocumentTypeFormat", |
| 1067 | + "documentation":"<p>The type of augmented manifest. PlainTextDocument or SemiStructuredDocument. If you don't specify, the default is PlainTextDocument. </p> <ul> <li> <p> <code>PLAIN_TEXT_DOCUMENT</code> A document type that represents any unicode text that is encoded in UTF-8.</p> </li> <li> <p> <code>SEMI_STRUCTURED_DOCUMENT</code> A document type with positional and structural context, like a PDF. For training with Amazon Comprehend, only PDFs are supported. For inference, Amazon Comprehend supports PDFs, DOCX and TXT.</p> </li> </ul>" |
1049 | 1068 | }
|
1050 | 1069 | },
|
1051 | 1070 | "documentation":"<p>An augmented manifest file that provides training data for your custom model. An augmented manifest file is a labeled dataset that is produced by Amazon SageMaker Ground Truth.</p>"
|
|
2166 | 2185 | "DocumentClassifierArn":{
|
2167 | 2186 | "type":"string",
|
2168 | 2187 | "max":256,
|
2169 |
| - "pattern":"arn:aws(-[^:]+)?:comprehend:[a-zA-Z0-9-]*:[0-9]{12}:document-classifier/[a-zA-Z0-9](-*[a-zA-Z0-9])*" |
| 2188 | + "pattern":"arn:aws(-[^:]+)?:comprehend:[a-zA-Z0-9-]*:[0-9]{12}:document-classifier/[a-zA-Z0-9](-*[a-zA-Z0-9])*(/version/[a-zA-Z0-9](-*[a-zA-Z0-9])*)?" |
2170 | 2189 | },
|
2171 | 2190 | "DocumentClassifierAugmentedManifestsList":{
|
2172 | 2191 | "type":"list",
|
|
2333 | 2352 | },
|
2334 | 2353 | "documentation":"<p>Specifies one of the label or labels that categorize the document being analyzed.</p>"
|
2335 | 2354 | },
|
| 2355 | + "DocumentReadAction":{ |
| 2356 | + "type":"string", |
| 2357 | + "enum":[ |
| 2358 | + "TEXTRACT_DETECT_DOCUMENT_TEXT", |
| 2359 | + "TEXTRACT_ANALYZE_DOCUMENT" |
| 2360 | + ] |
| 2361 | + }, |
| 2362 | + "DocumentReadFeatureTypes":{ |
| 2363 | + "type":"string", |
| 2364 | + "documentation":"<p>A list of the types of analyses to perform. This field specifies what feature types need to be extracted from the document where entity recognition is expected.</p> <ul> <li> <p> <code>TABLES</code> - Add TABLES to the list to return information about the tables that are detected in the input document. </p> </li> <li> <p> <code>FORMS</code> - Add FORMS to return detected form data. </p> </li> </ul>", |
| 2365 | + "enum":[ |
| 2366 | + "TABLES", |
| 2367 | + "FORMS" |
| 2368 | + ] |
| 2369 | + }, |
| 2370 | + "DocumentReadMode":{ |
| 2371 | + "type":"string", |
| 2372 | + "enum":[ |
| 2373 | + "SERVICE_DEFAULT", |
| 2374 | + "FORCE_DOCUMENT_READ_ACTION" |
| 2375 | + ] |
| 2376 | + }, |
| 2377 | + "DocumentReaderConfig":{ |
| 2378 | + "type":"structure", |
| 2379 | + "required":["DocumentReadAction"], |
| 2380 | + "members":{ |
| 2381 | + "DocumentReadAction":{ |
| 2382 | + "shape":"DocumentReadAction", |
| 2383 | + "documentation":"<p>This enum field will start with two values which will apply to PDFs:</p> <ul> <li> <p> <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The service calls DetectDocumentText for PDF documents per page.</p> </li> <li> <p> <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The service calls AnalyzeDocument for PDF documents per page.</p> </li> </ul>" |
| 2384 | + }, |
| 2385 | + "DocumentReadMode":{ |
| 2386 | + "shape":"DocumentReadMode", |
| 2387 | + "documentation":"<p>This enum field provides two values:</p> <ul> <li> <p> <code>SERVICE_DEFAULT</code> - use service defaults for Document reading. For Digital PDF it would mean using an internal parser instead of Textract APIs</p> </li> <li> <p> <code>FORCE_DOCUMENT_READ_ACTION</code> - Always use specified action for DocumentReadAction, including Digital PDF. </p> </li> </ul>" |
| 2388 | + }, |
| 2389 | + "FeatureTypes":{ |
| 2390 | + "shape":"ListOfDocumentReadFeatureTypes", |
| 2391 | + "documentation":"<p>Specifies the type of Amazon Textract features to apply. Add <code>TABLES</code> to return information about the tables that are detected in the input document, and/or <code>FORMS</code> to return detected form data.</p>" |
| 2392 | + } |
| 2393 | + }, |
| 2394 | + "documentation":"<p>Provides configuration parameters to override the default actions for extracting text from PDF documents.</p>" |
| 2395 | + }, |
2336 | 2396 | "DominantLanguage":{
|
2337 | 2397 | "type":"structure",
|
2338 | 2398 | "members":{
|
|
2648 | 2708 | "EntityRecognizerArn":{
|
2649 | 2709 | "type":"string",
|
2650 | 2710 | "max":256,
|
2651 |
| - "pattern":"arn:aws(-[^:]+)?:comprehend:[a-zA-Z0-9-]*:[0-9]{12}:entity-recognizer/[a-zA-Z0-9](-*[a-zA-Z0-9])*" |
| 2711 | + "pattern":"arn:aws(-[^:]+)?:comprehend:[a-zA-Z0-9-]*:[0-9]{12}:entity-recognizer/[a-zA-Z0-9](-*[a-zA-Z0-9])*(/version/[a-zA-Z0-9](-*[a-zA-Z0-9])*)?" |
2652 | 2712 | },
|
2653 | 2713 | "EntityRecognizerAugmentedManifestsList":{
|
2654 | 2714 | "type":"list",
|
|
3026 | 3086 | "InputFormat":{
|
3027 | 3087 | "shape":"InputFormat",
|
3028 | 3088 | "documentation":"<p>Specifies how the text in an input file should be processed:</p> <ul> <li> <p> <code>ONE_DOC_PER_FILE</code> - Each file is considered a separate document. Use this option when you are processing large documents, such as newspaper articles or scientific papers.</p> </li> <li> <p> <code>ONE_DOC_PER_LINE</code> - Each line in a file is considered a separate document. Use this option when you are processing many short documents, such as text messages.</p> </li> </ul>"
|
| 3089 | + }, |
| 3090 | + "DocumentReaderConfig":{ |
| 3091 | + "shape":"DocumentReaderConfig", |
| 3092 | + "documentation":"<p>The document reader config field applies only for InputDataConfig of StartEntitiesDetectionJob. </p> <p>Use DocumentReaderConfig to provide specifications about how you want your inference documents read. Currently it applies to PDF documents in StartEntitiesDetectionJob custom inference.</p>" |
3029 | 3093 | }
|
3030 | 3094 | },
|
3031 |
| - "documentation":"<p>The input properties for a topic detection job.</p>" |
| 3095 | + "documentation":"<p>The input properties for an inference job.</p>" |
3032 | 3096 | },
|
3033 | 3097 | "InputFormat":{
|
3034 | 3098 | "type":"string",
|
|
3202 | 3266 | },
|
3203 | 3267 | "KmsKeyId":{
|
3204 | 3268 | "type":"string",
|
3205 |
| - "max":2048 |
| 3269 | + "max":2048, |
| 3270 | + "pattern":".*" |
3206 | 3271 | },
|
3207 | 3272 | "KmsKeyValidationException":{
|
3208 | 3273 | "type":"structure",
|
|
3499 | 3564 | "type":"list",
|
3500 | 3565 | "member":{"shape":"BatchDetectSyntaxItemResult"}
|
3501 | 3566 | },
|
| 3567 | + "ListOfDocumentReadFeatureTypes":{ |
| 3568 | + "type":"list", |
| 3569 | + "member":{"shape":"DocumentReadFeatureTypes"}, |
| 3570 | + "max":2, |
| 3571 | + "min":1 |
| 3572 | + }, |
3502 | 3573 | "ListOfDominantLanguages":{
|
3503 | 3574 | "type":"list",
|
3504 | 3575 | "member":{"shape":"DominantLanguage"}
|
|
0 commit comments