Skip to content

Commit c62aaba

Browse files
✨ add support for dataschema parameter (#220)
1 parent 622eff9 commit c62aaba

File tree

12 files changed

+300
-7
lines changed

12 files changed

+300
-7
lines changed

lib/mindee/http/mindee_api_v2.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ def enqueue_form_options(form_data, params)
122122
form_data.push(['confidence', params.confidence.to_s]) unless params.confidence.nil?
123123
form_data.push ['file_alias', params.file_alias] if params.file_alias
124124
form_data.push ['text_context', params.text_context] if params.text_context
125+
form_data.push ['data_schema', params.data_schema.to_s] if params.data_schema
125126
unless params.webhook_ids.nil? || params.webhook_ids.empty?
126127
form_data.push ['webhook_ids', params.webhook_ids.join(',')]
127128
end

lib/mindee/input.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# frozen_string_literal: true
22

3+
require_relative 'input/data_schema'
34
require_relative 'input/inference_parameters'
45
require_relative 'input/polling_options'
56
require_relative 'input/sources'

lib/mindee/input/data_schema.rb

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# frozen_string_literal: true
2+
3+
module Mindee
4+
module Input
5+
# Definition of a single field in a data schema.
class DataSchemaField
  # @return [String] Display name for the field, also impacts inference results.
  attr_reader :title
  # @return [String] Name of the field in the data schema.
  attr_reader :name
  # @return [Boolean] Whether this field can contain multiple values.
  attr_reader :is_array
  # @return [String] Data type of the field.
  attr_reader :type
  # @return [Array<String>, nil] Allowed values when type is `classification`. Leave empty for other types.
  attr_reader :classification_values
  # @return [Boolean, nil] Whether to remove duplicate values in the array.
  #   Only applicable if `is_array` is True.
  attr_reader :unique_values
  # @return [String, nil] Detailed description of what this field represents.
  attr_reader :description
  # @return [String, nil] Optional extraction guidelines.
  attr_reader :guidelines
  # @return [Array<Hash>, nil] Nested fields.
  attr_reader :nested_fields

  # @param field [Hash] Field definition, keyed by Strings or Symbols. The hash is left untouched.
  def initialize(field)
    # Read from a symbol-keyed copy: `transform_keys!` would rewrite the caller's hash in place
    # and raise FrozenError when that hash is frozen.
    attributes = field.transform_keys(&:to_sym)
    @name = attributes[:name]
    @title = attributes[:title]
    @is_array = attributes[:is_array]
    @type = attributes[:type]
    @classification_values = attributes[:classification_values]
    @unique_values = attributes[:unique_values]
    @description = attributes[:description]
    @guidelines = attributes[:guidelines]
    @nested_fields = attributes[:nested_fields]
  end

  # Hash form of the field. The `name`, `title`, `is_array` and `type` keys are always emitted;
  # every other key is only emitted when its value is not nil.
  # @return [Hash]
  def to_hash
    out = {
      name: @name,
      title: @title,
      is_array: @is_array,
      type: @type,
    } # @type var out: Hash[Symbol, untyped]
    out[:classification_values] = @classification_values unless @classification_values.nil?
    out[:unique_values] = @unique_values unless @unique_values.nil?
    out[:description] = @description unless @description.nil?
    out[:guidelines] = @guidelines unless @guidelines.nil?
    out[:nested_fields] = @nested_fields unless @nested_fields.nil?
    out
  end

  # @return [String] JSON serialization of {#to_hash}.
  def to_s
    to_hash.to_json
  end
end
62+
63+
# The structure to completely replace the data schema of the model.
class DataSchemaReplace
  # @return [Array<DataSchemaField>] Fields making up the replacement data schema.
  attr_reader :fields

  # @param data_schema_replace [Hash] Replacement definition holding a non-empty `fields` list,
  #   keyed by Strings or Symbols. The hash is left untouched.
  # @raise [Mindee::Errors::MindeeError] If the argument is not a Hash or has no `fields` entry.
  # @raise [TypeError] If the `fields` list is empty.
  def initialize(data_schema_replace)
    # A nil or otherwise malformed payload is reported as an invalid schema rather than
    # surfacing as a NoMethodError from the key normalization below.
    raise Mindee::Errors::MindeeError, 'Invalid Data Schema provided.' unless data_schema_replace.is_a?(Hash)

    # Non-destructive normalization: the caller's hash keeps its original keys.
    fields_list = data_schema_replace.transform_keys(&:to_sym)[:fields]
    raise Mindee::Errors::MindeeError, 'Invalid Data Schema provided.' if fields_list.nil?
    raise TypeError, 'Data Schema replacement fields cannot be empty.' if fields_list.empty?

    @fields = fields_list.map { |field| DataSchemaField.new(field) }
  end

  # @return [Hash] The `fields` list with every field converted through its `to_hash`.
  def to_hash
    { fields: @fields.map(&:to_hash) }
  end

  # @return [String] JSON serialization of {#to_hash}.
  def to_s
    to_hash.to_json
  end
end
88+
89+
# Modify the Data Schema.
class DataSchema
  # @return [Mindee::Input::DataSchemaReplace]
  attr_reader :replace

  # @param data_schema [Hash, String, DataSchema] A hash (String or Symbol keys) or a JSON object
  #   string holding a `replace` entry, or an existing DataSchema whose `replace` is reused.
  #   A Hash argument is left untouched.
  # @raise [TypeError] If the argument (or the parsed JSON) is not of a supported type.
  # @raise [Mindee::Errors::MindeeError] If the `replace` entry is missing.
  # @raise [JSON::ParserError] If a String argument is not valid JSON.
  def initialize(data_schema)
    @replace = case data_schema
               when String
                 extract_replace(JSON.parse(data_schema))
               when Hash
                 extract_replace(data_schema)
               when DataSchema
                 data_schema.replace
               else
                 raise TypeError, 'Invalid Data Schema provided.'
               end
  end

  # @return [Hash]
  def to_hash
    { replace: @replace.to_hash }
  end

  # @return [String] JSON serialization of {#to_hash}.
  def to_s
    to_hash.to_json
  end

  private

  # Pulls the `replace` entry out of a schema hash and wraps it when needed.
  # @param schema [Object] Hash given by the caller or value parsed from a JSON string.
  # @return [Mindee::Input::DataSchemaReplace]
  def extract_replace(schema)
    # Valid JSON is not necessarily an object (e.g. "[]" or "1").
    raise TypeError, 'Invalid Data Schema provided.' unless schema.is_a?(Hash)

    # Non-destructive normalization: the caller's hash keeps its original keys.
    replace = schema.transform_keys(&:to_sym)[:replace]
    return replace if replace.is_a?(DataSchemaReplace)
    raise Mindee::Errors::MindeeError, 'Invalid Data Schema provided.' if replace.nil?

    DataSchemaReplace.new(replace)
  end
end
125+
end
126+
end

lib/mindee/input/inference_parameters.rb

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# frozen_string_literal: true
22

3+
require_relative 'data_schema'
4+
35
module Mindee
46
module Input
57
# Parameters to set when sending a file for inference.
@@ -35,6 +37,9 @@ class InferenceParameters
3537
# @return [PollingOptions] Options for polling. Set only if having timeout issues.
3638
attr_reader :polling_options
3739

40+
# @return [DataSchema, nil] Data schema sent with the inference request, if any.
41+
attr_reader :data_schema
42+
3843
# @return [Boolean, nil] Whether to close the file after parsing.
3944
attr_reader :close_file
4045

@@ -58,7 +63,8 @@ def initialize(
5863
webhook_ids: nil,
5964
text_context: nil,
6065
polling_options: nil,
61-
close_file: true
66+
close_file: true,
67+
data_schema: nil
6268
)
6369
raise Errors::MindeeInputError, 'Model ID is required.' if model_id.empty? || model_id.nil?
6470

@@ -72,6 +78,7 @@ def initialize(
7278
@text_context = text_context
7379
@polling_options = get_clean_polling_options(polling_options)
7480
@close_file = close_file.nil? || close_file
81+
@data_schema = DataSchema.new(data_schema) unless data_schema.nil?
7582
# rubocop:enable Metrics/ParameterLists
7683
end
7784

lib/mindee/parsing/v2/inference_active_options.rb

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,23 @@
33
module Mindee
44
module Parsing
55
module V2
6+
# Data schema options activated during the inference.
class DataSchemaActiveOption
  # @return [Boolean, nil] Value of the `replace` option reported by the server, `nil` when absent.
  attr_reader :replace

  # @param server_response [Hash, nil] The `data_schema` section of the active options, keyed by
  #   Symbols or Strings. `nil` is treated as an empty section.
  def initialize(server_response)
    # The section may be missing from the response altogether; treat that as "nothing reported".
    options = server_response || {}
    # Only fall back to the String key when the Symbol key yields nil, so that an explicit
    # `false` under the Symbol key is preserved instead of being discarded by `||`.
    value = options[:replace]
    value = options['replace'] if value.nil?
    @replace = value
  end

  # String representation.
  # @return [String]
  def to_s
    "Data Schema\n-----------\n:Replace: #{@replace ? 'True' : 'False'}"
  end
end
22+
623
# Options which were activated during the inference.
724
class InferenceActiveOptions
825
# @return [Boolean] Whether the Raw Text feature was activated.
@@ -15,6 +32,8 @@ class InferenceActiveOptions
1532
attr_reader :rag
1633
# @return [Boolean] Whether the text context feature was activated.
1734
attr_reader :text_context
35+
# @return [DataSchemaActiveOption]
36+
attr_reader :data_schema
1837

1938
# @param server_response [Hash] Raw JSON parsed into a Hash.
2039
def initialize(server_response)
@@ -23,6 +42,7 @@ def initialize(server_response)
2342
@confidence = server_response['confidence']
2443
@rag = server_response['rag']
2544
@text_context = server_response['text_context']
45+
@data_schema = DataSchemaActiveOption.new(server_response['data_schema'])
2646
end
2747

2848
# String representation.
@@ -35,6 +55,8 @@ def to_s
3555
":Polygon: #{@polygon ? 'True' : 'False'}",
3656
":Confidence: #{@confidence ? 'True' : 'False'}",
3757
":RAG: #{@rag ? 'True' : 'False'}",
58+
":Text Context: #{@text_context ? 'True' : 'False'}\n",
59+
@data_schema.to_s,
3860
'',
3961
]
4062
parts.join("\n")

sig/mindee/input/data_schema.rbs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
module Mindee
  module Input
    # Definition of a single field in a data schema.
    class DataSchemaField
      attr_reader title: String
      attr_reader name: String
      attr_reader is_array: bool
      attr_reader type: String
      # Documented in the Ruby class as a list of allowed values.
      attr_reader classification_values: Array[String]?
      attr_reader unique_values: bool?
      attr_reader description: String?
      attr_reader guidelines: String?
      attr_reader nested_fields: Array[Hash[String | Symbol, untyped]]?

      # Keys are normalized to Symbols by the implementation, so both kinds are accepted.
      def initialize: (Hash[String | Symbol, untyped]) -> void
      def to_hash: () -> Hash[Symbol, untyped]
      # The Ruby class defines `to_s`; there is no `to_string` method.
      def to_s: () -> String
    end

    # Structure replacing the data schema of the model.
    class DataSchemaReplace
      attr_reader fields: Array[DataSchemaField]

      # Keys are normalized to Symbols by the implementation, so both kinds are accepted.
      def initialize: (Hash[String | Symbol, untyped]) -> void
      def to_hash: () -> Hash[Symbol, untyped]
      # The Ruby class defines `to_s`; there is no `to_string` method.
      def to_s: () -> String
    end

    # Data schema modification sent with an inference request.
    class DataSchema
      attr_reader replace: DataSchemaReplace

      def initialize: (Hash[String | Symbol, untyped] | String | DataSchema) -> void
      def to_hash: () -> Hash[Symbol, untyped]
      def to_s: () -> String
    end
  end
end

sig/mindee/input/inference_parameters.rbs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ module Mindee
1212
attr_reader raw_text: bool?
1313
attr_reader text_context: String?
1414
attr_reader webhook_ids: Array[String]?
15+
attr_reader data_schema: DataSchema?
1516

1617
def initialize: (
1718
String,
@@ -23,7 +24,8 @@ module Mindee
2324
?text_context: String?,
2425
?webhook_ids: Array[String]?,
2526
?polling_options: Hash[Symbol | String, untyped] | PollingOptions?,
26-
?close_file: bool?
27+
?close_file: bool?,
28+
?data_schema: DataSchema|String|Hash[Symbol | String, untyped]?
2729
) -> void
2830

2931
def self.from_hash: (params: Hash[String | Symbol, untyped]) -> InferenceParameters

sig/mindee/parsing/v2/inference_active_options.rbs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
11
module Mindee
22
module Parsing
33
module V2
4+
# Data schema options activated during the inference.
class DataSchemaActiveOption
  attr_reader replace: bool

  # The implementation reads the `replace` entry under either a Symbol or a String key.
  def initialize: (Hash[String | Symbol, untyped]) -> void
  def to_s: () -> String
end
410
class InferenceActiveOptions
511
attr_reader confidence: bool
612
attr_reader polygon: bool
713
attr_reader rag: bool
814
attr_reader raw_text: bool
915
attr_reader text_context: bool
16+
attr_reader data_schema: DataSchemaActiveOption
1017

1118
def initialize: (Hash[String | Symbol, untyped]) -> void
19+
def to_s: () -> String
1220
end
1321
end
1422
end

spec/v2/client_v2_integration.rb

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
raw_text: true,
2424
polygon: false,
2525
confidence: false,
26-
file_alias: 'ruby-integration-test',
26+
file_alias: 'rb_integration_test',
2727
polling_options: polling,
2828
text_context: 'this is a test'
2929
)
@@ -72,7 +72,7 @@
7272
polygon: false,
7373
confidence: false,
7474
rag: false,
75-
file_alias: 'ruby-integration-test'
75+
file_alias: 'rb_integration_test'
7676
)
7777

7878
response = client.enqueue_and_get_inference(input, inference_params)
@@ -191,7 +191,7 @@
191191
polygon: false,
192192
confidence: false,
193193
rag: false,
194-
file_alias: 'ruby-integration-test'
194+
file_alias: 'rb_integration_test'
195195
)
196196
client.enqueue_and_get_inference(input, inference_params)
197197
end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e|
@@ -216,4 +216,50 @@
216216
expect(response.inference).not_to be_nil
217217
end
218218
end
219+
220+
# Sends a blank PDF together with a replacement data schema and checks that the response
# reports the replacement as active and exposes the replaced field.
context 'A Data Schema Override' do
  it 'Overrides successfully' do
    # The schema is passed as a raw JSON string through the `data_schema:` parameter.
    data_schema_replace = File.read(File.join(V2_DATA_DIR, 'inference', 'data_schema_replace_param.json'))
    input = Mindee::Input::Source::PathInputSource.new(File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf'))

    inference_params = Mindee::Input::InferenceParameters.new(
      model_id,
      raw_text: false,
      polygon: false,
      confidence: false,
      rag: false,
      file_alias: 'rb_integration_data_schema_replace',
      data_schema: data_schema_replace
    )

    response = client.enqueue_and_get_inference(input, inference_params)
    expect(response).not_to be_nil

    model = response.inference.model
    expect(model).not_to be_nil
    expect(model).to be_a(Mindee::Parsing::V2::InferenceModel)
    expect(model.id).to eq(model_id)

    # Every optional feature was requested off; only the data schema replacement should be active.
    active_options = response.inference.active_options
    expect(active_options).not_to be_nil
    expect(active_options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions)
    expect(active_options.raw_text).to eq(false)
    expect(active_options.polygon).to eq(false)
    expect(active_options.confidence).to eq(false)
    expect(active_options.rag).to eq(false)
    expect(active_options.text_context).to eq(false)
    expect(active_options.data_schema).not_to be_nil
    expect(active_options.data_schema.replace).to eq(true)

    result = response.inference.result
    expect(result).not_to be_nil

    expect(result.raw_text).to be_nil

    fields = result.fields
    expect(fields).not_to be_nil
    expect(fields['test_replace']).not_to be_nil
    expect(fields['test_replace'].value).to eq('a test value')
  end
end
219265
end

0 commit comments

Comments
 (0)