@@ -67,7 +67,7 @@ def perform_completion!(payload)
         prompt = build_prompt(model, messages)
         validate_context_length!(prompt, payload[:model])
         config = build_generation_config(payload)
-        model.generate(prompt, config: config)
+        generate_with_error_handling(model, prompt, config, payload[:model])
       end
 
       format_response(response, payload[:schema])
@@ -84,8 +84,8 @@ def perform_streaming_completion!(payload, &block)
       # Collect all streamed content
       full_content = ""
 
-      # Stream tokens
-      model.generate_stream(prompt, config: config) do |token|
+      # Stream tokens with error handling
+      stream_with_error_handling(model, prompt, config, payload[:model]) do |token|
         full_content += token
         chunk = format_stream_chunk(token)
         block.call(chunk)
@@ -189,6 +189,44 @@ def model_error_message(exception, model_id)
       ERROR_MESSAGE
     end
 
+    def generate_with_error_handling(model, prompt, config, model_id)
+      model.generate(prompt, config: config)
+    rescue StandardError => e
+      raise RubyLLM::Error.new(nil, generation_error_message(e, model_id))
+    end
+
+    def stream_with_error_handling(model, prompt, config, model_id, &block)
+      model.generate_stream(prompt, config: config, &block)
+    rescue StandardError => e
+      raise RubyLLM::Error.new(nil, generation_error_message(e, model_id))
+    end
+
+    def generation_error_message(exception, model_id)
+      message = exception.message.to_s
+
+      if message.include?("out of memory") || message.include?("OOM")
+        <<~ERROR_MESSAGE.strip
+          Out of memory while generating with #{model_id}.
+          Try using a smaller model or reducing the context length.
+          Original error: #{message}
+        ERROR_MESSAGE
+      elsif message.include?("context") || message.include?("sequence")
+        <<~ERROR_MESSAGE.strip
+          Context length exceeded for #{model_id}.
+          The input is too long for this model's context window.
+          Original error: #{message}
+        ERROR_MESSAGE
+      elsif message.include?("tensor") || message.include?("shape")
+        <<~ERROR_MESSAGE.strip
+          Model execution error for #{model_id}.
+          This may indicate an incompatible model format or corrupted weights.
+          Original error: #{message}
+        ERROR_MESSAGE
+      else
+        "Generation failed for #{model_id}: #{message}"
+      end
+    end
+
     def format_messages(messages)
       messages.map do |msg|
         # Handle both hash and Message objects
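
The practical effect of this change is that low-level generation exceptions now surface to callers as a single RubyLLM::Error carrying one of the friendly messages built by generation_error_message. A minimal caller-side sketch under that assumption (the provider instance, model id, and messages variable below are illustrative, not part of this commit):

    # Hypothetical caller: provider is assumed to expose perform_completion!
    # with the payload hash shape used in the diff (:model, :messages, :schema).
    begin
      provider.perform_completion!(model: "TinyLlama-1.1B", messages: messages)
    rescue RubyLLM::Error => e
      # e.message is one of the classified strings, e.g.
      # "Out of memory while generating with TinyLlama-1.1B. ..."
      warn e.message
    end

Because both generate_with_error_handling and stream_with_error_handling funnel through the same generation_error_message helper, streaming and non-streaming callers can share this one rescue path.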