@@ -1694,7 +1694,6 @@ def assistant_skill_trigger(self,
                                 function_description: str,
                                 function_name: Optional[str] = None,
                                 parameter_description_json: Optional[str] = None,  # NoQA
-                                model: Optional[OpenAIModels] = OpenAIModels.DefaultChatModel,  # NoQA
                                 data_type: Optional[
                                     Union[DataType, str]] = None,
                                 **kwargs: Any) -> Callable[..., Any]:
@@ -1723,7 +1722,6 @@ def assistant_skill_trigger(self,
         :param parameter_description_json: A JSON description of the function
         parameter, which is provided to the LLM.
         If no description is provided, the description will be autogenerated.
-        :param model: The OpenAI chat model to use.
         :param data_type: Defines how Functions runtime should treat the
         parameter value.
         :param kwargs: Keyword arguments for specifying additional binding
@@ -1741,7 +1739,6 @@ def decorator():
                         function_description=function_description,
                         function_name=function_name,
                         parameter_description_json=parameter_description_json,
-                        model=model,
                         data_type=parse_singular_param_to_enum(data_type,
                                                                DataType),
                         **kwargs))
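
For reference, a minimal usage sketch of the trigger after this change; the function app setup and the todo-task scenario are illustrative, not taken from this diff:

import azure.functions as func

app = func.FunctionApp()

# The removed `model` parameter is no longer passed; the skill is exposed to
# the assistant through its natural-language function_description alone.
@app.assistant_skill_trigger(arg_name="taskDescription",
                             function_description="Create a new todo task")
def add_todo(taskDescription: str) -> None:
    print(f"Creating todo: {taskDescription}")
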
@@ -3220,10 +3217,11 @@ def decorator():
     def text_completion_input(self,
                               arg_name: str,
                               prompt: str,
-                              model: Optional[OpenAIModels] = OpenAIModels.DefaultChatModel,  # NoQA
+                              chat_model: Optional[OpenAIModels] = OpenAIModels.DefaultChatModel,  # NoQA
                               temperature: Optional[str] = "0.5",
                               top_p: Optional[str] = None,
                               max_tokens: Optional[str] = "100",
+                              is_reasoning_model: Optional[bool] = False,
                               data_type: Optional[Union[DataType, str]] = None,
                               **kwargs) \
             -> Callable[..., Any]:
@@ -3255,7 +3253,10 @@ def text_completion_input(self,
         :param max_tokens: The maximum number of tokens to generate in the
         completion. The token count of your prompt plus max_tokens cannot
         exceed the model's context length. Most models have a context length of
-        2048 tokens (except for the newest models, which support 4096).
+        2048 tokens (except for the newest models, which support 4096).
+        :param is_reasoning_model: Whether the configured chat completion model
+        is a reasoning model or not. Properties max_tokens and temperature are
+        not supported for reasoning models.
         :param data_type: Defines how Functions runtime should treat the
         parameter value
         :param kwargs: Keyword arguments for specifying additional binding
@@ -3271,10 +3272,11 @@ def decorator():
                 binding=TextCompletionInput(
                     name=arg_name,
                     prompt=prompt,
-                    model=model,
+                    chat_model=chat_model,
                     temperature=temperature,
                     top_p=top_p,
                     max_tokens=max_tokens,
+                    is_reasoning_model=is_reasoning_model,
                     data_type=parse_singular_param_to_enum(data_type,
                                                            DataType),
                     **kwargs))
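
A hedged sketch of the renamed binding in use; the route, the deployment name, and the JSON payload shape are assumptions, not something this diff shows:

import json
import azure.functions as func

app = func.FunctionApp()

@app.route(route="whois/{name}", methods=["GET"])
@app.text_completion_input(arg_name="response",
                           prompt="Who is {name}?",
                           chat_model="gpt-4o",  # assumed deployment name
                           max_tokens="200",
                           is_reasoning_model=False)
def whois(req: func.HttpRequest, response: str) -> func.HttpResponse:
    # The binding delivers a JSON string; a "content" field holding the
    # completion text is assumed from the extension's samples.
    return func.HttpResponse(json.loads(response)["content"])
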
@@ -3371,9 +3373,13 @@ def decorator():
     def assistant_post_input(self, arg_name: str,
                              id: str,
                              user_message: str,
-                             model: Optional[str] = None,
+                             chat_model: Optional[str] = OpenAIModels.DefaultChatModel,
                              chat_storage_connection_setting: Optional[str] = "AzureWebJobsStorage",  # noqa: E501
-                             collection_name: Optional[str] = "ChatState",  # noqa: E501
+                             collection_name: Optional[str] = "ChatState",  # noqa: E501
+                             temperature: Optional[str] = "0.5",
+                             top_p: Optional[str] = None,
+                             max_tokens: Optional[str] = "100",
+                             is_reasoning_model: Optional[bool] = False,
                              data_type: Optional[
                                  Union[DataType, str]] = None,
                              **kwargs) \
@@ -3386,12 +3392,27 @@ def assistant_post_input(self, arg_name: str,
         :param id: The ID of the assistant to update.
         :param user_message: The user message that user has entered for
         assistant to respond to.
-        :param model: The OpenAI chat model to use.
+        :param chat_model: The deployment name or model name of the OpenAI Chat Completion API.
         :param chat_storage_connection_setting: The configuration section name
         for the table settings for assistant chat storage. The default value is
         "AzureWebJobsStorage".
         :param collection_name: The table collection name for assistant chat
         storage. The default value is "ChatState".
+        :param temperature: The sampling temperature to use, between 0 and 2.
+        Higher values like 0.8 will make the output more random, while lower
+        values like 0.2 will make it more focused and deterministic.
+        :param top_p: An alternative to sampling with temperature, called
+        nucleus sampling, where the model considers the results of the tokens
+        with top_p probability mass. So 0.1 means only the tokens comprising
+        the top 10% probability mass are considered. It's generally
+        recommended to use this or temperature, but not both.
+        :param max_tokens: The maximum number of tokens to generate in the
+        completion. The token count of your prompt plus max_tokens cannot
+        exceed the model's context length. Most models have a context length of
+        2048 tokens (except for the newest models, which support 4096).
+        :param is_reasoning_model: Whether the configured chat completion model
+        is a reasoning model or not. Properties max_tokens and temperature are
+        not supported for reasoning models.
         :param data_type: Defines how Functions runtime should treat the
         parameter value
         :param kwargs: Keyword arguments for specifying additional binding
@@ -3408,9 +3429,13 @@ def decorator():
                     name=arg_name,
                     id=id,
                     user_message=user_message,
-                    model=model,
+                    chat_model=chat_model,
                     chat_storage_connection_setting=chat_storage_connection_setting,  # noqa: E501
                     collection_name=collection_name,
+                    temperature=temperature,
+                    top_p=top_p,
+                    max_tokens=max_tokens,
+                    is_reasoning_model=is_reasoning_model,
                     data_type=parse_singular_param_to_enum(data_type,
                                                            DataType),
                     **kwargs))
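
A sketch of posting a user message with the new knobs; the route, deployment name, and query-string binding expression are assumptions:

import azure.functions as func

app = func.FunctionApp()

@app.route(route="assistants/{chatId}", methods=["POST"])
@app.assistant_post_input(arg_name="state",
                          id="{chatId}",
                          user_message="{Query.message}",
                          chat_model="gpt-4o-mini",  # assumed deployment name
                          temperature="0.2",
                          is_reasoning_model=False)
def post_user_message(req: func.HttpRequest, state: str) -> func.HttpResponse:
    # `state` carries the serialized chat state after the assistant replies.
    return func.HttpResponse(state, mimetype="application/json")
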
@@ -3424,7 +3449,7 @@ def embeddings_input(self,
                          arg_name: str,
                          input: str,
                          input_type: InputType,
-                         model: Optional[str] = None,
+                         embeddings_model: Optional[str] = OpenAIModels.DefaultEmbeddingsModel,
                          max_chunk_length: Optional[int] = 8 * 1024,
                          max_overlap: Optional[int] = 128,
                          data_type: Optional[
@@ -3441,7 +3466,7 @@ def embeddings_input(self,
         :param input: The input source containing the data to generate
         embeddings for.
         :param input_type: The type of the input.
-        :param model: The ID of the model to use.
+        :param embeddings_model: The deployment name or model name for OpenAI Embeddings.
         :param max_chunk_length: The maximum number of characters to chunk the
         input into. Default value: 8 * 1024
         :param max_overlap: The maximum number of characters to overlap
@@ -3462,7 +3487,7 @@ def decorator():
                     name=arg_name,
                     input=input,
                     input_type=input_type,
-                    model=model,
+                    embeddings_model=embeddings_model,
                     max_chunk_length=max_chunk_length,
                     max_overlap=max_overlap,
                     data_type=parse_singular_param_to_enum(data_type,
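
A sketch with the renamed embeddings_model parameter; the InputType import location and member name are assumptions, as is the route:

import azure.functions as func
from azure.functions import InputType  # import location assumed

app = func.FunctionApp()

@app.route(route="embed", methods=["POST"])
@app.embeddings_input(arg_name="embeddings",
                      input="{rawText}",
                      input_type=InputType.RawText,  # member name assumed
                      embeddings_model="text-embedding-ada-002")
def generate_embeddings(req: func.HttpRequest,
                        embeddings: str) -> func.HttpResponse:
    # `embeddings` arrives as JSON describing the generated vectors.
    return func.HttpResponse(embeddings, mimetype="application/json")
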
@@ -3476,13 +3501,17 @@ def decorator():

     def semantic_search_input(self,
                               arg_name: str,
-                              connection_name: str,
+                              search_connection_name: str,
                               collection: str,
                               query: Optional[str] = None,
                               embeddings_model: Optional[OpenAIModels] = OpenAIModels.DefaultEmbeddingsModel,  # NoQA
                               chat_model: Optional[OpenAIModels] = OpenAIModels.DefaultChatModel,  # NoQA
                               system_prompt: Optional[str] = semantic_search_system_prompt,  # NoQA
                               max_knowledge_count: Optional[int] = 1,
+                              temperature: Optional[str] = "0.5",
+                              top_p: Optional[str] = None,
+                              max_tokens: Optional[str] = "100",
+                              is_reasoning_model: Optional[bool] = False,
                               data_type: Optional[
                                   Union[DataType, str]] = None,
                               **kwargs) \
@@ -3499,19 +3528,33 @@ def semantic_search_input(self,
         Ref: https://platform.openai.com/docs/guides/embeddings

         :param arg_name: The name of binding parameter in the function code.
-        :param connection_name: app setting or environment variable which
-        contains a connection string value.
+        :param search_connection_name: app setting or environment variable which
+        contains a vector search connection setting value.
         :param collection: The name of the collection or table to search or
         store.
         :param query: The semantic query text to use for searching.
-        :param embeddings_model: The ID of the model to use for embeddings.
+        :param embeddings_model: The deployment name or model name for OpenAI Embeddings.
         The default value is "text-embedding-ada-002".
-        :param chat_model: The name of the Large Language Model to invoke for
-        chat responses. The default value is "gpt-3.5-turbo".
+        :param chat_model: The deployment name or model name of the OpenAI Chat Completion API.
         :param system_prompt: Optional. The system prompt to use for prompting
         the large language model.
         :param max_knowledge_count: Optional. The number of knowledge items to
         inject into the SystemPrompt. Default value: 1
+        :param temperature: The sampling temperature to use, between 0 and 2.
+        Higher values like 0.8 will make the output more random, while lower
+        values like 0.2 will make it more focused and deterministic.
+        :param top_p: An alternative to sampling with temperature, called
+        nucleus sampling, where the model considers the results of the tokens
+        with top_p probability mass. So 0.1 means only the tokens comprising
+        the top 10% probability mass are considered. It's generally
+        recommended to use this or temperature, but not both.
+        :param max_tokens: The maximum number of tokens to generate in the
+        completion. The token count of your prompt plus max_tokens cannot
+        exceed the model's context length. Most models have a context length of
+        2048 tokens (except for the newest models, which support 4096).
+        :param is_reasoning_model: Whether the configured chat completion model
+        is a reasoning model or not. Properties max_tokens and temperature are
+        not supported for reasoning models.
         :param data_type: Optional. Defines how Functions runtime should treat
         the parameter value. Default value: None
         :param kwargs: Keyword arguments for specifying additional binding
@@ -3526,13 +3569,17 @@ def decorator():
             fb.add_binding(
                 binding=SemanticSearchInput(
                     name=arg_name,
-                    connection_name=connection_name,
+                    search_connection_name=search_connection_name,
                     collection=collection,
                     query=query,
                     embeddings_model=embeddings_model,
                     chat_model=chat_model,
                     system_prompt=system_prompt,
                     max_knowledge_count=max_knowledge_count,
+                    temperature=temperature,
+                    top_p=top_p,
+                    max_tokens=max_tokens,
+                    is_reasoning_model=is_reasoning_model,
                     data_type=parse_singular_param_to_enum(data_type,
                                                            DataType),
                     **kwargs))
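
A sketch using the renamed search_connection_name plus the new sampling knobs; the app setting, index name, and deployment name are assumptions:

import azure.functions as func

app = func.FunctionApp()

@app.route(route="ask", methods=["POST"])
@app.semantic_search_input(arg_name="result",
                           search_connection_name="AISearchEndpoint",  # assumed app setting
                           collection="openai-index",  # assumed index name
                           query="{question}",
                           chat_model="gpt-4o",  # assumed deployment name
                           max_tokens="512",
                           is_reasoning_model=False)
def ask(req: func.HttpRequest, result: str) -> func.HttpResponse:
    # `result` is the JSON response the binding builds from search + chat.
    return func.HttpResponse(result, mimetype="application/json")
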
@@ -3546,9 +3593,9 @@ def embeddings_store_output(self,
                                 arg_name: str,
                                 input: str,
                                 input_type: InputType,
-                                connection_name: str,
+                                store_connection_name: str,
                                 collection: str,
-                                model: Optional[OpenAIModels] = OpenAIModels.DefaultEmbeddingsModel,  # NoQA
+                                embeddings_model: Optional[OpenAIModels] = OpenAIModels.DefaultEmbeddingsModel,  # NoQA
                                 max_chunk_length: Optional[int] = 8 * 1024,
                                 max_overlap: Optional[int] = 128,
                                 data_type: Optional[
@@ -3568,10 +3615,10 @@ def embeddings_store_output(self,
         :param arg_name: The name of binding parameter in the function code.
         :param input: The input to generate embeddings for.
         :param input_type: The type of the input.
-        :param connection_name: The name of an app setting or environment
-        variable which contains a connection string value
+        :param store_connection_name: The name of an app setting or environment
+        variable which contains a vector store connection setting value.
         :param collection: The collection or table to search.
-        :param model: The ID of the model to use.
+        :param embeddings_model: The deployment name or model name for OpenAI Embeddings.
         :param max_chunk_length: The maximum number of characters to chunk the
         input into.
         :param max_overlap: The maximum number of characters to overlap between
@@ -3592,9 +3639,9 @@ def decorator():
                     name=arg_name,
                     input=input,
                     input_type=input_type,
-                    connection_name=connection_name,
+                    store_connection_name=store_connection_name,
                     collection=collection,
-                    model=model,
+                    embeddings_model=embeddings_model,
                     max_chunk_length=max_chunk_length,
                     max_overlap=max_overlap,
                     data_type=parse_singular_param_to_enum(data_type,
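
A sketch of the output binding with store_connection_name and embeddings_model; the app setting, collection name, InputType import, and output payload shape are all assumptions:

import json
import azure.functions as func
from azure.functions import InputType  # import location assumed

app = func.FunctionApp()

@app.route(route="ingest", methods=["POST"])
@app.embeddings_store_output(arg_name="requests",
                             input="{rawText}",
                             input_type=InputType.RawText,  # member name assumed
                             store_connection_name="AISearchEndpoint",  # assumed app setting
                             collection="openai-index",  # assumed collection
                             embeddings_model="text-embedding-ada-002")
def ingest_text(req: func.HttpRequest,
                requests: func.Out[str]) -> func.HttpResponse:
    # Give the stored document a title; this payload shape is assumed from
    # the extension's samples, not shown in this diff.
    requests.set(json.dumps({"title": "demo-document"}))
    return func.HttpResponse("ingested")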