|
@@ -14,32 +14,41 @@
 
 models:
   {defs.DEFAULT_CHAT_MODEL_ID}:
-    api_key: ${{GRAPHRAG_API_KEY}} # set this in the generated .env file
     type: {defs.LLM_TYPE.value} # or azure_openai_chat
+    # api_base: https://<instance>.openai.azure.com
+    # api_version: 2024-05-01-preview
     auth_type: {defs.AUTH_TYPE.value} # or azure_managed_identity
-    model: {defs.LLM_MODEL}
-    model_supports_json: true # recommended if this is available for your model.
-    parallelization_num_threads: {defs.PARALLELIZATION_NUM_THREADS}
-    parallelization_stagger: {defs.PARALLELIZATION_STAGGER}
-    async_mode: {defs.ASYNC_MODE.value} # or asyncio
+    api_key: ${{GRAPHRAG_API_KEY}} # set this in the generated .env file
     # audience: "https://cognitiveservices.azure.com/.default"
-    # api_base: https://<instance>.openai.azure.com
-    # api_version: 2024-02-15-preview
     # organization: <organization_id>
+    model: {defs.LLM_MODEL}
     # deployment_name: <azure_model_deployment_name>
+    # encoding_model: {defs.ENCODING_MODEL} # automatically set by tiktoken if left undefined
+    model_supports_json: true # recommended if this is available for your model.
+    concurrent_requests: {defs.LLM_CONCURRENT_REQUESTS} # max number of simultaneous LLM requests allowed
+    async_mode: {defs.ASYNC_MODE.value} # or asyncio
+    retry_strategy: native
+    max_retries: -1 # set to -1 for dynamic retry logic (most optimal setting based on server response)
+    tokens_per_minute: 0 # set to 0 to disable rate limiting
+    requests_per_minute: 0 # set to 0 to disable rate limiting
   {defs.DEFAULT_EMBEDDING_MODEL_ID}:
-    api_key: ${{GRAPHRAG_API_KEY}}
     type: {defs.EMBEDDING_TYPE.value} # or azure_openai_embedding
-    auth_type: {defs.AUTH_TYPE.value} # or azure_managed_identity
-    model: {defs.EMBEDDING_MODEL}
-    parallelization_num_threads: {defs.PARALLELIZATION_NUM_THREADS}
-    parallelization_stagger: {defs.PARALLELIZATION_STAGGER}
-    async_mode: {defs.ASYNC_MODE.value} # or asyncio
     # api_base: https://<instance>.openai.azure.com
-    # api_version: 2024-02-15-preview
+    # api_version: 2024-05-01-preview
+    auth_type: {defs.AUTH_TYPE.value} # or azure_managed_identity
+    api_key: ${{GRAPHRAG_API_KEY}}
     # audience: "https://cognitiveservices.azure.com/.default"
     # organization: <organization_id>
+    model: {defs.EMBEDDING_MODEL}
     # deployment_name: <azure_model_deployment_name>
+    # encoding_model: {defs.ENCODING_MODEL} # automatically set by tiktoken if left undefined
+    model_supports_json: true # recommended if this is available for your model.
+    concurrent_requests: {defs.LLM_CONCURRENT_REQUESTS} # max number of simultaneous LLM requests allowed
+    async_mode: {defs.ASYNC_MODE.value} # or asyncio
+    retry_strategy: native
+    max_retries: -1 # set to -1 for dynamic retry logic (most optimal setting based on server response)
+    tokens_per_minute: 0 # set to 0 to disable rate limiting
+    requests_per_minute: 0 # set to 0 to disable rate limiting
 
 vector_store:
   {defs.VECTOR_STORE_DEFAULT_ID}:
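For reference, a minimal sketch of how the reordered chat-model block could render in a generated settings.yaml once the {defs.*} placeholders are substituted. The concrete values below (the default_chat_model key, the gpt-4-turbo-preview model name, concurrent_requests: 25, async_mode: threaded, cl100k_base) are illustrative assumptions, not guaranteed package defaults:

models:
  default_chat_model:              # assumed rendering of defs.DEFAULT_CHAT_MODEL_ID
    type: openai_chat              # or azure_openai_chat
    auth_type: api_key             # or azure_managed_identity
    api_key: ${GRAPHRAG_API_KEY}   # set this in the generated .env file
    model: gpt-4-turbo-preview     # illustrative; substitute your model
    # encoding_model: cl100k_base  # automatically set by tiktoken if left undefined
    model_supports_json: true      # recommended if this is available for your model.
    concurrent_requests: 25        # assumed value of defs.LLM_CONCURRENT_REQUESTS
    async_mode: threaded           # or asyncio
    retry_strategy: native
    max_retries: -1                # dynamic retry logic based on server responses
    tokens_per_minute: 0           # 0 disables rate limiting
    requests_per_minute: 0         # 0 disables rate limiting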
|
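And a sketch of the same block configured against Azure OpenAI, using the fields the template keeps commented out; the endpoint, API version, and deployment name are placeholders to replace, and the model name is again an assumption:

models:
  default_chat_model:
    type: azure_openai_chat
    api_base: https://<instance>.openai.azure.com
    api_version: 2024-05-01-preview
    auth_type: azure_managed_identity            # no api_key needed with managed identity
    audience: "https://cognitiveservices.azure.com/.default"
    model: gpt-4-turbo-preview                   # illustrative; substitute your model
    deployment_name: <azure_model_deployment_name>
    model_supports_json: true
    concurrent_requests: 25                      # assumed default
    async_mode: threaded                         # or asyncio
    retry_strategy: native
    max_retries: -1
    tokens_per_minute: 0
    requests_per_minute: 0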