diff --git a/public/calls/static/calls-api-2024-05-21.yaml b/public/calls/static/calls-api-2024-05-21.yaml index 585ef081c400cff..b0d027896f0947f 100644 --- a/public/calls/static/calls-api-2024-05-21.yaml +++ b/public/calls/static/calls-api-2024-05-21.yaml @@ -502,7 +502,7 @@ components: errorCode: type: string errorDescription: - type: string + type: string requiresImmediateRenegotiation: type: boolean GetSessionStateResponse: @@ -538,6 +538,3 @@ components: type: string sessionDescription: $ref: "#/components/schemas/SessionDescription" - - - diff --git a/src/components/models/ModelFeatures.tsx b/src/components/models/ModelFeatures.tsx new file mode 100644 index 000000000000000..3d2223f1f08cb58 --- /dev/null +++ b/src/components/models/ModelFeatures.tsx @@ -0,0 +1,118 @@ +import type { WorkersAIModelsSchema } from "~/schemas"; + +const ModelFeatures = ({ model }: { model: WorkersAIModelsSchema }) => { + const nf = new Intl.NumberFormat("en-US"); + const properties: any = {}; + model.properties.forEach((property: any) => { + properties[property.property_id] = property.value; + }); + + return ( + <> + {Object.keys(properties).length ? ( + <> + + + + <> + + + + + {properties.planned_deprecation_date && ( + + + + + )} + {properties.context_window && ( + + + + + )} + {properties.terms && ( + + + + + )} + {properties.info && ( + + + + + )} + {properties.max_input_tokens && ( + + + + + )} + {properties.output_dimensions && ( + + + + + )} + {properties.function_calling && ( + + + + + )} + {properties.lora && ( + + + + + )} + {properties.beta && ( + + + + + )} + +
Features + +
+ {/* Date.now() and Date#getTime() are both epoch milliseconds, so they are compared directly: the label is "Deprecated" only once the planned date has passed. */ Date.now() > + new Date(properties.planned_deprecation_date).getTime() + ? "Deprecated" + : "Planned Deprecation"} + + {new Date( + properties.planned_deprecation_date, + ).toLocaleDateString("en-US")} +
+ Context Window + + + + {nf.format(properties.context_window)} tokens
Terms and License + + link + +
More information + + link + +
Maximum Input Tokens{nf.format(properties.max_input_tokens)}
Output Dimensions{nf.format(properties.output_dimensions)}
+ Function calling{" "} + + + + Yes
LoRAYes
BetaYes
+ + ) : ( + false + )} + + ); +}; + +export default ModelFeatures; diff --git a/src/content/changelog/workers-ai/2025-02-24-context-windows.mdx b/src/content/changelog/workers-ai/2025-02-24-context-windows.mdx new file mode 100644 index 000000000000000..e5e775ca81009f3 --- /dev/null +++ b/src/content/changelog/workers-ai/2025-02-24-context-windows.mdx @@ -0,0 +1,11 @@ +--- +title: Workers AI larger context windows +description: Updated Workers AI models with larger context windows +date: 2025-02-24T11:00:00Z +--- + +We've updated the Workers AI text generation models to include context window and limit definitions, and changed our APIs to estimate and validate the number of tokens in the input prompt rather than the number of characters. + +This update allows developers to use larger context windows when interacting with Workers AI models, which can lead to better and more accurate results. + +Our [catalog page](/workers-ai/models/) provides more information about each model's supported context window. \ No newline at end of file diff --git a/src/content/glossary/workers-ai.yaml b/src/content/glossary/workers-ai.yaml index 69f93d77264db2b..a8fe2e73e16333c 100644 --- a/src/content/glossary/workers-ai.yaml +++ b/src/content/glossary/workers-ai.yaml @@ -4,6 +4,10 @@ entries: - term: Workers AI general_definition: |- [Workers AI](/workers-ai/) is a Cloudflare service that enables running machine learning models on Cloudflare's global network, utilizing serverless GPUs. It allows developers to integrate AI capabilities into their applications using Workers, Pages, or via the REST API. + - term: Context Window + general_definition: In generative AI, the context window is the sum of the number of input, reasoning, and completion or response tokens a model supports. You can find the context window limit on each [model page](/workers-ai/models/). 
+ - term: Maximum Tokens + general_definition: In generative AI, the user-defined property `max_tokens` defines the maximum number of tokens at which the model should stop responding. This limit cannot exceed the context window. - term: Serverless GPUs general_definition: |- [Serverless GPUs](/workers-ai/) are graphics processing units provided by Cloudflare in a serverless environment, enabling scalable and efficient execution of machine learning models without the need for managing underlying hardware. diff --git a/src/content/glossary/workers.yaml b/src/content/glossary/workers.yaml index 0e25f5afc8a3db9..b1525e3310b0f9a 100644 --- a/src/content/glossary/workers.yaml +++ b/src/content/glossary/workers.yaml @@ -131,4 +131,4 @@ entries: - term: wrangler.toml / wrangler.json / wrangler.jsonc general_definition: |- - The [configuration](/workers/wrangler/configuration/) used to customize the development and deployment setup for a Worker or a Pages Function. + The [configuration](/workers/wrangler/configuration/) used to customize the development and deployment setup for a Worker or a Pages Function. diff --git a/src/content/products/automatic-platform-optimization.yaml b/src/content/products/automatic-platform-optimization.yaml index af151103e65f289..86aad98b7db8247 100644 --- a/src/content/products/automatic-platform-optimization.yaml +++ b/src/content/products/automatic-platform-optimization.yaml @@ -8,8 +8,7 @@ product: meta: title: Cloudflare Automatic Platform Optimization docs - description: - Serve your WP site from Cloudflare's edge, ensuring improved performance. + description: Serve your WP site from Cloudflare's edge, ensuring improved performance. 
author: "@cloudflare" resources: diff --git a/src/content/release-notes/api-deprecations.yaml b/src/content/release-notes/api-deprecations.yaml index d82414b202e58f1..883ada3c57b9621 100644 --- a/src/content/release-notes/api-deprecations.yaml +++ b/src/content/release-notes/api-deprecations.yaml @@ -9,11 +9,11 @@ entries: title: Cloudflare DWeb Resolver description: |- Deprecation date: July 1, 2025 - + The Cloudflare DWeb Resolver experiment is ending. - + Deprecated APIs: - + - DoH resolver on resolver.cloudflare-eth.com - publish_date: "2025-03-23" diff --git a/src/content/release-notes/api-shield.yaml b/src/content/release-notes/api-shield.yaml index 83e15561f9041b1..01b4703de5f2eb0 100644 --- a/src/content/release-notes/api-shield.yaml +++ b/src/content/release-notes/api-shield.yaml @@ -14,7 +14,7 @@ entries: title: API Authentication Posture description: |- Customers will see per-endpoint authentication details inside API Shield's [Endpoint Management](/api-shield/management-and-monitoring/) for zones with configured session identifiers. - + - publish_date: "2024-12-19" title: Automatically applied endpoint risk labels description: |- diff --git a/src/content/release-notes/d1.yaml b/src/content/release-notes/d1.yaml index a28615adcde6b0f..b5138afc518fbe6 100644 --- a/src/content/release-notes/d1.yaml +++ b/src/content/release-notes/d1.yaml @@ -9,7 +9,7 @@ entries: title: Fixed bug with D1 read-only access via UI and /query REST API. description: |- Fixed a bug with D1 permissions which allowed users with read-only roles via the UI and users with read-only API tokens via the `/query` [REST API](/api/resources/d1/subresources/database/methods/query/) to execute queries that modified databases. UI actions via the `Tables` tab, such as creating and deleting tables, were incorrectly allowed with read-only access. However, UI actions via the `Console` tab were not affected by this bug and correctly required write access. 
- + Write queries with read-only access will now fail. If you relied on the previous incorrect behavior, please assign the correct roles to users or permissions to API tokens to perform D1 write queries. - publish_date: "2025-01-13" diff --git a/src/content/release-notes/dns.yaml b/src/content/release-notes/dns.yaml index 278725cecd75363..64276029cc3a78c 100644 --- a/src/content/release-notes/dns.yaml +++ b/src/content/release-notes/dns.yaml @@ -29,4 +29,4 @@ entries: - Automatically quoted TXT content upon save if no quotes exist in the record content field. - publish_date: "2024-10-07" title: API support for per-record CNAME flattening - description: Paid zones now have the option to flatten specific CNAME records. When using the API, specify the setting `cname_flatten` as `true` or `false`. Refer to the [documentation](/dns/cname-flattening/set-up-cname-flattening/#per-record) for details. \ No newline at end of file + description: Paid zones now have the option to flatten specific CNAME records. When using the API, specify the setting `cname_flatten` as `true` or `false`. Refer to the [documentation](/dns/cname-flattening/set-up-cname-flattening/#per-record) for details. diff --git a/src/content/release-notes/durable-objects.yaml b/src/content/release-notes/durable-objects.yaml index 6bf1de69a7c8acc..fd47a88218544b5 100644 --- a/src/content/release-notes/durable-objects.yaml +++ b/src/content/release-notes/durable-objects.yaml @@ -54,4 +54,4 @@ entries: - publish_date: "2024-02-15" title: Optional `alarmInfo` parameter for Durable Object Alarms description: |- - Durable Objects [Alarms](/durable-objects/api/alarms/) now have a new `alarmInfo` argument that provides more details about an alarm invocation, including the `retryCount` and `isRetry` to signal if the alarm was retried. 
\ No newline at end of file + Durable Objects [Alarms](/durable-objects/api/alarms/) now have a new `alarmInfo` argument that provides more details about an alarm invocation, including the `retryCount` and `isRetry` to signal if the alarm was retried. diff --git a/src/content/release-notes/email-security.yaml b/src/content/release-notes/email-security.yaml index d2659371da36ccf..b60fd91c10758f4 100644 --- a/src/content/release-notes/email-security.yaml +++ b/src/content/release-notes/email-security.yaml @@ -16,9 +16,8 @@ entries: - publish_date: "2024-12-19" title: Email Security reclassification tab description: |- - Customers can now have more transparency about their team and user submissions. The new Reclassification tab in the Zero Trust dashboard will allow customers to have a full understanding of what submissions they have made and what the outcomes of those submissions are. + Customers can now have more transparency about their team and user submissions. The new Reclassification tab in the Zero Trust dashboard will allow customers to have a full understanding of what submissions they have made and what the outcomes of those submissions are. - publish_date: "2024-12-19" title: Email Security expanded folder scanning description: |- Microsoft 365 customers can now choose to scan all folders or just the inbox when deploying via the Graph API. - diff --git a/src/content/release-notes/fundamentals.yaml b/src/content/release-notes/fundamentals.yaml index 7b3714d3fe11409..072d007f4c179d2 100644 --- a/src/content/release-notes/fundamentals.yaml +++ b/src/content/release-notes/fundamentals.yaml @@ -21,10 +21,10 @@ entries: Refer to [Account owned tokens documentation](/fundamentals/api/get-started/account-owned-tokens/) for more details. 
- publish_date: "2024-12-16" title: Cloudflare API docs are now automatically generated - description: |- + description: |- Cloudflare's API documentation is now being automatically generated based on OpenAPI Schemas, and we have retired our old documentation. The move to OpenAPI Schemas allows us to ensure greater consistency and quality across our API documentation. The documentation now also includes examples of how to call the API using curl or our SDKs. - Refer to the [Cloudflare API documentation](/api/), or the [blog post on our transition to OpenAPI](https://blog.cloudflare.com/open-api-transition/) for more information. + Refer to the [Cloudflare API documentation](/api/), or the [blog post on our transition to OpenAPI](https://blog.cloudflare.com/open-api-transition/) for more information. - publish_date: "2024-12-06" title: Dashboard SCIM is now fully self-serve description: |- diff --git a/src/content/release-notes/security-center.yaml b/src/content/release-notes/security-center.yaml index 0950918591e69af..651fdb6080a3893 100644 --- a/src/content/release-notes/security-center.yaml +++ b/src/content/release-notes/security-center.yaml @@ -7,7 +7,7 @@ productAreaLink: /fundamentals/reference/changelog/security/ entries: - publish_date: "2025-02-03" description: |- - - Security Center now has a role called Brand Protection. This role gives you access to the Brand Protection feature on the API and Cloudflare dashboard. Brand Protection role also gives you access to the Investigate platform, where you can consume the Threat Intel API and URL scanner API calls. + - Security Center now has a role called Brand Protection. This role gives you access to the Brand Protection feature on the API and Cloudflare dashboard. Brand Protection role also gives you access to the Investigate platform, where you can consume the Threat Intel API and URL scanner API calls. 
- publish_date: "2025-01-20" description: |- - On the URL scanner, customers who search for a report will now get a list of all reports related to that specific hostname. A hash is also available in the security report. By selecting the hash, the dashboard will list reports containing the same hash. diff --git a/src/content/release-notes/warp.yaml b/src/content/release-notes/warp.yaml index c75bcf3a67c5d67..05f7e88121c5e8f 100644 --- a/src/content/release-notes/warp.yaml +++ b/src/content/release-notes/warp.yaml @@ -23,56 +23,56 @@ entries: - publish_date: "2025-02-19" title: WARP client for macOS (version 2025.1.861.0) description: |- - A new GA release for the macOS WARP client is now available on the [Downloads page](/cloudflare-one/connections/connect-devices/warp/download-warp/). This release contains minor fixes and improvements. + A new GA release for the macOS WARP client is now available on the [Downloads page](/cloudflare-one/connections/connect-devices/warp/download-warp/). This release contains minor fixes and improvements. - Note: If using macOS Sequoia, Cloudflare recommends the use of macOS 15.3 or - later. With macOS 15.3, Apple addressed several issues that may have caused - the WARP client to not behave as expected when used with macOS 15.0.x. + Note: If using macOS Sequoia, Cloudflare recommends the use of macOS 15.3 or + later. With macOS 15.3, Apple addressed several issues that may have caused + the WARP client to not behave as expected when used with macOS 15.0.x. - **Changes and improvements** + **Changes and improvements** - - Improved command line interface for Access for Infrastructure with added function for filtering and ordering. - - Fixed client connectivity issues when switching between managed network profiles that use different WARP protocols. - - Improved OS version posture checks on macOS for greater reliability and availability. - - Added support for WARP desktop to use additional DoH endpoints to help reduce NAT congestion. 
- - Improved Wireguard connection stability on reconnections. - - Added additional HTTP/3 QUIC connectivity test to `warp-diag`. - - Added support for collection of system health metrics for enhanced device Digital Experience Monitoring. - - Automated the removal of active registrations for devices with multiple registrations with the same Zero Trust organization. - - Fixes issues with deleted registration at start up. + - Improved command line interface for Access for Infrastructure with added function for filtering and ordering. + - Fixed client connectivity issues when switching between managed network profiles that use different WARP protocols. + - Improved OS version posture checks on macOS for greater reliability and availability. + - Added support for WARP desktop to use additional DoH endpoints to help reduce NAT congestion. + - Improved Wireguard connection stability on reconnections. + - Added additional HTTP/3 QUIC connectivity test to `warp-diag`. + - Added support for collection of system health metrics for enhanced device Digital Experience Monitoring. + - Automated the removal of active registrations for devices with multiple registrations with the same Zero Trust organization. + - Fixes issues with deleted registration at start up. - **Known issues** + **Known issues** - - macOS Sequoia: Due to changes Apple introduced in macOS 15.0.x, the WARP - client may not behave as expected. Cloudflare recommends the use of macOS 15.3 - or later. + - macOS Sequoia: Due to changes Apple introduced in macOS 15.0.x, the WARP + client may not behave as expected. Cloudflare recommends the use of macOS 15.3 + or later. - publish_date: "2025-02-19" title: WARP client for Windows (version 2025.1.861.0) description: |- - A new GA release for the Windows WARP client is now available on the [Downloads page](/cloudflare-one/connections/connect-devices/warp/download-warp/). This release contains only improvements. 
+ A new GA release for the Windows WARP client is now available on the [Downloads page](/cloudflare-one/connections/connect-devices/warp/download-warp/). This release contains only improvements. - **Changes and improvements** + **Changes and improvements** - - Improved command line interface for Access for Infrastructure with added function for filtering and ordering. - - Fixed client connectivity issues when switching between managed network profiles that use different WARP protocols. - - Added support for WARP desktop to use additional DoH endpoints to help reduce NAT congestion. - - Improved connectivity check reliability in certain split tunnel configurations. - - Improved reading of device DNS settings at connection restart. - - Improved WARP connectivity in environments with virtual machine interfaces. - - Improved Wireguard connection stability on reconnections. - - Improved reliability of device posture checks for OS Version, Unique Client ID, Domain Joined, Disk Encryption, and Firewall attributes. - - Added additional HTTP/3 QUIC connectivity test to warp-diag. - - Added support for collection of system health metrics for enhanced device Digital Experience Monitoring. - - Automated the removal of active registrations for devices with multiple registrations with the same Zero Trust organization. + - Improved command line interface for Access for Infrastructure with added function for filtering and ordering. + - Fixed client connectivity issues when switching between managed network profiles that use different WARP protocols. + - Added support for WARP desktop to use additional DoH endpoints to help reduce NAT congestion. + - Improved connectivity check reliability in certain split tunnel configurations. + - Improved reading of device DNS settings at connection restart. + - Improved WARP connectivity in environments with virtual machine interfaces. + - Improved Wireguard connection stability on reconnections. 
+ - Improved reliability of device posture checks for OS Version, Unique Client ID, Domain Joined, Disk Encryption, and Firewall attributes. + - Added additional HTTP/3 QUIC connectivity test to warp-diag. + - Added support for collection of system health metrics for enhanced device Digital Experience Monitoring. + - Automated the removal of active registrations for devices with multiple registrations with the same Zero Trust organization. - **Known issues** + **Known issues** - - DNS resolution may be broken when the following conditions are all true: - - WARP is in Secure Web Gateway without DNS filtering (tunnel-only) mode. - - A custom DNS server address is configured on the primary network adapter. - - The custom DNS server address on the primary network adapter is changed while WARP is connected. + - DNS resolution may be broken when the following conditions are all true: + - WARP is in Secure Web Gateway without DNS filtering (tunnel-only) mode. + - A custom DNS server address is configured on the primary network adapter. + - The custom DNS server address on the primary network adapter is changed while WARP is connected. - To work around this issue, reconnect the WARP client by toggling off and back on. + To work around this issue, reconnect the WARP client by toggling off and back on. - publish_date: "2025-02-13" title: Cloudflare One Agent for iOS (version 1.9) @@ -150,7 +150,7 @@ entries: - publish_date: "2025-01-17" title: WARP download repository description: |- - Microsoft is [retiring the App Center](https://learn.microsoft.com/en-gb/appcenter/retirement) which customers previously used to download Windows and macOS WARP client releases. As a result, Cloudflare has launched a new [Downloads page](/cloudflare-one/connections/connect-devices/warp/download-warp/) where you can find WARP client downloads for all operating systems, version history, and release notes. 
+ Microsoft is [retiring the App Center](https://learn.microsoft.com/en-gb/appcenter/retirement) which customers previously used to download Windows and macOS WARP client releases. As a result, Cloudflare has launched a new [Downloads page](/cloudflare-one/connections/connect-devices/warp/download-warp/) where you can find WARP client downloads for all operating systems, version history, and release notes. - publish_date: "2025-01-09" title: WARP client for Windows (version 2024.12.760.0) @@ -351,7 +351,7 @@ entries: - publish_date: "2024-12-04" title: Custom device posture integration description: |- - WARP now supports setting up [custom device posture integrations](/cloudflare-one/identity/devices/service-providers/custom/) using a third-party API of your choice. + WARP now supports setting up [custom device posture integrations](/cloudflare-one/identity/devices/service-providers/custom/) using a third-party API of your choice. - publish_date: "2024-11-19" title: MASQUE GA description: |- diff --git a/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json b/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json index d4b18a3a43467b9..db4044064789438 100644 --- a/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json +++ b/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "4096" + }, { "property_id": "terms", "value": "https://huggingface.co/TheBloke/deepseek-coder-6.7B-base-AWQ" diff --git a/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json b/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json index e61356329b6eff1..65306283b724345 100644 --- a/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json +++ b/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": 
"context_window", + "value": "4096" + }, { "property_id": "terms", "value": "https://huggingface.co/TheBloke/deepseek-coder-6.7B-instruct-AWQ" diff --git a/src/content/workers-ai-models/deepseek-math-7b-instruct.json b/src/content/workers-ai-models/deepseek-math-7b-instruct.json index 4327c94333424b1..12a85d3aa8c4edb 100644 --- a/src/content/workers-ai-models/deepseek-math-7b-instruct.json +++ b/src/content/workers-ai-models/deepseek-math-7b-instruct.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "4096" + }, { "property_id": "info", "value": "https://huggingface.co/deepseek-ai/deepseek-math-7b-instruct" diff --git a/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json b/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json index b8503e50801fdc2..8763b60ba7b2815 100644 --- a/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json +++ b/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json @@ -10,6 +10,10 @@ }, "tags": [], "properties": [ + { + "property_id": "context_window", + "value": "80000" + }, { "property_id": "terms", "value": "https://github.com/deepseek-ai/DeepSeek-R1/blob/main/LICENSE" diff --git a/src/content/workers-ai-models/discolm-german-7b-v1-awq.json b/src/content/workers-ai-models/discolm-german-7b-v1-awq.json index 8e22a36a612d9a8..7f0fcef85bcf3f0 100644 --- a/src/content/workers-ai-models/discolm-german-7b-v1-awq.json +++ b/src/content/workers-ai-models/discolm-german-7b-v1-awq.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "4096" + }, { "property_id": "info", "value": "https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-AWQ" diff --git a/src/content/workers-ai-models/falcon-7b-instruct.json b/src/content/workers-ai-models/falcon-7b-instruct.json index 1ca23a34b1a7273..6e30c0f4c636735 100644 --- a/src/content/workers-ai-models/falcon-7b-instruct.json +++ 
b/src/content/workers-ai-models/falcon-7b-instruct.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "4096" + }, { "property_id": "info", "value": "https://huggingface.co/tiiuae/falcon-7b-instruct" diff --git a/src/content/workers-ai-models/gemma-2b-it-lora.json b/src/content/workers-ai-models/gemma-2b-it-lora.json index c663a9308e58407..88a14cd353f9af0 100644 --- a/src/content/workers-ai-models/gemma-2b-it-lora.json +++ b/src/content/workers-ai-models/gemma-2b-it-lora.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "8192" + }, { "property_id": "lora", "value": "true" diff --git a/src/content/workers-ai-models/gemma-7b-it-lora.json b/src/content/workers-ai-models/gemma-7b-it-lora.json index 9f2ed54a01c5a84..52821bc31e13a2d 100644 --- a/src/content/workers-ai-models/gemma-7b-it-lora.json +++ b/src/content/workers-ai-models/gemma-7b-it-lora.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "3500" + }, { "property_id": "lora", "value": "true" diff --git a/src/content/workers-ai-models/gemma-7b-it.json b/src/content/workers-ai-models/gemma-7b-it.json index a09614544e25371..ffd606a45c318ca 100644 --- a/src/content/workers-ai-models/gemma-7b-it.json +++ b/src/content/workers-ai-models/gemma-7b-it.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "8192" + }, { "property_id": "info", "value": "https://ai.google.dev/gemma/docs" @@ -22,18 +26,6 @@ "property_id": "lora", "value": "true" }, - { - "property_id": "max_batch_prefill_tokens", - "value": "2048" - }, - { - "property_id": "max_input_length", - "value": "1512" - }, - { - "property_id": "max_total_tokens", - "value": "2048" - }, { "property_id": "terms", "value": "https://ai.google.dev/gemma/terms" diff --git 
a/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json b/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json index 7cc59a13b2561d9..be073550cb32a24 100644 --- a/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json +++ b/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "24000" + }, { "property_id": "function_calling", "value": "true" diff --git a/src/content/workers-ai-models/llama-2-13b-chat-awq.json b/src/content/workers-ai-models/llama-2-13b-chat-awq.json index dff9c82a3c9e6fb..d73ff0a71db7169 100644 --- a/src/content/workers-ai-models/llama-2-13b-chat-awq.json +++ b/src/content/workers-ai-models/llama-2-13b-chat-awq.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "4096" + }, { "property_id": "info", "value": "https://huggingface.co/TheBloke/Llama-2-13B-chat-AWQ" diff --git a/src/content/workers-ai-models/llama-2-7b-chat-fp16.json b/src/content/workers-ai-models/llama-2-7b-chat-fp16.json index 8bbf9fb2ed385a6..28655dc4586db51 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-fp16.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-fp16.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "false" }, + { + "property_id": "context_window", + "value": "4096" + }, { "property_id": "info", "value": "https://ai.meta.com/llama/" diff --git a/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json b/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json index 6c2c7ef94963e80..1a057aaa42ca5df 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "8192" + }, { "property_id": "lora", "value": "true" diff --git 
a/src/content/workers-ai-models/llama-2-7b-chat-int8.json b/src/content/workers-ai-models/llama-2-7b-chat-int8.json index f15f1fce9e25af6..a05377fd91da919 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-int8.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-int8.json @@ -9,7 +9,12 @@ "description": "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks." }, "tags": [], - "properties": [], + "properties": [ + { + "property_id": "context_window", + "value": "8192" + } + ], "schema": { "input": { "type": "object", diff --git a/src/content/workers-ai-models/llama-3-8b-instruct-awq.json b/src/content/workers-ai-models/llama-3-8b-instruct-awq.json index 86d45f3a4d4d97c..49808a21eb3eb96 100644 --- a/src/content/workers-ai-models/llama-3-8b-instruct-awq.json +++ b/src/content/workers-ai-models/llama-3-8b-instruct-awq.json @@ -10,6 +10,10 @@ }, "tags": [], "properties": [ + { + "property_id": "context_window", + "value": "8192" + }, { "property_id": "info", "value": "https://llama.meta.com" diff --git a/src/content/workers-ai-models/llama-3-8b-instruct.json b/src/content/workers-ai-models/llama-3-8b-instruct.json index 55ec2a52efab543..384b1b5be0f3450 100644 --- a/src/content/workers-ai-models/llama-3-8b-instruct.json +++ b/src/content/workers-ai-models/llama-3-8b-instruct.json @@ -10,6 +10,10 @@ }, "tags": [], "properties": [ + { + "property_id": "context_window", + "value": "7968" + }, { "property_id": "info", "value": "https://llama.meta.com" diff --git a/src/content/workers-ai-models/llama-3.1-70b-instruct.json b/src/content/workers-ai-models/llama-3.1-70b-instruct.json index 2b77283503f7f0e..1b8b4ce01567757 100644 --- a/src/content/workers-ai-models/llama-3.1-70b-instruct.json +++ b/src/content/workers-ai-models/llama-3.1-70b-instruct.json @@ -1,372 +1,417 @@ { - "id": "03e26c2a-a18e-4daf-8616-da9e121eff3e", - "source": 1, - "name": "@cf/meta/llama-3.1-70b-instruct", - 
"description": "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models. The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.", - "task": { - "id": "c329a1f9-323d-4e91-b2aa-582dd4188d34", - "name": "Text Generation", - "description": "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks." - }, - "tags": [], - "properties": [ - { - "property_id": "terms", - "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE" - } - ], - "schema": { - "input": { - "type": "object", - "oneOf": [ - { - "title": "Prompt", - "properties": { - "prompt": { - "type": "string", - "minLength": 1, - "maxLength": 131072, - "description": "The input text prompt for the model to generate a response." - }, - "image": { - "oneOf": [ - { - "type": "array", - "description": "An array of integers that represent the image data constrained to 8-bit unsigned integer values", - "items": { - "type": "number", - "description": "A value between 0 and 255" - } - }, - { - "type": "string", - "format": "binary", - "description": "Binary string representing the image contents." - } - ] - }, - "raw": { - "type": "boolean", - "default": false, - "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." - }, - "stream": { - "type": "boolean", - "default": false, - "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." - }, - "max_tokens": { - "type": "integer", - "default": 256, - "description": "The maximum number of tokens to generate in the response." 
- }, - "temperature": { - "type": "number", - "default": 0.6, - "minimum": 0, - "maximum": 5, - "description": "Controls the randomness of the output; higher values produce more random results." - }, - "top_p": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." - }, - "top_k": { - "type": "integer", - "minimum": 1, - "maximum": 50, - "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." - }, - "seed": { - "type": "integer", - "minimum": 1, - "maximum": 9999999999, - "description": "Random seed for reproducibility of the generation." - }, - "repetition_penalty": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Penalty for repeated tokens; higher values discourage repetition." - }, - "frequency_penalty": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Decreases the likelihood of the model repeating the same lines verbatim." - }, - "presence_penalty": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." - } - }, - "required": ["prompt"] - }, - { - "title": "Messages", - "properties": { - "messages": { - "type": "array", - "description": "An array of message objects representing the conversation history.", - "items": { - "type": "object", - "properties": { - "role": { - "type": "string", - "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')." 
- }, - "content": { - "type": "string", - "maxLength": 131072, - "description": "The content of the message as a string." - } - }, - "required": ["role", "content"] - } - }, - "image": { - "oneOf": [ - { - "type": "array", - "description": "An array of integers that represent the image data constrained to 8-bit unsigned integer values", - "items": { - "type": "number", - "description": "A value between 0 and 255" - } - }, - { - "type": "string", - "format": "binary", - "description": "Binary string representing the image contents." - } - ] - }, - "functions": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { "type": "string" }, - "code": { "type": "string" } - }, - "required": ["name", "code"] - } - }, - "tools": { - "type": "array", - "description": "A list of tools available for the assistant to use.", - "items": { - "type": "object", - "oneOf": [ - { - "properties": { - "name": { - "type": "string", - "description": "The name of the tool. More descriptive the better." - }, - "description": { - "type": "string", - "description": "A brief description of what the tool does." - }, - "parameters": { - "type": "object", - "description": "Schema defining the parameters accepted by the tool.", - "properties": { - "type": { - "type": "string", - "description": "The type of the parameters object (usually 'object')." - }, - "required": { - "type": "array", - "description": "List of required parameter names.", - "items": { "type": "string" } - }, - "properties": { - "type": "object", - "description": "Definitions of each parameter.", - "additionalProperties": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "The data type of the parameter." - }, - "description": { - "type": "string", - "description": "A description of the expected parameter." 
- } - }, - "required": ["type", "description"] - } - } - }, - "required": ["type", "properties"] - } - }, - "required": ["name", "description", "parameters"] - }, - { - "properties": { - "type": { - "type": "string", - "description": "Specifies the type of tool (e.g., 'function')." - }, - "function": { - "type": "object", - "description": "Details of the function tool.", - "properties": { - "name": { - "type": "string", - "description": "The name of the function." - }, - "description": { - "type": "string", - "description": "A brief description of what the function does." - }, - "parameters": { - "type": "object", - "description": "Schema defining the parameters accepted by the function.", - "properties": { - "type": { - "type": "string", - "description": "The type of the parameters object (usually 'object')." - }, - "required": { - "type": "array", - "description": "List of required parameter names.", - "items": { "type": "string" } - }, - "properties": { - "type": "object", - "description": "Definitions of each parameter.", - "additionalProperties": { - "type": "object", - "properties": { - "type": { - "type": "string", - "description": "The data type of the parameter." - }, - "description": { - "type": "string", - "description": "A description of the expected parameter." - } - }, - "required": ["type", "description"] - } - } - }, - "required": ["type", "properties"] - } - }, - "required": ["name", "description", "parameters"] - } - }, - "required": ["type", "function"] - } - ] - } - }, - "stream": { - "type": "boolean", - "default": false, - "description": "If true, the response will be streamed back incrementally." - }, - "max_tokens": { - "type": "integer", - "default": 256, - "description": "The maximum number of tokens to generate in the response." - }, - "temperature": { - "type": "number", - "default": 0.6, - "minimum": 0, - "maximum": 5, - "description": "Controls the randomness of the output; higher values produce more random results." 
- }, - "top_p": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." - }, - "top_k": { - "type": "integer", - "minimum": 1, - "maximum": 50, - "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." - }, - "seed": { - "type": "integer", - "minimum": 1, - "maximum": 9999999999, - "description": "Random seed for reproducibility of the generation." - }, - "repetition_penalty": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Penalty for repeated tokens; higher values discourage repetition." - }, - "frequency_penalty": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Decreases the likelihood of the model repeating the same lines verbatim." - }, - "presence_penalty": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Increases the likelihood of the model introducing new topics." 
- } - }, - "required": ["messages"] - } - ] - }, - "output": { - "oneOf": [ - { - "type": "object", - "contentType": "application/json", - "properties": { - "response": { - "type": "string", - "description": "The generated text response from the model" - }, - "tool_calls": { - "type": "array", - "description": "An array of tool calls requests made during the response generation", - "items": { - "type": "object", - "properties": { - "arguments": { - "type": "object", - "description": "The arguments passed to be passed to the tool call request" - }, - "name": { - "type": "string", - "description": "The name of the tool to be called" - } - } - } - } - } - }, - { - "type": "string", - "contentType": "text/event-stream", - "format": "binary" - } - ] - } - } -} + "description" : "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models. The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.", + "id" : "03e26c2a-a18e-4daf-8616-da9e121eff3e", + "name" : "@cf/meta/llama-3.1-70b-instruct", + "properties" : [ + { + "property_id" : "terms", + "value" : "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE" + }, + { + "property_id" : "context_window", + "value" : "24000" + } + ], + "schema" : { + "input" : { + "oneOf" : [ + { + "properties" : { + "frequency_penalty" : { + "description" : "Decreases the likelihood of the model repeating the same lines verbatim.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "image" : { + "oneOf" : [ + { + "description" : "An array of integers that represent the image data constrained to 8-bit unsigned integer values", + "items" : { + "description" : "A value between 0 and 255", + "type" : "number" + }, + "type" : "array" + }, + { + "description" : "Binary string representing 
the image contents.", + "format" : "binary", + "type" : "string" + } + ] + }, + "lora" : { + "description" : "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.", + "type" : "string" + }, + "max_tokens" : { + "default" : 256, + "description" : "The maximum number of tokens to generate in the response.", + "type" : "integer" + }, + "presence_penalty" : { + "description" : "Increases the likelihood of the model introducing new topics.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "prompt" : { + "description" : "The input text prompt for the model to generate a response.", + "maxLength" : 131072, + "minLength" : 1, + "type" : "string" + }, + "raw" : { + "default" : false, + "description" : "If true, a chat template is not applied and you must adhere to the specific model's expected formatting.", + "type" : "boolean" + }, + "repetition_penalty" : { + "description" : "Penalty for repeated tokens; higher values discourage repetition.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "seed" : { + "description" : "Random seed for reproducibility of the generation.", + "maximum" : 9999999999, + "minimum" : 1, + "type" : "integer" + }, + "stream" : { + "default" : false, + "description" : "If true, the response will be streamed back incrementally using SSE, Server Sent Events.", + "type" : "boolean" + }, + "temperature" : { + "default" : 0.6, + "description" : "Controls the randomness of the output; higher values produce more random results.", + "maximum" : 5, + "minimum" : 0, + "type" : "number" + }, + "top_k" : { + "description" : "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.", + "maximum" : 50, + "minimum" : 1, + "type" : "integer" + }, + "top_p" : { + "description" : "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. 
Lower values make outputs more predictable; higher values allow for more varied and creative responses.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + } + }, + "required" : [ + "prompt" + ], + "title" : "Prompt" + }, + { + "properties" : { + "frequency_penalty" : { + "description" : "Decreases the likelihood of the model repeating the same lines verbatim.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "functions" : { + "items" : { + "properties" : { + "code" : { + "type" : "string" + }, + "name" : { + "type" : "string" + } + }, + "required" : [ + "name", + "code" + ], + "type" : "object" + }, + "type" : "array" + }, + "image" : { + "oneOf" : [ + { + "description" : "An array of integers that represent the image data constrained to 8-bit unsigned integer values", + "items" : { + "description" : "A value between 0 and 255", + "type" : "number" + }, + "type" : "array" + }, + { + "description" : "Binary string representing the image contents.", + "format" : "binary", + "type" : "string" + } + ] + }, + "max_tokens" : { + "default" : 256, + "description" : "The maximum number of tokens to generate in the response.", + "type" : "integer" + }, + "messages" : { + "description" : "An array of message objects representing the conversation history.", + "items" : { + "properties" : { + "content" : { + "description" : "The content of the message as a string.", + "maxLength" : 131072, + "type" : "string" + }, + "role" : { + "description" : "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').", + "type" : "string" + } + }, + "required" : [ + "role", + "content" + ], + "type" : "object" + }, + "type" : "array" + }, + "presence_penalty" : { + "description" : "Increases the likelihood of the model introducing new topics.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "repetition_penalty" : { + "description" : "Penalty for repeated tokens; higher values discourage repetition.", + "maximum" : 2, + "minimum" : 0, + 
"type" : "number" + }, + "seed" : { + "description" : "Random seed for reproducibility of the generation.", + "maximum" : 9999999999, + "minimum" : 1, + "type" : "integer" + }, + "stream" : { + "default" : false, + "description" : "If true, the response will be streamed back incrementally.", + "type" : "boolean" + }, + "temperature" : { + "default" : 0.6, + "description" : "Controls the randomness of the output; higher values produce more random results.", + "maximum" : 5, + "minimum" : 0, + "type" : "number" + }, + "tools" : { + "description" : "A list of tools available for the assistant to use.", + "items" : { + "oneOf" : [ + { + "properties" : { + "description" : { + "description" : "A brief description of what the tool does.", + "type" : "string" + }, + "name" : { + "description" : "The name of the tool. More descriptive the better.", + "type" : "string" + }, + "parameters" : { + "description" : "Schema defining the parameters accepted by the tool.", + "properties" : { + "properties" : { + "additionalProperties" : { + "properties" : { + "description" : { + "description" : "A description of the expected parameter.", + "type" : "string" + }, + "type" : { + "description" : "The data type of the parameter.", + "type" : "string" + } + }, + "required" : [ + "type", + "description" + ], + "type" : "object" + }, + "description" : "Definitions of each parameter.", + "type" : "object" + }, + "required" : { + "description" : "List of required parameter names.", + "items" : { + "type" : "string" + }, + "type" : "array" + }, + "type" : { + "description" : "The type of the parameters object (usually 'object').", + "type" : "string" + } + }, + "required" : [ + "type", + "properties" + ], + "type" : "object" + } + }, + "required" : [ + "name", + "description", + "parameters" + ] + }, + { + "properties" : { + "function" : { + "description" : "Details of the function tool.", + "properties" : { + "description" : { + "description" : "A brief description of what the function 
does.", + "type" : "string" + }, + "name" : { + "description" : "The name of the function.", + "type" : "string" + }, + "parameters" : { + "description" : "Schema defining the parameters accepted by the function.", + "properties" : { + "properties" : { + "additionalProperties" : { + "properties" : { + "description" : { + "description" : "A description of the expected parameter.", + "type" : "string" + }, + "type" : { + "description" : "The data type of the parameter.", + "type" : "string" + } + }, + "required" : [ + "type", + "description" + ], + "type" : "object" + }, + "description" : "Definitions of each parameter.", + "type" : "object" + }, + "required" : { + "description" : "List of required parameter names.", + "items" : { + "type" : "string" + }, + "type" : "array" + }, + "type" : { + "description" : "The type of the parameters object (usually 'object').", + "type" : "string" + } + }, + "required" : [ + "type", + "properties" + ], + "type" : "object" + } + }, + "required" : [ + "name", + "description", + "parameters" + ], + "type" : "object" + }, + "type" : { + "description" : "Specifies the type of tool (e.g., 'function').", + "type" : "string" + } + }, + "required" : [ + "type", + "function" + ] + } + ], + "type" : "object" + }, + "type" : "array" + }, + "top_k" : { + "description" : "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.", + "maximum" : 50, + "minimum" : 1, + "type" : "integer" + }, + "top_p" : { + "description" : "Controls the creativity of the AI's responses by adjusting how many possible words it considers. 
Lower values make outputs more predictable; higher values allow for more varied and creative responses.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + } + }, + "required" : [ + "messages" + ], + "title" : "Messages" + } + ], + "type" : "object" + }, + "output" : { + "oneOf" : [ + { + "contentType" : "application/json", + "properties" : { + "response" : { + "description" : "The generated text response from the model", + "type" : "string" + }, + "tool_calls" : { + "description" : "An array of tool calls requests made during the response generation", + "items" : { + "properties" : { + "arguments" : { + "description" : "The arguments passed to be passed to the tool call request", + "type" : "object" + }, + "name" : { + "description" : "The name of the tool to be called", + "type" : "string" + } + }, + "type" : "object" + }, + "type" : "array" + } + }, + "type" : "object" + }, + { + "contentType" : "text/event-stream", + "format" : "binary", + "type" : "string" + } + ] + } + }, + "source" : 1, + "tags" : [], + "task" : { + "description" : "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks.", + "id" : "c329a1f9-323d-4e91-b2aa-582dd4188d34", + "name" : "Text Generation" + } +} \ No newline at end of file diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json index f2ff8fa66601317..b7e6849aaf220d1 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json @@ -10,6 +10,10 @@ }, "tags": [], "properties": [ + { + "property_id": "context_window", + "value": "8192" + }, { "property_id": "terms", "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE" diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-fast.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-fast.json index 
faed7b7e05148de..2d8508ba51e1a47 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct-fast.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-fast.json @@ -1,744 +1,417 @@ { - - "id": "872d9af1-5ff8-4e84-aed1-ab3caf909436", - - "source": 1, - - "name": "@cf/meta/llama-3.1-8b-instruct-fast", - - "description": "[Fast version] The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models. The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.", - - "task": { - - "id": "c329a1f9-323d-4e91-b2aa-582dd4188d34", - - "name": "Text Generation", - - "description": "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks." - - }, - - "tags": [], - - "properties": [ - - - { - - "property_id": "terms", - - "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE" - - } - - ], - - "schema": { - - "input": { - - "type": "object", - - "oneOf": [ - - { - - "title": "Prompt", - - "properties": { - - "prompt": { - - "type": "string", - - "minLength": 1, - - "maxLength": 131072, - - "description": "The input text prompt for the model to generate a response." - - }, - - "image": { - - "oneOf": [ - - { - - "type": "array", - - "description": "An array of integers that represent the image data constrained to 8-bit unsigned integer values", - - "items": { - - "type": "number", - - "description": "A value between 0 and 255" - - } - - }, - - { - - "type": "string", - - "format": "binary", - - "description": "Binary string representing the image contents." 
- - } - - ] - - }, - - "raw": { - - "type": "boolean", - - "default": false, - - "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." - - }, - - "stream": { - - "type": "boolean", - - "default": false, - - "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." - - }, - - "max_tokens": { - - "type": "integer", - - "default": 256, - - "description": "The maximum number of tokens to generate in the response." - - }, - - "temperature": { - - "type": "number", - - "default": 0.6, - - "minimum": 0, - - "maximum": 5, - - "description": "Controls the randomness of the output; higher values produce more random results." - - }, - - "top_p": { - - "type": "number", - - "minimum": 0, - - "maximum": 2, - - "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." - - }, - - "top_k": { - - "type": "integer", - - "minimum": 1, - - "maximum": 50, - - "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." - - }, - - "seed": { - - "type": "integer", - - "minimum": 1, - - "maximum": 9999999999, - - "description": "Random seed for reproducibility of the generation." - - }, - - "repetition_penalty": { - - "type": "number", - - "minimum": 0, - - "maximum": 2, - - "description": "Penalty for repeated tokens; higher values discourage repetition." - - }, - - "frequency_penalty": { - - "type": "number", - - "minimum": 0, - - "maximum": 2, - - "description": "Decreases the likelihood of the model repeating the same lines verbatim." 
- - }, - - "presence_penalty": { - - "type": "number", - - "minimum": 0, - - "maximum": 2, - - "description": "Increases the likelihood of the model introducing new topics." - - }, - - "lora": { - - "type": "string", - - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." - - } - - }, - - "required": ["prompt"] - - }, - - { - - "title": "Messages", - - "properties": { - - "messages": { - - "type": "array", - - "description": "An array of message objects representing the conversation history.", - - "items": { - - "type": "object", - - "properties": { - - "role": { - - "type": "string", - - "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')." - - }, - - "content": { - - "type": "string", - - "maxLength": 131072, - - "description": "The content of the message as a string." - - } - - }, - - "required": ["role", "content"] - - } - - }, - - "image": { - - "oneOf": [ - - { - - "type": "array", - - "description": "An array of integers that represent the image data constrained to 8-bit unsigned integer values", - - "items": { - - "type": "number", - - "description": "A value between 0 and 255" - - } - - }, - - { - - "type": "string", - - "format": "binary", - - "description": "Binary string representing the image contents." - - } - - ] - - }, - - "functions": { - - "type": "array", - - "items": { - - "type": "object", - - "properties": { - - "name": { "type": "string" }, - - "code": { "type": "string" } - - }, - - "required": ["name", "code"] - - } - - }, - - "tools": { - - "type": "array", - - "description": "A list of tools available for the assistant to use.", - - "items": { - - "type": "object", - - "oneOf": [ - - { - - "properties": { - - "name": { - - "type": "string", - - "description": "The name of the tool. More descriptive the better." - - }, - - "description": { - - "type": "string", - - "description": "A brief description of what the tool does." 
- - }, - - "parameters": { - - "type": "object", - - "description": "Schema defining the parameters accepted by the tool.", - - "properties": { - - "type": { - - "type": "string", - - "description": "The type of the parameters object (usually 'object')." - - }, - - "required": { - - "type": "array", - - "description": "List of required parameter names.", - - "items": { "type": "string" } - - }, - - "properties": { - - "type": "object", - - "description": "Definitions of each parameter.", - - "additionalProperties": { - - "type": "object", - - "properties": { - - "type": { - - "type": "string", - - "description": "The data type of the parameter." - - }, - - "description": { - - "type": "string", - - "description": "A description of the expected parameter." - - } - - }, - - "required": ["type", "description"] - - } - - } - - }, - - "required": ["type", "properties"] - - } - - }, - - "required": ["name", "description", "parameters"] - - }, - - { - - "properties": { - - "type": { - - "type": "string", - - "description": "Specifies the type of tool (e.g., 'function')." - - }, - - "function": { - - "type": "object", - - "description": "Details of the function tool.", - - "properties": { - - "name": { - - "type": "string", - - "description": "The name of the function." - - }, - - "description": { - - "type": "string", - - "description": "A brief description of what the function does." - - }, - - "parameters": { - - "type": "object", - - "description": "Schema defining the parameters accepted by the function.", - - "properties": { - - "type": { - - "type": "string", - - "description": "The type of the parameters object (usually 'object')." 
- - }, - - "required": { - - "type": "array", - - "description": "List of required parameter names.", - - "items": { "type": "string" } - - }, - - "properties": { - - "type": "object", - - "description": "Definitions of each parameter.", - - "additionalProperties": { - - "type": "object", - - "properties": { - - "type": { - - "type": "string", - - "description": "The data type of the parameter." - - }, - - "description": { - - "type": "string", - - "description": "A description of the expected parameter." - - } - - }, - - "required": ["type", "description"] - - } - - } - - }, - - "required": ["type", "properties"] - - } - - }, - - "required": ["name", "description", "parameters"] - - } - - }, - - "required": ["type", "function"] - - } - - ] - - } - - }, - - "stream": { - - "type": "boolean", - - "default": false, - - "description": "If true, the response will be streamed back incrementally." - - }, - - "max_tokens": { - - "type": "integer", - - "default": 256, - - "description": "The maximum number of tokens to generate in the response." - - }, - - "temperature": { - - "type": "number", - - "default": 0.6, - - "minimum": 0, - - "maximum": 5, - - "description": "Controls the randomness of the output; higher values produce more random results." - - }, - - "top_p": { - - "type": "number", - - "minimum": 0, - - "maximum": 2, - - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." - - }, - - "top_k": { - - "type": "integer", - - "minimum": 1, - - "maximum": 50, - - "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." - - }, - - "seed": { - - "type": "integer", - - "minimum": 1, - - "maximum": 9999999999, - - "description": "Random seed for reproducibility of the generation." 
- - }, - - "repetition_penalty": { - - "type": "number", - - "minimum": 0, - - "maximum": 2, - - "description": "Penalty for repeated tokens; higher values discourage repetition." - - }, - - "frequency_penalty": { - - "type": "number", - - "minimum": 0, - - "maximum": 2, - - "description": "Decreases the likelihood of the model repeating the same lines verbatim." - - }, - - "presence_penalty": { - - "type": "number", - - "minimum": 0, - - "maximum": 2, - - "description": "Increases the likelihood of the model introducing new topics." - - } - - }, - - "required": ["messages"] - - } - - ] - - }, - - "output": { - - "oneOf": [ - - { - - "type": "object", - - "contentType": "application/json", - - "properties": { - - "response": { - - "type": "string", - - "description": "The generated text response from the model" - - }, - - "tool_calls": { - - "type": "array", - - "description": "An array of tool calls requests made during the response generation", - - "items": { - - "type": "object", - - "properties": { - - "arguments": { - - "type": "object", - - "description": "The arguments passed to be passed to the tool call request" - - }, - - "name": { - - "type": "string", - - "description": "The name of the tool to be called" - - } - - } - - } - - } - - } - - }, - - { - - "type": "string", - - "contentType": "text/event-stream", - - "format": "binary" - - } - - ] - - } - - } - + "description" : "[Fast version] The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models. 
The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.", + "id" : "872d9af1-5ff8-4e84-aed1-ab3caf909436", + "name" : "@cf/meta/llama-3.1-8b-instruct-fast", + "properties" : [ + { + "property_id" : "context_window", + "value" : "128000" + }, + { + "property_id" : "terms", + "value" : "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE" + } + ], + "schema" : { + "input" : { + "oneOf" : [ + { + "properties" : { + "frequency_penalty" : { + "description" : "Decreases the likelihood of the model repeating the same lines verbatim.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "image" : { + "oneOf" : [ + { + "description" : "An array of integers that represent the image data constrained to 8-bit unsigned integer values", + "items" : { + "description" : "A value between 0 and 255", + "type" : "number" + }, + "type" : "array" + }, + { + "description" : "Binary string representing the image contents.", + "format" : "binary", + "type" : "string" + } + ] + }, + "lora" : { + "description" : "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.", + "type" : "string" + }, + "max_tokens" : { + "default" : 256, + "description" : "The maximum number of tokens to generate in the response.", + "type" : "integer" + }, + "presence_penalty" : { + "description" : "Increases the likelihood of the model introducing new topics.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "prompt" : { + "description" : "The input text prompt for the model to generate a response.", + "maxLength" : 131072, + "minLength" : 1, + "type" : "string" + }, + "raw" : { + "default" : false, + "description" : "If true, a chat template is not applied and you must adhere to the specific model's expected formatting.", + "type" : "boolean" + }, + "repetition_penalty" : { + "description" : "Penalty 
for repeated tokens; higher values discourage repetition.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "seed" : { + "description" : "Random seed for reproducibility of the generation.", + "maximum" : 9999999999, + "minimum" : 1, + "type" : "integer" + }, + "stream" : { + "default" : false, + "description" : "If true, the response will be streamed back incrementally using SSE, Server Sent Events.", + "type" : "boolean" + }, + "temperature" : { + "default" : 0.6, + "description" : "Controls the randomness of the output; higher values produce more random results.", + "maximum" : 5, + "minimum" : 0, + "type" : "number" + }, + "top_k" : { + "description" : "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.", + "maximum" : 50, + "minimum" : 1, + "type" : "integer" + }, + "top_p" : { + "description" : "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. 
Lower values make outputs more predictable; higher values allow for more varied and creative responses.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + } + }, + "required" : [ + "prompt" + ], + "title" : "Prompt" + }, + { + "properties" : { + "frequency_penalty" : { + "description" : "Decreases the likelihood of the model repeating the same lines verbatim.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "functions" : { + "items" : { + "properties" : { + "code" : { + "type" : "string" + }, + "name" : { + "type" : "string" + } + }, + "required" : [ + "name", + "code" + ], + "type" : "object" + }, + "type" : "array" + }, + "image" : { + "oneOf" : [ + { + "description" : "An array of integers that represent the image data constrained to 8-bit unsigned integer values", + "items" : { + "description" : "A value between 0 and 255", + "type" : "number" + }, + "type" : "array" + }, + { + "description" : "Binary string representing the image contents.", + "format" : "binary", + "type" : "string" + } + ] + }, + "max_tokens" : { + "default" : 256, + "description" : "The maximum number of tokens to generate in the response.", + "type" : "integer" + }, + "messages" : { + "description" : "An array of message objects representing the conversation history.", + "items" : { + "properties" : { + "content" : { + "description" : "The content of the message as a string.", + "maxLength" : 131072, + "type" : "string" + }, + "role" : { + "description" : "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').", + "type" : "string" + } + }, + "required" : [ + "role", + "content" + ], + "type" : "object" + }, + "type" : "array" + }, + "presence_penalty" : { + "description" : "Increases the likelihood of the model introducing new topics.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + }, + "repetition_penalty" : { + "description" : "Penalty for repeated tokens; higher values discourage repetition.", + "maximum" : 2, + "minimum" : 0, + 
"type" : "number" + }, + "seed" : { + "description" : "Random seed for reproducibility of the generation.", + "maximum" : 9999999999, + "minimum" : 1, + "type" : "integer" + }, + "stream" : { + "default" : false, + "description" : "If true, the response will be streamed back incrementally.", + "type" : "boolean" + }, + "temperature" : { + "default" : 0.6, + "description" : "Controls the randomness of the output; higher values produce more random results.", + "maximum" : 5, + "minimum" : 0, + "type" : "number" + }, + "tools" : { + "description" : "A list of tools available for the assistant to use.", + "items" : { + "oneOf" : [ + { + "properties" : { + "description" : { + "description" : "A brief description of what the tool does.", + "type" : "string" + }, + "name" : { + "description" : "The name of the tool. More descriptive the better.", + "type" : "string" + }, + "parameters" : { + "description" : "Schema defining the parameters accepted by the tool.", + "properties" : { + "properties" : { + "additionalProperties" : { + "properties" : { + "description" : { + "description" : "A description of the expected parameter.", + "type" : "string" + }, + "type" : { + "description" : "The data type of the parameter.", + "type" : "string" + } + }, + "required" : [ + "type", + "description" + ], + "type" : "object" + }, + "description" : "Definitions of each parameter.", + "type" : "object" + }, + "required" : { + "description" : "List of required parameter names.", + "items" : { + "type" : "string" + }, + "type" : "array" + }, + "type" : { + "description" : "The type of the parameters object (usually 'object').", + "type" : "string" + } + }, + "required" : [ + "type", + "properties" + ], + "type" : "object" + } + }, + "required" : [ + "name", + "description", + "parameters" + ] + }, + { + "properties" : { + "function" : { + "description" : "Details of the function tool.", + "properties" : { + "description" : { + "description" : "A brief description of what the function 
does.", + "type" : "string" + }, + "name" : { + "description" : "The name of the function.", + "type" : "string" + }, + "parameters" : { + "description" : "Schema defining the parameters accepted by the function.", + "properties" : { + "properties" : { + "additionalProperties" : { + "properties" : { + "description" : { + "description" : "A description of the expected parameter.", + "type" : "string" + }, + "type" : { + "description" : "The data type of the parameter.", + "type" : "string" + } + }, + "required" : [ + "type", + "description" + ], + "type" : "object" + }, + "description" : "Definitions of each parameter.", + "type" : "object" + }, + "required" : { + "description" : "List of required parameter names.", + "items" : { + "type" : "string" + }, + "type" : "array" + }, + "type" : { + "description" : "The type of the parameters object (usually 'object').", + "type" : "string" + } + }, + "required" : [ + "type", + "properties" + ], + "type" : "object" + } + }, + "required" : [ + "name", + "description", + "parameters" + ], + "type" : "object" + }, + "type" : { + "description" : "Specifies the type of tool (e.g., 'function').", + "type" : "string" + } + }, + "required" : [ + "type", + "function" + ] + } + ], + "type" : "object" + }, + "type" : "array" + }, + "top_k" : { + "description" : "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.", + "maximum" : 50, + "minimum" : 1, + "type" : "integer" + }, + "top_p" : { + "description" : "Controls the creativity of the AI's responses by adjusting how many possible words it considers. 
Lower values make outputs more predictable; higher values allow for more varied and creative responses.", + "maximum" : 2, + "minimum" : 0, + "type" : "number" + } + }, + "required" : [ + "messages" + ], + "title" : "Messages" + } + ], + "type" : "object" + }, + "output" : { + "oneOf" : [ + { + "contentType" : "application/json", + "properties" : { + "response" : { + "description" : "The generated text response from the model", + "type" : "string" + }, + "tool_calls" : { + "description" : "An array of tool calls requests made during the response generation", + "items" : { + "properties" : { + "arguments" : { + "description" : "The arguments passed to be passed to the tool call request", + "type" : "object" + }, + "name" : { + "description" : "The name of the tool to be called", + "type" : "string" + } + }, + "type" : "object" + }, + "type" : "array" + } + }, + "type" : "object" + }, + { + "contentType" : "text/event-stream", + "format" : "binary", + "type" : "string" + } + ] + } + }, + "source" : 1, + "tags" : [], + "task" : { + "description" : "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks.", + "id" : "c329a1f9-323d-4e91-b2aa-582dd4188d34", + "name" : "Text Generation" + } } \ No newline at end of file diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json index 75c364934e8ac84..0c7ef40b5bd0bea 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json @@ -10,6 +10,10 @@ }, "tags": [], "properties": [ + { + "property_id": "context_window", + "value": "32000" + }, { "property_id": "terms", "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE" diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct.json b/src/content/workers-ai-models/llama-3.1-8b-instruct.json index 
6fd36b59bb577e9..cc529285a8becb0 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct.json @@ -10,6 +10,10 @@ }, "tags": [], "properties": [ + { + "property_id": "context_window", + "value": "7968" + }, { "property_id": "terms", "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE" diff --git a/src/content/workers-ai-models/llama-3.2-1b-instruct.json b/src/content/workers-ai-models/llama-3.2-1b-instruct.json index eb35aa6e24a8848..2aacfcac2107dad 100644 --- a/src/content/workers-ai-models/llama-3.2-1b-instruct.json +++ b/src/content/workers-ai-models/llama-3.2-1b-instruct.json @@ -10,6 +10,10 @@ }, "tags": [], "properties": [ + { + "property_id": "context_window", + "value": "128000" + }, { "property_id": "terms", "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE" diff --git a/src/content/workers-ai-models/llama-3.2-3b-instruct.json b/src/content/workers-ai-models/llama-3.2-3b-instruct.json index b215a7c281f1656..84514121cfda5be 100644 --- a/src/content/workers-ai-models/llama-3.2-3b-instruct.json +++ b/src/content/workers-ai-models/llama-3.2-3b-instruct.json @@ -10,6 +10,10 @@ }, "tags": [], "properties": [ + { + "property_id": "context_window", + "value": "128000" + }, { "property_id": "terms", "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE" diff --git a/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json b/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json index 395207fa51f5785..bfa1c5e0d2cafe0 100644 --- a/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json +++ b/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json @@ -10,6 +10,10 @@ }, "tags": [], "properties": [ + { + "property_id": "context_window", + "value": "24000" + }, { "property_id": "terms", "value": 
"https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE" diff --git a/src/content/workers-ai-models/llama-guard-3-8b.json b/src/content/workers-ai-models/llama-guard-3-8b.json new file mode 100644 index 000000000000000..ae4e67a90d4e325 --- /dev/null +++ b/src/content/workers-ai-models/llama-guard-3-8b.json @@ -0,0 +1,120 @@ +{ + "id": "cc80437b-9a8d-4f1a-9c77-9aaf0d226922", + "source": 1, + "name": "@cf/meta/llama-guard-3-8b", + "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.", + "task": { + "id": "c329a1f9-323d-4e91-b2aa-582dd4188d34", + "name": "Text Generation", + "description": "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks." + }, + "tags": [], + "properties": [], + "schema": { + "input": { + "type": "object", + "properties": { + "messages": { + "type": "array", + "description": "An array of message objects representing the conversation history.", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')." + }, + "content": { + "type": "string", + "maxLength": 131072, + "description": "The content of the message as a string." + } + }, + "required": [ + "role", + "content" + ] + } + }, + "max_tokens": { + "type": "integer", + "default": 256, + "description": "The maximum number of tokens to generate in the response." 
+ }, + "temperature": { + "type": "number", + "default": 0.6, + "minimum": 0, + "maximum": 5, + "description": "Controls the randomness of the output; higher values produce more random results." + }, + "response_format": { + "type": "object", + "description": "Dictate the output format of the generated response.", + "properties": { + "type": { + "type": "string", + "description": "Set to json_object to process and output generated text as JSON." + } + } + } + }, + "required": [ + "messages" + ] + }, + "output": { + "type": "object", + "contentType": "application/json", + "properties": { + "response": { + "oneOf": [ + { + "type": "string", + "description": "The generated text response from the model." + }, + { + "type": "object", + "description": "The json response parsed from the generated text response from the model.", + "properties": { + "safe": { + "type": "boolean", + "description": "Whether the conversation is safe or not." + }, + "categories": { + "type": "array", + "description": "A list of what hazard categories predicted for the conversation, if the conversation is deemed unsafe.", + "items": { + "type": "string", + "description": "Hazard category classname, from S1 to S14." 
+ } + } + } + } + ] + }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + } + } + } + } +} \ No newline at end of file diff --git a/src/content/workers-ai-models/llamaguard-7b-awq.json b/src/content/workers-ai-models/llamaguard-7b-awq.json index b1545f9c397d103..0ba6dae8c14358c 100644 --- a/src/content/workers-ai-models/llamaguard-7b-awq.json +++ b/src/content/workers-ai-models/llamaguard-7b-awq.json @@ -13,6 +13,10 @@ { "property_id": "beta", "value": "true" + }, + { + "property_id": "context_window", + "value": "4096" } ], "schema": { diff --git a/src/content/workers-ai-models/meta-llama-3-8b-instruct.json b/src/content/workers-ai-models/meta-llama-3-8b-instruct.json index e6ff5fab2a79289..be5e832aae9c10e 100644 --- a/src/content/workers-ai-models/meta-llama-3-8b-instruct.json +++ b/src/content/workers-ai-models/meta-llama-3-8b-instruct.json @@ -9,7 +9,12 @@ "description": "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks." 
}, "tags": [], - "properties": [], + "properties": [ + { + "property_id": "context_window", + "value": "8192" + } + ], "schema": { "input": { "type": "object", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json index 0ceab1d96b92309..f5f366aca8acce7 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "4096" + }, { "property_id": "info", "value": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-AWQ" diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json index f8a2f3981c42f4c..76b41cb89e42565 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "false" }, + { + "property_id": "context_window", + "value": "2824" + }, { "property_id": "info", "value": "https://mistral.ai/news/announcing-mistral-7b/" diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json index 61413c647baa615..b88c25fe4d8cf6c 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "15000" + }, { "property_id": "lora", "value": "true" diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json index 42e65e43f92b787..85bbc62cc8dbc66 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json +++ 
b/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "3072" + }, { "property_id": "info", "value": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2" diff --git a/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json b/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json index 8234317b0b0f528..8fd28ad72c16eb9 100644 --- a/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json +++ b/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json @@ -13,6 +13,10 @@ { "property_id": "beta", "value": "true" + }, + { + "property_id": "context_window", + "value": "4096" } ], "schema": { diff --git a/src/content/workers-ai-models/openchat-3.5-0106.json b/src/content/workers-ai-models/openchat-3.5-0106.json index cbb7c0dae5a1ca9..a77546f08ec33e6 100644 --- a/src/content/workers-ai-models/openchat-3.5-0106.json +++ b/src/content/workers-ai-models/openchat-3.5-0106.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "8192" + }, { "property_id": "info", "value": "https://huggingface.co/openchat/openchat-3.5-0106" diff --git a/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json b/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json index c35017e2b0cdf65..b6893b29022945e 100644 --- a/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json +++ b/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json @@ -13,6 +13,10 @@ { "property_id": "beta", "value": "true" + }, + { + "property_id": "context_window", + "value": "4096" } ], "schema": { diff --git a/src/content/workers-ai-models/phi-2.json b/src/content/workers-ai-models/phi-2.json index bc41a76631604a0..9f97cdaa899e9d5 100644 --- a/src/content/workers-ai-models/phi-2.json +++ b/src/content/workers-ai-models/phi-2.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + 
"property_id": "context_window", + "value": "2048" + }, { "property_id": "info", "value": "https://huggingface.co/microsoft/phi-2" diff --git a/src/content/workers-ai-models/qwen1.5-0.5b-chat.json b/src/content/workers-ai-models/qwen1.5-0.5b-chat.json index a1d6ceaa1fdaf93..a98f325d0b7a672 100644 --- a/src/content/workers-ai-models/qwen1.5-0.5b-chat.json +++ b/src/content/workers-ai-models/qwen1.5-0.5b-chat.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "32000" + }, { "property_id": "info", "value": "https://huggingface.co/qwen/qwen1.5-0.5b-chat" diff --git a/src/content/workers-ai-models/qwen1.5-1.8b-chat.json b/src/content/workers-ai-models/qwen1.5-1.8b-chat.json index 97977017e6fe361..ea9075487713f22 100644 --- a/src/content/workers-ai-models/qwen1.5-1.8b-chat.json +++ b/src/content/workers-ai-models/qwen1.5-1.8b-chat.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "32000" + }, { "property_id": "info", "value": "https://huggingface.co/qwen/qwen1.5-1.8b-chat" diff --git a/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json b/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json index bff46b051e4f923..00fc17848f93a8c 100644 --- a/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json +++ b/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "7500" + }, { "property_id": "info", "value": "https://huggingface.co/qwen/qwen1.5-14b-chat-awq" diff --git a/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json b/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json index c17d98d917d4f58..628664271455465 100644 --- a/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json +++ b/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": 
"context_window", + "value": "20000" + }, { "property_id": "info", "value": "https://huggingface.co/qwen/qwen1.5-7b-chat-awq" diff --git a/src/content/workers-ai-models/sqlcoder-7b-2.json b/src/content/workers-ai-models/sqlcoder-7b-2.json index 66f87190c937759..f81efdac4d23b9d 100644 --- a/src/content/workers-ai-models/sqlcoder-7b-2.json +++ b/src/content/workers-ai-models/sqlcoder-7b-2.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "10000" + }, { "property_id": "info", "value": "https://huggingface.co/defog/sqlcoder-7b-2" diff --git a/src/content/workers-ai-models/starling-lm-7b-beta.json b/src/content/workers-ai-models/starling-lm-7b-beta.json index 9ff393649d46f26..6a94fffb23018e5 100644 --- a/src/content/workers-ai-models/starling-lm-7b-beta.json +++ b/src/content/workers-ai-models/starling-lm-7b-beta.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "4096" + }, { "property_id": "info", "value": "https://huggingface.co/Nexusflow/Starling-LM-7B-beta" diff --git a/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json b/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json index 8f99e0d348bc316..3dc1b3a4cd04d06 100644 --- a/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json +++ b/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "2048" + }, { "property_id": "info", "value": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0" diff --git a/src/content/workers-ai-models/uform-gen2-qwen-500m.json b/src/content/workers-ai-models/uform-gen2-qwen-500m.json index 0400ed70b674896..a4e635d6eeea244 100644 --- a/src/content/workers-ai-models/uform-gen2-qwen-500m.json +++ b/src/content/workers-ai-models/uform-gen2-qwen-500m.json @@ -30,10 +30,6 @@ { "type": "object", "properties": { - 
"temperature": { - "type": "number", - "description": "Controls the randomness of the output; higher values produce more random results." - }, "prompt": { "type": "string", "description": "The input text prompt for the model to generate a response." diff --git a/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json b/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json index 88fbe3646b7936e..337040b66a4a635 100644 --- a/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json +++ b/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json @@ -13,6 +13,10 @@ { "property_id": "beta", "value": "true" + }, + { + "property_id": "context_window", + "value": "15000" } ], "schema": { diff --git a/src/content/workers-ai-models/zephyr-7b-beta-awq.json b/src/content/workers-ai-models/zephyr-7b-beta-awq.json index 0685aa03b4d3014..e49f0132f46625f 100644 --- a/src/content/workers-ai-models/zephyr-7b-beta-awq.json +++ b/src/content/workers-ai-models/zephyr-7b-beta-awq.json @@ -14,6 +14,10 @@ "property_id": "beta", "value": "true" }, + { + "property_id": "context_window", + "value": "4096" + }, { "property_id": "info", "value": "https://huggingface.co/TheBloke/zephyr-7B-beta-AWQ" diff --git a/src/pages/workers-ai/models/[name].astro b/src/pages/workers-ai/models/[name].astro index 465bddbf50c60ee..f82cb16797e5b83 100644 --- a/src/pages/workers-ai/models/[name].astro +++ b/src/pages/workers-ai/models/[name].astro @@ -6,7 +6,7 @@ import StarlightPage, { } from "@astrojs/starlight/components/StarlightPage.astro"; import { LinkButton, Tabs, TabItem, Code, Aside, Badge } from "~/components"; import ModelInfo from "~/components/models/ModelInfo.tsx"; -import ModelBadges from "~/components/models/ModelBadges.tsx"; +import ModelFeatures from "~/components/models/ModelFeatures.tsx"; import SchemaViewer from "~/components/models/SchemaViewer.astro"; import TextGenerationCode from "~/components/models/code/TextGenerationCode.astro"; @@ -94,7 +94,6 @@ if (model.name === 
"@cf/openai/whisper-large-v3-turbo") { } const description = model.description; -const terms = model.properties.find((x) => x.property_id === "terms"); const isBeta = model.properties.find( ({ property_id, value }) => property_id === "beta" && value === "true", @@ -154,8 +153,6 @@ const starlightPageProps = { {model.name}

{description}

- {terms && Terms and License} - { model.name === "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b" && ( @@ -198,7 +195,9 @@ const starlightPageProps = { /> ) - } + } + + { hasPlayground && (