diff --git a/gptel-anthropic.el b/gptel-anthropic.el index 9dd2214f..975c6262 100644 --- a/gptel-anthropic.el +++ b/gptel-anthropic.el @@ -468,45 +468,58 @@ files in the context." ;; (t context-string))) ;; (plist-get (car (last prompts)) :content))) +(defconst gptel--anthropic-cost-url "https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table" + "Web page for current Anthropic model pricing.") + (defconst gptel--anthropic-models - '((claude-3-7-sonnet-20250219 + `((claude-3-7-sonnet-20250219 :description "Hybrid model capable of standard thinking and extended thinking modes" :capabilities (media tool-use cache) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp" "application/pdf") :context-window 200 + :max-output 8192 :input-cost 3 :output-cost 15 + :cost-url ,gptel--anthropic-cost-url :cutoff-date "2025-02") (claude-3-5-sonnet-20241022 :description "Highest level of intelligence and capability" :capabilities (media tool-use cache) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp" "application/pdf") :context-window 200 + :max-output 8192 :input-cost 3 :output-cost 15 + :cost-url ,gptel--anthropic-cost-url :cutoff-date "2024-04") (claude-3-5-sonnet-20240620 :description "Highest level of intelligence and capability (earlier version)" :capabilities (media tool-use cache) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 200 + :max-output 8192 :input-cost 3 :output-cost 15 + :cost-url ,gptel--anthropic-cost-url :cutoff-date "2024-04") (claude-3-5-haiku-20241022 :description "Intelligence at blazing speeds" :capabilities (tool-use cache) :context-window 200 + :max-output 8192 :input-cost 1.00 :output-cost 5.00 + :cost-url ,gptel--anthropic-cost-url :cutoff-date "2024-07") (claude-3-opus-20240229 :description "Top-level performance, intelligence, fluency, and understanding" :capabilities (media tool-use cache) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 200 + 
:max-output 4096 :input-cost 15 :output-cost 75 + :cost-url ,gptel--anthropic-cost-url :cutoff-date "2023-08") (claude-3-sonnet-20240229 :description "Balance of intelligence and speed (legacy model)" @@ -515,13 +528,16 @@ files in the context." :context-window 200 :input-cost 3 :output-cost 15 + :cost-url ,gptel--anthropic-cost-url :cutoff-date "2023-08") (claude-3-haiku-20240307 :description "Fast and most compact model for near-instant responsiveness" :capabilities (tool-use cache) :context-window 200 + :max-output 4096 :input-cost 0.25 :output-cost 1.25 + :cost-url ,gptel--anthropic-cost-url :cutoff-date "2023-08")) "List of available Anthropic models and associated properties. Keys: @@ -534,10 +550,14 @@ Keys: - `:context-window': the context window size, in thousands of tokens. +- `:max-output': the maximum number of output tokens. + - `:input-cost': the input cost, in US dollars per million tokens. - `:output-cost': the output cost, in US dollars per million tokens. +- `:cost-url': web page for current model pricing. + - `:cutoff-date': the knowledge cutoff date. - `:request-params': a plist of additional request parameters to diff --git a/gptel-gemini.el b/gptel-gemini.el index aa75c82f..cb291513 100644 --- a/gptel-gemini.el +++ b/gptel-gemini.el @@ -313,16 +313,21 @@ files in the context." 
current)) (plist-get (car (last prompts)) :parts)))) +(defconst gptel--gemini-cost-url "https://ai.google.dev/pricing" + "Web page for current Gemini model pricing.") + (defconst gptel--gemini-models - '((gemini-1.5-pro-latest + `((gemini-1.5-pro-latest :description "Google's latest model with enhanced capabilities across various tasks" :capabilities (tool-use json media) :mime-types ("image/png" "image/jpeg" "image/webp" "image/heic" "image/heif" "application/pdf" "text/plain" "text/csv" "text/html") :context-window 2000 + :max-output 8192 ;; input & output price is halved for prompts of 128k tokens or less :input-cost 2.50 :output-cost 10 + :cost-url ,gptel--gemini-cost-url :cutoff-date "2024-05") (gemini-2.0-flash-exp :description "Next generation features, superior speed, native tool use" @@ -337,19 +342,23 @@ files in the context." :mime-types ("image/png" "image/jpeg" "image/webp" "image/heic" "image/heif" "application/pdf" "text/plain" "text/csv" "text/html") :context-window 1000 + :max-output 8192 ;; input & output price is halved for prompts of 128k tokens or less :input-cost 0.15 :output-cost 0.60 + :cost-url ,gptel--gemini-cost-url :cutoff-date "2024-05") (gemini-1.5-flash-8b :description "High volume and lower intelligence tasks" :capabilities (tool-use json media) :context-window 1000 + :max-output 8192 :mime-types ("image/png" "image/jpeg" "image/webp" "image/heic" "image/heif" "application/pdf" "text/plain" "text/csv" "text/html") ;; input & output price is halved for prompts of 128k tokens or less :input-cost 0.075 :output-cost 0.30 + :cost-url ,gptel--gemini-cost-url :cutoff-date "2024-10") (gemini-exp-1206 :description "Improved coding, reasoning and vision capabilities" @@ -363,8 +372,10 @@ files in the context." 
:mime-types ("image/png" "image/jpeg" "image/webp" "image/heic" "image/heif" "application/pdf" "text/plain" "text/csv" "text/html") :context-window 1000 + :max-output 8192 :input-cost 0.10 :output-cost 0.40 + :cost-url ,gptel--gemini-cost-url :cutoff-date "2024-08") (gemini-2.0-flash-lite-preview-02-05 :description "Gemini 2.0 Flash model optimized for cost efficiency and low latency" @@ -372,6 +383,7 @@ files in the context." :context-window 1000 :input-cost 0.075 :output-cost 0.30 + :cost-url ,gptel--gemini-cost-url :cutoff-date "2024-08") (gemini-2.0-pro-exp-02-05 :description "Next gen, high speed, multimodal for a diverse variety of tasks" @@ -379,12 +391,14 @@ files in the context." :context-window 2000 :input-cost 0.00 :output-cost 0.00 + :cost-url ,gptel--gemini-cost-url :cutoff-date "2024-08") (gemini-2.0-flash-thinking-exp-01-21 :description "Next gen, high speed, multimodal for a diverse variety of tasks" :capabilities (json) :input-cost 0.00 :output-cost 0.00 + :cost-url ,gptel--gemini-cost-url :cutoff-date "2024-08") (gemini-2.0-flash-exp :description "Multi-modal, streaming, tool use 2000 RPM" @@ -394,6 +408,7 @@ files in the context." :context-window 1000 :input-cost 0.00 :output-cost 0.00 + :cost-url ,gptel--gemini-cost-url :cutoff-date "2024-08") (gemini-2.0-flash-thinking-exp :description "DEPRECATED: Please use gemini-2.0-flash-thinking-exp-01-21 instead." @@ -413,10 +428,14 @@ Keys: - `:context-window': the context window size, in thousands of tokens. +- `:max-output': maximum number of output tokens. + - `:input-cost': the input cost, in US dollars per million tokens. - `:output-cost': the output cost, in US dollars per million tokens. +- `:cost-url': web page for current model pricing. + - `:cutoff-date': the knowledge cutoff date. 
- `:request-params': a plist of additional request parameters to diff --git a/gptel-openai-extras.el b/gptel-openai-extras.el index c9e06307..e17a1f30 100644 --- a/gptel-openai-extras.el +++ b/gptel-openai-extras.el @@ -339,12 +339,16 @@ The Deepseek API requires strictly alternating roles (user/assistant) in message (models '((deepseek-reasoner :capabilities (tool reasoning) :context-window 64 + :max-output 8192 :input-cost 0.55 - :output-cost 2.19) + :output-cost 2.19 + :cost-url "https://api-docs.deepseek.com/quick_start/pricing/") (deepseek-chat :capabilities (tool) :context-window 64 + :max-output 8192 :input-cost 0.27 + :cost-url "https://api-docs.deepseek.com/quick_start/pricing/" :output-cost 1.10)))) "Register a DeepSeek backend for gptel with NAME." (declare (indent 1)) diff --git a/gptel-transient.el b/gptel-transient.el index 24dce163..e316a6d4 100644 --- a/gptel-transient.el +++ b/gptel-transient.el @@ -576,6 +576,13 @@ Also format its value in the Transient menu." (gptel--infix-context-add-file) (gptel--infix-context-remove-all) (gptel--suffix-context-buffer)] + ["" "Info" + ("h" "Describe model" + (lambda () + "Describe the current gptel model." + (interactive) + (describe-gptel-model gptel-backend gptel-model)) + :transient t)] [:pad-keys t :if (lambda () (and gptel-use-tools gptel--known-tools)) "" (:info diff --git a/gptel.el b/gptel.el index e8f5e140..171aca02 100644 --- a/gptel.el +++ b/gptel.el @@ -520,14 +520,18 @@ the same as t." 
(defvar gptel--known-backends) +(defconst gptel--openai-cost-url "https://platform.openai.com/docs/pricing" + "Web page for current OpenAI model pricing.") + (defconst gptel--openai-models - '((gpt-4o + `((gpt-4o :description "Advanced model for complex tasks; cheaper & faster than GPT-Turbo" :capabilities (media tool-use json url) :mime-types ("image/jpeg" "image/png" "image/gif" "image/webp") :context-window 128 :input-cost 2.50 :output-cost 10 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-10") (gpt-4o-mini :description "Cheap model for fast tasks; cheaper & more capable than GPT-3.5 Turbo" @@ -536,6 +540,7 @@ the same as t." :context-window 128 :input-cost 0.15 :output-cost 0.60 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-10") (gpt-4-turbo :description "Previous high-intelligence model" @@ -544,6 +549,7 @@ the same as t." :context-window 128 :input-cost 10 :output-cost 30 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-12") ;; points to gpt-4-0613 (gpt-4 @@ -553,6 +559,7 @@ the same as t." :context-window 8.192 :input-cost 30 :output-cost 60 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-09") (gpt-4-turbo-preview :description "Points to gpt-4-0125-preview" @@ -561,6 +568,7 @@ the same as t." :context-window 128 :input-cost 10 :output-cost 30 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-12") (gpt-4-0125-preview :description "GPT-4 Turbo preview model intended to reduce cases of “laziness”" @@ -569,6 +577,7 @@ the same as t." :context-window 128 :input-cost 10 :output-cost 30 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-12") (gpt-4.5-preview :description "Largest and most capable GPT model to date" @@ -577,6 +586,7 @@ the same as t." :context-window 128 :input-cost 75 :output-cost 150 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-10") (o1 :description "Reasoning model designed to solve hard problems across domains" @@ -586,6 +596,7 @@ the same as t." 
:input-cost 15 :output-cost 60 :cutoff-date "2023-10" + :cost-url ,gptel--openai-cost-url :request-params (:stream :json-false)) (o1-preview :description "DEPRECATED: PLEASE USE o1" @@ -594,6 +605,7 @@ the same as t." :context-window 128 :input-cost 15 :output-cost 60 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-10" :capabilities (nosystem reasoning) :request-params (:stream :json-false)) @@ -602,6 +614,7 @@ the same as t." :context-window 128 :input-cost 3 :output-cost 12 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-10" :capabilities (nosystem reasoning) :request-params (:stream :json-false)) @@ -610,6 +623,7 @@ the same as t." :context-window 200 :input-cost 3 :output-cost 12 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-10" :capabilities (nosystem reasoning) :request-params (:stream :json-false)) @@ -617,13 +631,15 @@ the same as t." (gpt-4-32k :capabilities (tool-use) :input-cost 60 - :output-cost 120) + :output-cost 120 + :cost-url ,gptel--openai-cost-url) (gpt-4-1106-preview :description "Preview model with improved function calling support" :capabilities (tool-use) :context-window 128 :input-cost 10 :output-cost 30 + :cost-url ,gptel--openai-cost-url :cutoff-date "2023-04") (gpt-3.5-turbo :description "More expensive & less capable than GPT-4o-mini; use that instead" @@ -631,6 +647,7 @@ the same as t." :context-window 16.358 :input-cost 0.50 :output-cost 1.50 + :cost-url ,gptel--openai-cost-url :cutoff-date "2021-09") (gpt-3.5-turbo-16k :description "More expensive & less capable than GPT-4o-mini; use that instead" @@ -638,6 +655,7 @@ the same as t." :context-window 16.385 :input-cost 3 :output-cost 4 + :cost-url ,gptel--openai-cost-url :cutoff-date "2021-09")) "List of available OpenAI models and associated properties. Keys: @@ -654,6 +672,8 @@ Keys: - `:output-cost': the output cost, in US dollars per million tokens. +- `:cost-url': web page for current model pricing. + - `:cutoff-date': the knowledge cutoff date. 
- `:request-params': a plist of additional request parameters to @@ -1442,6 +1462,100 @@ file." (declare-function gptel-context--wrap "gptel-context")
+;; See (info "(elisp) Emacs Lisp Coding Conventions") for justification
+;; for not prefixing this function name with `gptel-'.
+(defun describe-gptel-model (backend model)
+  "Display a description of a gptel model MODEL of BACKEND.
+
+When called interactively, prompt with completion for a
+\"BACKEND:MODEL\" name built from `gptel--known-backends',
+defaulting to the current `gptel-backend' and `gptel-model'.
+
+The description is written to the help buffer: the backend
+endpoint and streaming setting, followed by whichever model
+properties are set on MODEL (description, capabilities, context
+window, maximum output, cut-off date, MIME types and costs)."
+  (interactive
+   (let* ((default (concat (gptel-backend-name gptel-backend) ":"
+                           (gptel--model-name gptel-model)))
+          (models-alist
+           (cl-loop
+            for (name . backend) in gptel--known-backends
+            nconc (cl-loop for model in (gptel-backend-models backend)
+                           collect (list (concat name ":"
+                                                 (gptel--model-name model))
+                                         backend model))))
+          (choice (completing-read
+                   (format-prompt "Describe gptel model" default)
+                   models-alist nil t nil nil default)))
+     ;; On empty input `completing-read' returns DEFAULT even if it is
+     ;; not one of the candidates (for instance when the current model
+     ;; is not listed under its backend).  `assoc' then finds nothing,
+     ;; so fall back to the current backend and model rather than
+     ;; invoking the command with an empty argument list.
+     (or (cdr (assoc choice models-alist))
+         (list gptel-backend gptel-model))))
+  (help-setup-xref (list #'describe-gptel-model backend model)
+                   (called-interactively-p 'interactive))
+  (with-help-window (help-buffer)
+    (with-current-buffer standard-output
+      (cl-flet ((sect (name val)
+                  ;; Insert a "NAME: VAL" row, left-padding NAME to 18
+                  ;; columns.  A list VAL is joined with ", "; symbols
+                  ;; in it are shown by name.
+                  (insert (make-string (max 0 (- 18 (string-width name)))
+                                       ?\s)
+                          name ": "
+                          (if (listp val)
+                              (mapconcat (lambda (s)
+                                           (if (symbolp s) (symbol-name s) s))
+                                         val ", ")
+                            val)
+                          "\n")))
+        (let* (;; `get' signals an error for non-symbols, so read the
+               ;; properties only when MODEL is a symbol.
+               ;; NOTE(review): confirm whether non-symbol models can
+               ;; reach this function.
+               (props (and (symbolp model) (symbol-plist model)))
+               (stream (gptel-backend-stream backend))
+               (desc (plist-get props :description))
+               (caps (plist-get props :capabilities))
+               (context (plist-get props :context-window))
+               (maxout (plist-get props :max-output))
+               (cost-url (plist-get props :cost-url))
+               (cutoff (plist-get props :cutoff-date))
+               (mime-types (plist-get props :mime-types))
+               (input-cost (plist-get props :input-cost))
+               (output-cost (plist-get props :output-cost)))
+          (insert (format "%s:%s is a gptel Large Language Model (LLM).\n\n"
+                          (gptel-backend-name backend)
+                          (gptel--model-name model)))
+          (when desc (sect "Description" desc))
+          (sect "Backend endpoint"
+                (format "%s://%s%s"
+                        (gptel-backend-protocol backend)
+                        (gptel-backend-host backend)
+                        (gptel-backend-endpoint backend)))
+          (sect "Streaming" (if stream "yes" "no"))
+          (when caps (sect "Capabilities" caps))
+          ;; The context window is in thousands of tokens and is a float
+          ;; for some models (e.g. 8.192); "%d" would drop the fraction.
+          (when context (sect "Context window" (format "%gk" context)))
+          (when maxout (sect "Maximum output" (format "%d tokens" maxout)))
+          (when cutoff (sect "Cut-off date" cutoff))
+          (when mime-types (sect "MIME types" mime-types))
+          (when input-cost
+            (sect "Input cost"
+                  (format "$%6.2f per 1M tokens" input-cost)))
+          (when output-cost
+            (sect "Output cost"
+                  (format "$%6.2f per 1M tokens" output-cost)))
+          (when (or input-cost output-cost)
+            (insert (format "%20sPrices subject to change.\n" " "))
+            (when cost-url
+              (insert (make-string 20 ?\s))
+              (help-insert-xref-button cost-url 'help-url cost-url))))))))
+
 ;;; Tool use