diff --git a/gptel-transient.el b/gptel-transient.el
index f6fba6e4..77f2378f 100644
--- a/gptel-transient.el
+++ b/gptel-transient.el
@@ -720,6 +720,7 @@ Also format its value in the Transient menu."
      (gptel--infix-variable-scope)
      (gptel--infix-provider)
      (gptel--infix-max-tokens)
+     (gptel--infix-rolling-window-prompt)
      (gptel--infix-num-messages-to-send
       :if (lambda () (and gptel-expert-commands
                      (or gptel-mode gptel-track-response))))
@@ -1303,6 +1304,42 @@ supports. See `gptel-track-media' for more information."
   (transient-setup))
 
-;; ** Infix for additional directive
+;; ** Infix for prompt truncation
+
+(transient-define-infix gptel--infix-rolling-window-prompt ()
+  "Control truncation of the prompt to fit the model's context window.
+
+The value is stored in `gptel-rolling-window-prompt':
+- nil disables truncation,
+- t enables it, using 90% of the context window,
+- a number between 0.1 and 1.0 enables it, using that fraction."
+  :description "Auto-truncate prompt"
+  :class 'gptel-lisp-variable
+  :variable 'gptel-rolling-window-prompt
+  :set-value #'gptel--set-with-scope
+  :display-nil "Disable"
+  ;; NOTE(review): only t is mapped here; confirm how the menu renders a
+  ;; numeric (custom fraction) value.
+  :display-map '((t . "Enable (90%)"))
+  :key "-w"
+  :prompt "Prompt truncation: "
+  :reader (lambda (prompt &rest _)
+            (let* ((choices '(("disable" . nil)
+                              ("enable (90%)" . t)
+                              ("custom fraction" . custom)))
+                   (choice (completing-read prompt choices nil t))
+                   (value (cdr (assoc choice choices))))
+              (if (not (eq value 'custom))
+                  value
+                (let ((fraction (read-number
+                                 "Fraction of context window (0.1-1.0): " 0.9)))
+                  ;; Out-of-range input is a user mistake, not a program bug,
+                  ;; so signal a `user-error'.
+                  (unless (<= 0.1 fraction 1.0)
+                    (user-error "Invalid fraction %s: must be between 0.1 and 1.0"
+                                fraction))
+                  fraction)))))
+
+;; ** Infix for additional directive
 
 (transient-define-infix gptel--infix-add-directive ()
   "Additional directive intended for the next query only.
diff --git a/gptel.el b/gptel.el
index c3636287..3719f130 100644
--- a/gptel.el
+++ b/gptel.el
@@ -304,7 +304,7 @@ command line arguments."
'gptel-prompt-transform-functions "0.9.9")
 
 (defcustom gptel-prompt-transform-functions
-  '(gptel--transform-apply-preset gptel--transform-add-context)
+  '(gptel--transform-apply-preset gptel--transform-add-context gptel--transform-truncate-prompt)
   "Handlers to augment or transform a query before sending it.
 
 This hook is called in a temporary buffer containing the text to
@@ -336,6 +336,23 @@ locally for a specific buffer, or chat topic, or only the context
 of a certain task."
   :type 'hook)
 
+(defcustom gptel-rolling-window-prompt nil
+  "Whether to truncate prompts to fit the model's context window.
+
+When non-nil, gptel tries to keep the prompt below the model's
+advertised context window by deleting the oldest text of the
+conversation.  Prompt size is estimated from its character count.
+
+- If t, use 90% of the context window, leaving a safety margin.
+- If a number between 0.1 and 1.0, use that fraction of the
+  context window (for example, 0.8 for 80%).
+- If nil, do not truncate."
+  :group 'gptel
+  :type '(choice
+          (const :tag "Enable (use 90% of context window)" t)
+          (const :tag "Disable" nil)
+          (number :tag "Use fraction of context window")))
+
 (defcustom gptel-post-request-hook nil
   "Hook run after sending a gptel request.
 
@@ -1169,6 +1186,40 @@ in any way.")
   "Curl executable to use."
   (if (stringp gptel-use-curl) gptel-use-curl "curl"))
 
+(defun gptel--transform-truncate-prompt ()
+  "Truncate the prompt buffer to fit the current model's context window.
+
+Intended to run from `gptel-prompt-transform-functions'.  Do nothing
+unless `gptel-rolling-window-prompt' is non-nil and `gptel-model' has
+a numeric `:context-window' property, read here as thousands of tokens.
+
+The character limit is that window, estimated at 3 characters per
+token, times the fraction from `gptel-rolling-window-prompt': 0.9 when
+it is t, otherwise its numeric value clamped to between 0.1 and 1.0.
+When the buffer text is longer than the limit, delete from its
+beginning so that only the last part remains."
+  (let ((context-window-k (and gptel-rolling-window-prompt
+                               (symbolp gptel-model)
+                               (get gptel-model :context-window))))
+    (when (numberp context-window-k)
+      (let* (;; Rough characters-per-token ratio.  NOTE(review): an earlier
+             ;; comment said ~4 while the code multiplied by 3; confirm
+             ;; which is intended.
+             (chars-per-token 3)
+             ;; Clamp to the documented range: 0 would delete the whole
+             ;; prompt and a negative value would signal an error.
+             (fraction (if (numberp gptel-rolling-window-prompt)
+                           (min 1.0 (max 0.1 gptel-rolling-window-prompt))
+                         0.9))
+             (max-chars (round (* context-window-k 1000
+                                  chars-per-token fraction))))
+        ;; Measure the accessible region rather than `buffer-size' so the
+        ;; deletion bounds stay valid if the buffer is narrowed.
+        (when (> (- (point-max) (point-min)) max-chars)
+          (message "gptel: Truncating prompt to the last %d characters."
+                   max-chars)
+          (delete-region (point-min) (- (point-max) max-chars)))))))
+
 (defun gptel--transform-add-context (callback fsm)
   (if (and gptel-use-context gptel-context--alist)
       (gptel-context--wrap callback (plist-get (gptel-fsm-info fsm) :data))