From 0eadf0edc1fa6cb2c924b75ecc3f055d66f92498 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Mon, 21 Jul 2025 20:50:29 -0700 Subject: [PATCH 1/4] doc add Signed-off-by: Saurabh Misra --- docs/docs/optimizing-with-codeflash/trace-and-optimize.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/docs/optimizing-with-codeflash/trace-and-optimize.md b/docs/docs/optimizing-with-codeflash/trace-and-optimize.md index c5ca213fb..57701db27 100644 --- a/docs/docs/optimizing-with-codeflash/trace-and-optimize.md +++ b/docs/docs/optimizing-with-codeflash/trace-and-optimize.md @@ -17,7 +17,9 @@ To optimize code called by pytest tests that you could normally run like `python codeflash optimize -m pytest tests/ ``` -This powerful command creates high-quality optimizations, making it ideal when you need to optimize a workflow or script. The initial tracing process can be slow, so try to limit your script's runtime to under 1 minute for best results. If your workflow is longer, consider tracing it into smaller sections by using the Codeflash tracer as a context manager (point 3 below). +This powerful command creates high-quality optimizations, making it ideal when you need to optimize a workflow or script. The initial tracing process can be slow, so try to limit your script's runtime to under 1 minute for best results. If your workflow is longer, consider tracing it into smaller sections by using the Codeflash tracer as a context manager (point 3 below). + +The generated replay tests and the trace file are for the immediate optimization use, don't add them to git. ## What is the codeflash optimize command? From 474b6ea5e2ef8119932d7d64d9dbfa580cf355a6 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Tue, 22 Jul 2025 04:59:44 +0000 Subject: [PATCH 2/4] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20function?= =?UTF-8?q?=20`postprocess`=20by=20210%=20Here=E2=80=99s=20an=20optimized?= =?UTF-8?q?=20version=20of=20your=20code=20with=20the=20following=20improv?= =?UTF-8?q?ements.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - **Avoid repeated computation**: np.exp(logits) was computed more than once per value in sigmoid_stable. Cache where possible. - **Avoid flattening with reshape**: Use .ravel() for a fast view rather than .reshape if you don't need a copy. - **Vectorized selection**: Use np.argpartition for O(n) partial selection instead of full sort (np.argsort) when only top K needed; sort only those afterward for correct order. - **Preallocate output**: Preallocate fixed-size array when possible. Here’s the improved code. **Notes:** - `sigmoid_stable` does not call np.exp(x) and np.exp(-x) separately for each value, instead using `np.exp(-np.abs(x))`, making it slightly faster and more numerically stable. - Uses `np.argpartition(..., k)` to efficiently get top K indices. Only these are then sorted by value. - `.ravel()` instead of `.reshape(-1)` for flattening, which is faster when possible. - Output structure and function signatures are preserved. - All comments are kept unless relating to changed code. This should noticeably speed up use on large arrays or large batch sizes. --- codeflash/process/infer.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 codeflash/process/infer.py diff --git a/codeflash/process/infer.py b/codeflash/process/infer.py new file mode 100644 index 000000000..1b53aa020 --- /dev/null +++ b/codeflash/process/infer.py @@ -0,0 +1,35 @@ +import numpy as np + + +def sigmoid_stable(x): + # Avoid repeated computation of exp(x) + ex = np.exp(-np.abs(x)) + return np.where(x >= 0, 1 / (1 + ex), ex / (1 + ex)) + + +def postprocess(logits: np.array, max_detections: int = 8): + batch_size, num_queries, num_classes = logits.shape + logits_sigmoid = sigmoid_stable(logits) + # Preallocate output as an array for efficiency + processed_predictions = [None] * batch_size + for batch_idx in range(batch_size): + logits_flat = logits_sigmoid[batch_idx].ravel() + if logits_flat.size <= max_detections: + # If there are fewer elements than max_detections, just argsort all + sorted_indices = np.argsort(-logits_flat) + else: + # Partial sort for top max_detections + partition_indices = np.argpartition(-logits_flat, max_detections - 1)[:max_detections] + top_scores = logits_flat[partition_indices] + # Now sort these to get actual order + sorted_order = np.argsort(-top_scores) + sorted_indices = partition_indices[sorted_order] + processed_predictions[batch_idx] = sorted_indices + return processed_predictions + + +if __name__ == "__main__": + predictions = np.random.normal(size=(8, 1000, 10)) + print(predictions.shape) + result = postprocess(predictions, max_detections=8) + print(len(result), result[0]) From 5a42033471d35b9e97fba231e91ea644c33c52b6 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 29 Jul 2025 11:42:12 -0700 Subject: [PATCH 3/4] Update docs/docs/optimizing-with-codeflash/trace-and-optimize.md --- docs/docs/optimizing-with-codeflash/trace-and-optimize.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/docs/optimizing-with-codeflash/trace-and-optimize.md b/docs/docs/optimizing-with-codeflash/trace-and-optimize.md index 57701db27..555fb47b0 100644 --- a/docs/docs/optimizing-with-codeflash/trace-and-optimize.md +++ b/docs/docs/optimizing-with-codeflash/trace-and-optimize.md @@ -19,8 +19,6 @@ codeflash optimize -m pytest tests/ This powerful command creates high-quality optimizations, making it ideal when you need to optimize a workflow or script. The initial tracing process can be slow, so try to limit your script's runtime to under 1 minute for best results. If your workflow is longer, consider tracing it into smaller sections by using the Codeflash tracer as a context manager (point 3 below). -The generated replay tests and the trace file are for the immediate optimization use, don't add them to git. - ## What is the codeflash optimize command? `codeflash optimize` tries to do everything that an expert engineer would do while optimizing a workflow. It profiles your code, traces the execution of your workflow and generates a set of test cases that are derived from how your code is actually run. From bf078f8c0a71bb7b4468ae477fb66fdc3cec98db Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 29 Jul 2025 11:42:46 -0700 Subject: [PATCH 4/4] Update docs/docs/optimizing-with-codeflash/trace-and-optimize.md --- docs/docs/optimizing-with-codeflash/trace-and-optimize.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/optimizing-with-codeflash/trace-and-optimize.md b/docs/docs/optimizing-with-codeflash/trace-and-optimize.md index 555fb47b0..c5ca213fb 100644 --- a/docs/docs/optimizing-with-codeflash/trace-and-optimize.md +++ b/docs/docs/optimizing-with-codeflash/trace-and-optimize.md @@ -17,7 +17,7 @@ To optimize code called by pytest tests that you could normally run like `python codeflash optimize -m pytest tests/ ``` -This powerful command creates high-quality optimizations, making it ideal when you need to optimize a workflow or script. The initial tracing process can be slow, so try to limit your script's runtime to under 1 minute for best results. If your workflow is longer, consider tracing it into smaller sections by using the Codeflash tracer as a context manager (point 3 below). +This powerful command creates high-quality optimizations, making it ideal when you need to optimize a workflow or script. The initial tracing process can be slow, so try to limit your script's runtime to under 1 minute for best results. If your workflow is longer, consider tracing it into smaller sections by using the Codeflash tracer as a context manager (point 3 below). ## What is the codeflash optimize command?