|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 1, |
| 5 | + "execution_count": 2, |
6 | 6 | "metadata": {},
|
7 | 7 | "outputs": [],
|
8 | 8 | "source": [
|
|
37 | 37 | },
|
38 | 38 | {
|
39 | 39 | "cell_type": "code",
|
40 |
| - "execution_count": 2, |
| 40 | + "execution_count": 5, |
41 | 41 | "metadata": {},
|
42 | 42 | "outputs": [
|
43 |
| - { |
44 |
| - "name": "stderr", |
45 |
| - "output_type": "stream", |
46 |
| - "text": [ |
47 |
| - "An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.\n" |
48 |
| - ] |
49 |
| - }, |
50 | 43 | {
|
51 | 44 | "name": "stdout",
|
52 | 45 | "output_type": "stream",
|
53 | 46 | "text": [
|
54 |
| - "GFloat scalar : 6666.04 nsec (50 runs at size 10000)\n", |
55 |
| - "GFloat vectorized, numpy arrays: 57.84 nsec (50 runs at size 1000000)\n", |
56 |
| - "GFloat vectorized, JAX JIT : 3.17 nsec (1000 runs at size 1000000)\n", |
57 |
| - "ML_dtypes : 2.92 nsec (1000 runs at size 1000000)\n" |
| 47 | + "GFloat scalar : 7518.31 nsec (5 runs at size 10000)\n", |
| 48 | + "GFloat vectorized, numpy arrays: 57.95 nsec (5 runs at size 1000000)\n", |
| 49 | + "GFloat vectorized, JAX JIT : 4.03 nsec (100 runs at size 1000000)\n", |
| 50 | + "ML_dtypes : 3.34 nsec (100 runs at size 1000000)\n" |
58 | 51 | ]
|
59 | 52 | }
|
60 | 53 | ],
|
|
71 | 64 | " return np.array([gfloat.round_float(fi, x) for x in a])\n",
|
72 | 65 | "\n",
|
73 | 66 | "\n",
|
| 67 | + "# About how many seconds to run for (autorange will take at least .2 sec)\n", |
| 68 | + "ACCURACY = 0.2\n", |
| 69 | + "\n", |
| 70 | + "\n", |
74 | 71 | "def time(f, problem_size=1.0):\n",
|
75 | 72 | " units = 1e9 # nsec\n",
|
76 | 73 | " t = Timer(f)\n",
|
77 |
| - " n = t.autorange()[0] * 10 # About 2 sec per run\n", |
| 74 | + " f() # pre-run\n", |
| 75 | + " n = int(t.autorange()[0] * ACCURACY / 0.2)\n", |
78 | 76 | " ts = t.repeat(repeat=3, number=n) # best of 3\n",
|
79 | 77 | " ts = [((t / n) / problem_size) * units for t in ts] # per run\n",
|
80 | 78 | " return f\"{min(ts):8.2f} nsec ({n} runs at size {problem_size})\"\n",
|
|
0 commit comments