# Parameters


## Additional Options

### Caching

There is a cache layer on the Inference API to speed up requests when the inputs are exactly the same. Many models, such as classifiers and embedding models, are deterministic, so the cached results can be reused as-is. However, if you use a nondeterministic model, you can opt out of the cache mechanism, forcing a genuinely new query.

To do this, add `x-use-cache:false` to the request headers. For example:

<inferencesnippet>

<curl>
```diff
curl https://api-inference.huggingface.co/models/MODEL_ID \
    -X POST \
    -d '{"inputs": "Can you please let us know more details about your "}' \
    -H "Authorization: Bearer hf_***" \
    -H "Content-Type: application/json" \
+   -H "x-use-cache: false"
```
</curl>

<python>
```diff
import requests

API_URL = "https://api-inference.huggingface.co/models/MODEL_ID"
headers = {
    "Authorization": "Bearer hf_***",
    "Content-Type": "application/json",
+   "x-use-cache": "false"
}
data = {
    "inputs": "Can you please let us know more details about your "
}
response = requests.post(API_URL, headers=headers, json=data)
print(response.json())
```

</python>

<js>
```diff
import fetch from "node-fetch";

async function query(data) {
    const response = await fetch(
        "https://api-inference.huggingface.co/models/MODEL_ID",
        {
            method: "POST",
            headers: {
                Authorization: `Bearer hf_***`,
                "Content-Type": "application/json",
+               "x-use-cache": "false"
            },
            body: JSON.stringify(data),
        }
    );
    const result = await response.json();
    return result;
}

query({
    inputs: "Can you please let us know more details about your "
}).then((response) => {
    console.log(JSON.stringify(response, null, 2));
});

```

</js>

</inferencesnippet>
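
If you call the API from Python in more than one place, it can be convenient to fold the header into a small wrapper and decide per call whether a cached result is acceptable. The sketch below is a minimal illustration of that pattern; the `query` helper and its `use_cache` parameter are assumptions of this example, not part of the API itself.

```python
import requests

API_URL = "https://api-inference.huggingface.co/models/MODEL_ID"
BASE_HEADERS = {
    "Authorization": "Bearer hf_***",
    "Content-Type": "application/json",
}

def query(payload, use_cache=True):
    # Only add the opt-out header when a genuinely fresh result is wanted;
    # otherwise the API's default caching behaviour applies.
    headers = dict(BASE_HEADERS)
    if not use_cache:
        headers["x-use-cache"] = "false"
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# A cached result is fine for a deterministic model...
print(query({"inputs": "Can you please let us know more details about your "}))
# ...but force a new generation for a nondeterministic one.
print(query({"inputs": "Can you please let us know more details about your "}, use_cache=False))
```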

### Wait for the model

When a model is warm, it is ready to be used and you will get a response relatively quickly. However, some models are cold and need to be loaded before they can be used; in that case, you will get a 503 error. Rather than sending repeated requests until the model is loaded, you can add `x-wait-for-model:true` to the request headers to make the API wait for the model to load before responding. We suggest using this flag only when you are sure the model is cold: first try the request without it, and only if you get a 503 error, retry with the flag set. A sketch of that retry pattern follows the snippets below.

<inferencesnippet>

<curl>
```diff
curl https://api-inference.huggingface.co/models/MODEL_ID \
    -X POST \
    -d '{"inputs": "Can you please let us know more details about your "}' \
    -H "Authorization: Bearer hf_***" \
    -H "Content-Type: application/json" \
+   -H "x-wait-for-model: true"
```
</curl>

<python>
```diff
import requests

API_URL = "https://api-inference.huggingface.co/models/MODEL_ID"
headers = {
    "Authorization": "Bearer hf_***",
    "Content-Type": "application/json",
+   "x-wait-for-model": "true"
}
data = {
    "inputs": "Can you please let us know more details about your "
}
response = requests.post(API_URL, headers=headers, json=data)
print(response.json())
```

</python>

<js>
```diff
import fetch from "node-fetch";

async function query(data) {
    const response = await fetch(
        "https://api-inference.huggingface.co/models/MODEL_ID",
        {
            method: "POST",
            headers: {
                Authorization: `Bearer hf_***`,
                "Content-Type": "application/json",
+               "x-wait-for-model": "true"
            },
            body: JSON.stringify(data),
        }
    );
    const result = await response.json();
    return result;
}

query({
    inputs: "Can you please let us know more details about your "
}).then((response) => {
    console.log(JSON.stringify(response, null, 2));
});

```

</js>

</inferencesnippet>
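
The recommendation above (try without the flag first, and fall back to `x-wait-for-model` only on a 503) can be expressed as a small wrapper. The sketch below shows one way to do it in Python; the `query` helper and the single-retry policy are assumptions of this example rather than a prescribed client.

```python
import requests

API_URL = "https://api-inference.huggingface.co/models/MODEL_ID"
HEADERS = {
    "Authorization": "Bearer hf_***",
    "Content-Type": "application/json",
}

def query(payload):
    # First attempt without the flag: a cold model answers with a 503 right away.
    response = requests.post(API_URL, headers=HEADERS, json=payload)
    if response.status_code == 503:
        # The model is cold, so retry once and wait for it to finish loading.
        response = requests.post(
            API_URL,
            headers={**HEADERS, "x-wait-for-model": "true"},
            json=payload,
        )
    return response.json()

print(query({"inputs": "Can you please let us know more details about your "}))
```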