From 63f1942c48b4bbeddf936d28c5fbad75f2bc3e18 Mon Sep 17 00:00:00 2001
From: Rich Wareham <rjw57@cam.ac.uk>
Date: Fri, 21 Nov 2014 16:28:37 +0000
Subject: [PATCH] api: add ETag support

Add basic ETag support for the prediction API. On each request, an ETag
is computed by hashing the prediction request with the dataset which
would satisfy it. This ETag is then returned via the ETag header to the
client. Should a client set the If-None-Match header on the incoming
request, it is compared to the computed ETag and, if they match, a 304
response is returned. If ETags match then the prediction is not run and
the client is instructed (via the 304 response) to use the cached
version.

Note that this does not directly help us with the current UI since the
$.ajax() function must have ifModified set explicitly to true to enable
caching support. In that case $.ajax() synthesises a fail. (Quite why
jQuery people can't let the browser sort out its own caching I don't
know.)

It's expected that ETags would be more useful when the predictor is put
behind some sort of caching proxy (e.g. varnish) which understands
ETag/If-None-Match.
---
 tawhiri/api.py | 91 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 77 insertions(+), 14 deletions(-)
diff --git a/tawhiri/api.py b/tawhiri/api.py
index b0c6e92..9948c17 100644
--- a/tawhiri/api.py
+++ b/tawhiri/api.py
@@ -18,8 +18,9 @@
 """
 Provide the HTTP API for Tawhiri.
 """
+import hashlib
 
-from flask import Flask, jsonify, request, g
+from flask import Flask, jsonify, request, g, Response
 from datetime import datetime
 import time
 import strict_rfc3339
@@ -188,30 +189,76 @@ def _extract_parameter(data, parameter, cast, default=None, ignore=False,
 
 
 # Response ####################################################################
-def run_prediction(req):
-    """
-    Run the prediction.
-    """
-    # Response dict
-    resp = {
-        "request": req,
-        "prediction": [],
-    }
+def _wind_dataset_for_request(req):
+    """Open the WindDataset named by req['dataset'] (latest or by timestamp);
+    raise InvalidDatasetException on IOError or ValueError.
 
+    """
     # Find wind data location
     ds_dir = app.config.get('WIND_DATASET_DIR', WindDataset.DEFAULT_DIRECTORY)
 
     # Dataset
     try:
         if req['dataset'] == LATEST_DATASET_KEYWORD:
-            tawhiri_ds = WindDataset.open_latest(persistent=True, directory=ds_dir)
+            tawhiri_ds = WindDataset.open_latest(
+                persistent=True, directory=ds_dir
+            )
         else:
-            tawhiri_ds = WindDataset(datetime.fromtimestamp(req['dataset']), directory=ds_dir)
+            tawhiri_ds = WindDataset(
+                datetime.fromtimestamp(req['dataset']), directory=ds_dir
+            )
     except IOError:
         raise InvalidDatasetException("No matching dataset found.")
     except ValueError as e:
         raise InvalidDatasetException(*e.args)
 
+    return tawhiri_ds
+
+def prediction_etag(req):
+    """Compute an ETag for a particular prediction.
+
+    The ETag is the SHA-1 hex digest of the dataset time and request fields.
+    """
+    # We wrap values in [<name>:<value>] to guard against prefix/suffix
+    # collisions
+    hashfunc = hashlib.sha1()
+    def mix(name, value):
+        hashfunc.update('[{0}:{1}]'.format(name, value).encode('utf8'))
+
+    # The prediction is entirely determined by the dataset used and the
+    # launch request; key order in req is not guaranteed, so fix the order.
+
+    # Wind dataset
+    tawhiri_ds = _wind_dataset_for_request(req)
+    mix('DATASET', tawhiri_ds.ds_time.isoformat())
+
+    # Request
+    keys = ['launch_latitude', 'launch_longitude', 'launch_altitude',
+            'launch_datetime']
+    if req['profile'] == PROFILE_STANDARD:
+        keys.extend(['ascent_rate', 'descent_rate', 'burst_altitude'])
+    elif req['profile'] == PROFILE_FLOAT:
+        keys.extend(['ascent_rate', 'float_altitude', 'stop_datetime'])
+    else:
+        raise InternalException("No implementation for known profile.")
+
+    for k in keys:
+        mix(k, req[k])
+
+    return hashfunc.hexdigest()
+
+def run_prediction(req):
+    """
+    Run the prediction.
+    """
+    # Response dict
+    resp = {
+        "request": req,
+        "prediction": [],
+    }
+
+    tawhiri_ds = _wind_dataset_for_request(req)
+
     # Note that hours and minutes are set to 00 as Tawhiri uses hourly datasets
     resp['request']['dataset'] = tawhiri_ds.ds_time.strftime(
         "%Y-%m-%dT%H:00:00Z")
@@ -281,11 +328,27 @@ def main():
     Single API endpoint which accepts GET requests.
     """
     g.request_start_time = time.time()
-    response = run_prediction(parse_request(request.args))
+
+    # Parse request into a prediction specification
+    pred_spec = parse_request(request.args)
+
+    # Compute an ETag for this spec
+    pred_etag = prediction_etag(pred_spec)
+
+    # Does this request specify an ETag?
+    inm = request.headers.get('If-None-Match', None)
+    if inm == pred_etag:
+        return Response(status=304)
+
+    # Run the prediction
+    response = run_prediction(pred_spec)
     g.request_complete_time = time.time()
     response['metadata'] = _format_request_metadata()
-    return jsonify(response)
 
+    # Create the response and set ETag header
+    rv = jsonify(response)
+    rv.headers['ETag'] = pred_etag
+    return rv
 
 @app.errorhandler(APIException)
 def handle_exception(error):