From d36d8e8ddeafab11b0ad04c10f3f13febf40bc36 Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Tue, 18 Oct 2022 08:57:28 -0700
Subject: [PATCH 1/8] first docker build using GPU

---
 Dockerfile | 24 ++++++++++++++++++++++++
 uio/test/check.py | 3 +++
 uio/test/run.py | 14 ++++++++++++++
 uioi.yml | 7 +++++++
 4 files changed, 48 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 uio/test/check.py
 create mode 100644 uio/test/run.py
 create mode 100644 uioi.yml

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..333d5db
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,24 @@
+FROM nvidia/cuda:11.7.0-devel-ubuntu20.04
+LABEL name="unified-io-inference"
+
+WORKDIR /root/.conda
+WORKDIR /root
+RUN apt-get update && apt-get -y install wget nano
+RUN wget \
+ https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+ && bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
+ && rm -f Miniconda3-latest-Linux-x86_64.sh
+ENV PATH=/opt/conda/bin:${PATH}
+RUN bash -c "conda update -n base -c defaults conda"
+
+COPY uioi.yml .
+RUN bash -c "conda env create -f uioi.yml"
+COPY requirements.txt .
+RUN bash -c ". activate uioi && pip install --upgrade pip \
+ && pip install --upgrade 'jax[cuda]' \
+ -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html \
+ && python3 -m pip install -r requirements.txt"
+
+COPY . .
+RUN bash -c ". activate uioi && python ./uio/test/check.py"
+ENTRYPOINT bash -c ". activate uioi && python ./uio/test/run.py"
diff --git a/uio/test/check.py b/uio/test/check.py
new file mode 100644
index 0000000..4cd937d
--- /dev/null
+++ b/uio/test/check.py
@@ -0,0 +1,3 @@
+from functools import partial
+from jax import grad, lax
+import jax.numpy as jnp
diff --git a/uio/test/run.py b/uio/test/run.py
new file mode 100644
index 0000000..fba0ff5
--- /dev/null
+++ b/uio/test/run.py
@@ -0,0 +1,14 @@
+from functools import partial
+from jax import grad, lax
+import jax.numpy as jnp
+import jax
+print('<<< jax test >>>')
+print(jax.devices())
+
+def tanh(x):  # Define a function
+ y = jnp.exp(-2.0 * x)
+ return (1.0 - y) / (1.0 + y)
+
+grad_tanh = grad(tanh)  # Obtain its gradient function
+print(grad_tanh(1.0))
+print('<<< end >>>')
diff --git a/uioi.yml b/uioi.yml
new file mode 100644
index 0000000..f175f72
--- /dev/null
+++ b/uioi.yml
@@ -0,0 +1,7 @@
+name: uioi
+channels:
+ - defaults
+ - conda-forge
+ - nvidia
+dependencies:
+ - python=3.9
From 4803fe779d81456cb99b7f26ac95d5c12a1ac3d4 Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Wed, 19 Oct 2022 09:31:35 -0400
Subject: [PATCH 2/8] demo build

---
 Dockerfile | 12 +++++++++++-
 README.md | 14 ++++++++++++++
 uioi.yml | 2 ++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 333d5db..ea7f496 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,6 +11,16 @@ RUN wget \
 ENV PATH=/opt/conda/bin:${PATH}
 RUN bash -c "conda update -n base -c defaults conda"
 
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/xl_1000k.bin \
+ -O xl.bin
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/large_1000k.bin \
+ -O large.bin
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/base_1000k.bin \
+ -O base.bin
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/small_1000k.bin \
+ -O small.bin
+RUN wget -nv https://farm2.staticflickr.com/1362/1261465554_95741e918b_z.jpg -O dbg_img.png
+
 COPY uioi.yml .
RUN bash -c "conda env create -f uioi.yml" COPY requirements.txt . @@ -21,4 +31,4 @@ RUN bash -c ". activate uioi && pip install --upgrade pip \ COPY . . RUN bash -c ". activate uioi && python ./uio/test/check.py" -ENTRYPOINT bash -c ". activate uioi && python ./uio/test/run.py" +ENTRYPOINT bash -c ". activate uioi && python ./demo_script.py small small.bin" diff --git a/README.md b/README.md index e1fb781..a99315b 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,20 @@ Then it can be run with: jupyter notebook demo.ipynb ``` +## Docker +To build a docker image: +```bash +docker build -t unified-io-inference . +``` +To run the docker demo: +``` +docker run -it --gpus=1 unified-io-inference +INFO:absl:Setting up model... +... +INFO:absl:Model is ready +INFO:absl:Running model text_inputs=['what color is the sofa?'] +green +``` ## Just-in-time compilation By default `ModelRunner` compiles the underlying inference calls the first time they are used, diff --git a/uioi.yml b/uioi.yml index f175f72..b721b04 100644 --- a/uioi.yml +++ b/uioi.yml @@ -3,5 +3,7 @@ channels: - defaults - conda-forge - nvidia + - anaconda dependencies: - python=3.9 + - cudnn From 75a7e684883dcf4277c3fab3696da909ec2f02e9 Mon Sep 17 00:00:00 2001 From: Daniel Napierski Date: Wed, 19 Oct 2022 12:09:09 -0400 Subject: [PATCH 3/8] run from list --- Dockerfile | 3 ++- demo.list | 1 + run.py | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 demo.list create mode 100644 run.py diff --git a/Dockerfile b/Dockerfile index ea7f496..20e7443 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,4 +31,5 @@ RUN bash -c ". activate uioi && pip install --upgrade pip \ COPY . . RUN bash -c ". activate uioi && python ./uio/test/check.py" -ENTRYPOINT bash -c ". activate uioi && python ./demo_script.py small small.bin" +ENV INPUT_FILE=demo.list +ENTRYPOINT bash -c ". activate uioi && python ./run.py small small.bin $INPUT_FILE" diff --git a/demo.list b/demo.list new file mode 100644 index 0000000..8d64f07 --- /dev/null +++ b/demo.list @@ -0,0 +1 @@ +/root/dbg_img.png:what color is the couch? 
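The `demo.list` entry above pairs one image path with one text prompt per line, separated by a colon; the `run.py` added next parses exactly this format. As a minimal standalone sketch of a reader for such files (illustrative only, not part of the patch series): it splits on the first `:` alone, which is the safer parse since later patches show prompts that themselves contain colons.

```python
# Illustrative reader for demo.list-style input files (hypothetical helper,
# not part of this patch series). Splitting on the first ':' only keeps
# prompts with embedded colons intact.
from typing import List, Tuple

def parse_input_list(path: str) -> List[Tuple[str, str]]:
    pairs = []
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue  # skip blank lines
            image_path, question = line.split(":", 1)
            pairs.append((image_path, question))
    return pairs

# parse_input_list("demo.list") == [("/root/dbg_img.png", "what color is the couch?")]
```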
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..ba722e3
--- /dev/null
+++ b/run.py
@@ -0,0 +1,36 @@
+import argparse
+from os.path import exists
+from PIL import Image
+from uio import runner
+from uio.configs import CONFIGS
+import numpy as np
+from absl import logging
+import warnings
+# flax kicks up a lot of future warnings at the moment, ignore them
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+# To see INFO messages from `ModelRunner`
+logging.set_verbosity(logging.INFO)
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("model_size", choices=list(CONFIGS))
+ parser.add_argument("model_weights")
+ parser.add_argument("input_file")
+ args = parser.parse_args()
+
+ model = runner.ModelRunner(args.model_size, args.model_weights)
+ input_file = open(args.input_file, 'r')
+ lines = input_file.readlines()
+ for line in lines:
+ image_path, question = line.strip().split(":")
+ print(image_path)
+ print(question)
+ with Image.open(image_path) as img:
+ image = np.array(img.convert('RGB'))
+ output = model.vqa(image, question)
+ print(output["text"])
+
+
+if __name__ == "__main__":
+ main()
From f98f69d1076bcc1053922b850cc0fc871162c5db Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Mon, 24 Oct 2022 07:04:59 -0700
Subject: [PATCH 4/8] Default to xl model, script captions

---
 Dockerfile | 2 +-
 README.docker.md | 34 ++++++++++++++++++++++++++++++++++
 README.md | 14 +-------------
 3 files changed, 36 insertions(+), 14 deletions(-)
 create mode 100644 README.docker.md

diff --git a/Dockerfile b/Dockerfile
index 20e7443..becfe13 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -32,4 +32,4 @@ RUN bash -c ". activate uioi && pip install --upgrade pip \
 COPY . .
 RUN bash -c ". activate uioi && python ./uio/test/check.py"
 ENV INPUT_FILE=demo.list
-ENTRYPOINT bash -c ". activate uioi && python ./run.py small small.bin $INPUT_FILE"
+ENTRYPOINT bash -c ". activate uioi && python ./run.py xl xl.bin $INPUT_FILE"
diff --git a/README.docker.md b/README.docker.md
new file mode 100644
index 0000000..88df26a
--- /dev/null
+++ b/README.docker.md
@@ -0,0 +1,34 @@
+
+## Docker
+To build a docker image:
+```bash
+docker build -t unified-io-inference .
+```
+To run the docker demo:
+```
+docker run -it --gpus=1 unified-io-inference
+INFO:absl:Setting up model...
+...
+INFO:absl:Model is ready
+INFO:absl:Running model text_inputs=['what color is the sofa?']
+green
+```
+
+To run a list of queries, construct an input file where each line is a file path
+and a text input, separated by ':'.
+Prepare a directory containing image files. `cd` to that directory.
+The steps below write example input files and run docker with the
+host images mounted to the `/image-data` directory.
+
+```
+ls -1 | grep -E 'jpg|png' > files.txt
+awk '{print "/image-data/" $0 ":What-does-the-image-describe?"}' ./files.txt > caption.txt
+awk '{print "/image-data/" $0 ":Locate all objects in the image."}' ./files.txt > locate.txt
+
+#Choose an input file to process:
+export INPUT_FILE=[caption.txt or locate.txt or other]
+export HOSTPATH=$(pwd)
+
+docker run -it --gpus=1 -e INPUT_FILE=/image-data/${INPUT_FILE} \
+ -v /${HOSTPATH}:/image-data unified-io-inference
+```
diff --git a/README.md b/README.md
index a99315b..27d8143 100644
--- a/README.md
+++ b/README.md
@@ -80,19 +80,7 @@ jupyter notebook demo.ipynb
 ```
 
 ## Docker
-To build a docker image:
-```bash
-docker build -t unified-io-inference .
-```
-To run the docker demo:
-```
-docker run -it --gpus=1 unified-io-inference
-INFO:absl:Setting up model...
-...
-INFO:absl:Model is ready
-INFO:absl:Running model text_inputs=['what color is the sofa?']
-green
-```
+To build and run a unified-io-inference docker image, see README.docker.md
 
 ## Just-in-time compilation
 By default `ModelRunner` compiles the underlying inference calls the first time they are used,
From d4a8e03cd2b90d8b87f24c10ce6771bed6da06de Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Tue, 15 Nov 2022 07:30:01 -0800
Subject: [PATCH 5/8] run obj detect

---
 Dockerfile | 8 ++-
 README.docker.md | 8 +--
 run.py | 171 ++++++++++++++++++++++++++++++++++++++++++++--
 uio/test/check.py | 32 +++++++++
 4 files changed, 209 insertions(+), 10 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index becfe13..7ea31a1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -29,7 +29,13 @@ RUN bash -c ". activate uioi && pip install --upgrade pip \
 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html \
 && python3 -m pip install -r requirements.txt"
 
+RUN bash -c ". activate uioi && pip install matplotlib notebook"
+RUN bash -c ". activate uioi && pip install setuptools wheel && pip install spacy \
+ && python3 -m spacy download en_core_web_sm"
+
+ENV PYTHONPATH=/root/uio
+
 COPY . .
-RUN bash -c ". activate uioi && python ./uio/test/check.py"
+RUN bash -c ". activate uioi && export PYTHONPATH=/root:/root/uio && python ./uio/test/check.py"
 ENV INPUT_FILE=demo.list
 ENTRYPOINT bash -c ". activate uioi && python ./run.py xl xl.bin $INPUT_FILE"
diff --git a/README.docker.md b/README.docker.md
index 88df26a..d53f7b9 100644
--- a/README.docker.md
+++ b/README.docker.md
@@ -22,13 +22,13 @@ host images mounted to the `/image-data` directory.
 
 ```
 ls -1 | grep -E 'jpg|png' > files.txt
-awk '{print "/image-data/" $0 ":What-does-the-image-describe?"}' ./files.txt > caption.txt
+awk '{print "/image-data/" $0 ":What does the image describe?"}' ./files.txt > caption.txt
 awk '{print "/image-data/" $0 ":Locate all objects in the image."}' ./files.txt > locate.txt
 
 #Choose an input file to process:
 export INPUT_FILE=[caption.txt or locate.txt or other]
 export HOSTPATH=$(pwd)
-
-docker run -it --gpus=1 -e INPUT_FILE=/image-data/${INPUT_FILE} \
- -v /${HOSTPATH}:/image-data unified-io-inference
+echo ${HOSTPATH}${INPUT_FILE}
+docker run -it --gpus=1 -e INPUT_FILE=/image-data/${INPUT_FILE} \
+ -v ${HOSTPATH}:/image-data unified-io-inference
 ```
diff --git a/run.py b/run.py
index ba722e3..fa0809f 100644
--- a/run.py
+++ b/run.py
@@ -1,9 +1,12 @@
 import argparse
+import json
 from os.path import exists
-from PIL import Image
+from PIL import Image, ImageDraw, ImageFont
 from uio import runner
 from uio.configs import CONFIGS
+from uio import utils
 import numpy as np
+import spacy
 from absl import logging
 import warnings
 # flax kicks up a lot of future warnings at the moment, ignore them
@@ -20,17 +23,175 @@ def main():
 args = parser.parse_args()
 
 model = runner.ModelRunner(args.model_size, args.model_weights)
+ logging.info(f"Model: {args.model_size}")
 input_file = open(args.input_file, 'r')
+ logging.info(f"Input file: {args.input_file}")
+ output_file = f"{args.input_file}.{args.model_size}.results.txt"
+ logging.info(f"Output file: {output_file}")
+ nlp = spacy.load("en_core_web_sm")
+
 lines = input_file.readlines()
 for line in lines:
 image_path, question = line.strip().split(":")
- print(image_path)
- print(question)
+ logging.info(f"Processing image: {image_path}")
 with Image.open(image_path) as img:
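+ # img.convert('RGB') forces a decode and normalizes grayscale/RGBA inputs to three channels before the numpy conversion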
image = np.array(img.convert('RGB'))
- output = model.vqa(image, question)
- print(output["text"])
+#ignore question
+#/image-data/RTS2P7XB.jpg:What does the image describe?:a swamp is full of reeds that have partially bowed.
+#/image-data/RTS2P7XB.jpg:What is in this image?:water.
+ caption = model.vqa(image, "What does the image describe ?")
+ j=[]
+ for k,v in caption.items():
+ type_v = type(v)
+ try:
+ j.append({json.dumps(k):json.dumps(v)})
+ except:
+ j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+ debug_output = json.dumps(j)
+ logging.info((f"DEBUG CAPTION: {debug_output}")[0:1000])
+
+ categorize = model.vqa(image, "What is in this image ?")
+ j=[]
+ for k,v in categorize.items():
+ type_v = type(v)
+ try:
+ j.append({json.dumps(k):json.dumps(v)})
+ except:
+ j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+ debug_output = json.dumps(j)
+ logging.info((f"DEBUG CATEGORIZE: {debug_output}")[0:1000])
+
+ categorize_text = categorize["text"]
+ caption_text = caption["text"]
+ all_text = f"{categorize_text} {caption_text}"
+
+ phrases = []
+ current_text = ''
+
+ for tok in caption_text.split(" "):
+ if len(tok.strip()) > 0:
+ t = tok.strip()
+ doc = nlp(t)
+ pos = str(doc[0].pos_)
+ logging.info(f"{doc[0]} {pos}")
+
+ if ("DET" == pos and '' == current_text) \
+ or ("PRON" == pos and '' == current_text) \
+ or "NOUN" == pos or "PROPN" == pos:
+ current_text = f'{current_text} {doc[0]}'.strip()
+ elif len(current_text) > 0:
+ phrases.append(current_text)
+ re_result = refexp(model, image, current_text)
+ draw(img, re_result, current_text)
+ current_text = ''
+
+ if len(current_text) > 0:
+ phrases.append(current_text)
+ re_result = refexp(model, image, current_text)
+ draw(img, re_result, current_text)
+ current_text = ''
+
+ output = model.vqa(image, "Locate all objects in the image .")
+ token = ''
+ ref_tokens = []
+ text = output["text"].replace("<"," <")
+
+ for tok in text.split(" "):
+ if len(tok)>10 and tok.startswith("<extra_id_"):
+ ref_tokens.append(tok)
+
+ categorize_text = categorize["text"]
+ caption_text = caption["text"]
+ write(img, f"1: {caption_text}\n2: {categorize_text}")
+ out_image_path = image_path + '.boxes.png'
+ img.save(out_image_path)
+
+def log(results):
+ if "boxes" in results.keys() and len(results["boxes"]) > 0:
+ box = results["boxes"][0]
+ logging.info(f"BOX {box[0]}, {box[1]}, {box[2]}, {box[3]}")
+ if len(results["boxes"]) > 1:
+ logging.info(f"[...more boxes...]")
+
+def draw(img, results, token):
+ canvas = ImageDraw.Draw(img)
+ if "boxes" in results.keys() and len(results["boxes"]) > 0:
+ for box in results["boxes"]:
+ x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
+ shape = [(x1, y1), (x2, y2)]
+ width, height = img.size
+ w = 10 if width > 1000 else 5
+ canvas.rectangle(shape, outline="red", width=w)
+ text = str(results["text"])
+ logging.info(f"DTEXT: {text} TOKEN: {token}")
+ font_size = 80 if width > 1000 else 50
+ font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+ canvas.text((x1-1,y1-1), token, font=font, fill="white")
+ canvas.text((x1-1,y1+1), token, font=font, fill="white")
+ canvas.text((x1+1,y1-1), token, font=font, fill="white")
+ canvas.text((x1+1,y1+1), token, font=font, fill="white")
+ canvas.text((x1,y1), token, font=font, fill="red")
+
+def write(img, text):
+ logging.info(f"WTEXT: {text}")
+ canvas = ImageDraw.Draw(img)
+ width, height = img.size
+ font_size = 80 if width > 3500 else 48 if width > 2000 else 24
+ font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+ x = 25
+ y = height / 2
+ canvas.text((x-1,y-1), text, font=font, fill="white")
+ canvas.text((x-1,y+1), text, font=font, fill="white")
+ canvas.text((x+1,y-1), text, font=font, fill="white")
+ canvas.text((x+1,y+1), text, font=font, fill="white")
+ canvas.text((x,y), text, font=font, fill="red")
+
+def refexp(model, image, text):
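+ # wraps model.refexp: logs the returned boxes via log() and returns {} when the expression cannot be grounded (ValueError)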
+ try: + results = model.refexp(image, text) + log(results) + return results + except ValueError as arg: + logging.info(f"ERROR: {arg}") + return {} if __name__ == "__main__": main() + + +#Workbook example: +#'Which region does the text " {} " describe ?' +#sportsball=uio.refexp(soccer_img, "") +#To extract digit from extra_token +# logging.info(f"TOKEN: {int(''.join(i for i in tok if i.isdigit()))}") +# tokens.append(int(''.join(i for i in tok if i.isdigit()))) +# a, b = utils.tokens_to_regions(tokens, (384, 384)) +# logging.info(f"{str(a)}, {str(b)}") diff --git a/uio/test/check.py b/uio/test/check.py index 4cd937d..21179d4 100644 --- a/uio/test/check.py +++ b/uio/test/check.py @@ -1,3 +1,35 @@ from functools import partial from jax import grad, lax import jax.numpy as jnp +import matplotlib.pylab as plt +import numpy as np +from torchvision.io import read_image +import urllib.request +import spacy +from PIL import Image +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) +from absl import logging +logging.set_verbosity(logging.INFO) +import utils +import runner +uio = runner.ModelRunner("xl", "xl.bin") + +nlp = spacy.load("en_core_web_sm") +#a soccer player getting ready to control the ball. +doc = nlp("soccer players") +print(f'TAG: {doc[0].tag_}, POS: {doc[0].pos_} {str(doc[0])}') +print(f'TAG: {doc[1].tag_}, POS: {doc[1].pos_} {str(doc[1])}') + +doc = nlp("a soccer player getting ready to control the ball.") +for item in doc: + print(f'{str(item)} TAG: {item.tag_}, POS: {item.pos_}') + +#def load_image_from_url(url): +# with urllib.request.urlopen(url) as f: +# img = Image.open(f) +# return np.array(img) +#hotel_img = load_image_from_url('https://farm2.staticflickr.com/1362/1261465554_95741e918b_z.jpg') +#tennis_img = load_image_from_url('https://farm9.staticflickr.com/8313/7954229658_03f8e8d855_z.jpg') +#penguin_img = load_image_from_url('https://i.stack.imgur.com/z9vLx.jpg') +#uio.caption(hotel_img)["text"] From a00465928ec7f65e6c0d14ed176dd83b275265d0 Mon Sep 17 00:00:00 2001 From: Daniel Napierski Date: Tue, 17 Jan 2023 07:22:31 -0800 Subject: [PATCH 6/8] add report --- uio/report.py | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 uio/report.py diff --git a/uio/report.py b/uio/report.py new file mode 100644 index 0000000..f5ace46 --- /dev/null +++ b/uio/report.py @@ -0,0 +1,82 @@ +import argparse +import json +from os.path import exists +from PIL import Image +from uio import runner +from uio.configs import CONFIGS +from uio import utils +import numpy as np +from absl import logging +import warnings +# flax kicks up a lot of future warnings at the moment, ignore them +warnings.simplefilter(action='ignore', category=FutureWarning) + +# To see INFO messages from `ModelRunner` +logging.set_verbosity(logging.INFO) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_size", choices=list(CONFIGS)) + parser.add_argument("model_weights") + parser.add_argument("input_file") + args = parser.parse_args() + + model = runner.ModelRunner(args.model_size, args.model_weights) + logging.info(f"Model: {args.model_size}") + input_file = open(args.input_file, 'r') + logging.info(f"Input file: {args.input_file}") + output_file = f"{args.input_file}.{args.model_size}.results.txt" + logging.info(f"Output file: {output_file}") + + lines = input_file.readlines() + for line in lines: + image_path, question = line.strip().split(":") + logging.info(f"Processing image: {image_path}") + with 
Image.open(image_path) as img:
+ image = np.array(img.convert('RGB'))
+ output = model.vqa(image, question)
+ token = ''
+ ref_tokens = []
+ for tok in output["text"].split(" "):
+ if len(tok)>10 and tok.startswith("<extra_id_"):
+ ref_tokens.append(tok)
+
+ for i in ref_tokens:
+ ref_output = model.vqa(image, f"Which region does the text {i} describe ?")
+ text = ref_output["text"]
+ logging.info(f"{text}")
+ box = ref_output["boxes"][0]
+ logging.info(f"{json.dumps(box)}")
+
+# a, b = utils.tokens_to_regions(tokens, (384, 384))
+# logging.info(f"{str(a)}, {str(b)}")
+
+ j=[]
+ for k,v in output.items():
+ type_v = type(v)
+ try:
+ j.append({json.dumps(k):json.dumps(v)})
+ except:
+ j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+
+ debug_output = json.dumps(j)
+ logging.info((f"DEBUG: {debug_output}")[0:1000])
+
+ output_text = output["text"]
+ with open(output_file, 'a') as of:
+ of.write(f"{image_path}:{question}:{output_text}\n")
+ logging.info(f"Output: {output_text}")
+
+
+if __name__ == "__main__":
+ main()
From 7261cb19ab64036566cea50df43ef429be98e193 Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Mon, 23 Jan 2023 06:31:20 -0800
Subject: [PATCH 7/8] run at saga

---
 run-saga-demo.sh | 21 +++++++++++++++++++++
 run.py | 19 +++++++++++--------
 2 files changed, 32 insertions(+), 8 deletions(-)
 create mode 100644 run-saga-demo.sh

diff --git a/run-saga-demo.sh b/run-saga-demo.sh
new file mode 100644
index 0000000..d407d2b
--- /dev/null
+++ b/run-saga-demo.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+#
+#SBATCH --partition=gaia-lg
+#SBATCH --account=gaia-lg
+#SBATCH --job-name=captioning-unified
+#SBATCH --output=captioning-unified.output.%j.txt
+#SBATCH --error=captioning-unified.error.%j.txt
+#SBATCH --gres=gpu:rtxa6000:4
+export SRC=/nas/gaia02/users/napiersk/github/clean/unified-io-inference
+export INPUT_FILE=caption-part2.txt
+export HOSTPATH=/nas/gaia02/data/phase3/ta1/sample1/
+cd $SRC
+echo $SRC
+docker build -t unified-io-inference .
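+# (the build above reuses docker's layer cache, so it is fast when nothing has changed)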
+
+echo CAPTIONING-UNIFIED START ${HOSTPATH} ${INPUT_FILE}
+date
+docker run -t --gpus=4 -e INPUT_FILE=/image-data/${INPUT_FILE} -v ${HOSTPATH}:/image-data unified-io-inference:latest
+date
+#grep -rnIE 'Processing image|BOX|TEXT' ./captioning-unified.output.84938.txt
+
+echo CAPTIONING-UNIFIED DONE
diff --git a/run.py b/run.py
index fa0809f..b828d42 100644
--- a/run.py
+++ b/run.py
@@ -82,12 +82,14 @@ def main():
 elif len(current_text) > 0:
 phrases.append(current_text)
 re_result = refexp(model, image, current_text)
+ logging.info(f"TEXT: {current_text}")
 draw(img, re_result, current_text)
 current_text = ''
 
 if len(current_text) > 0:
 phrases.append(current_text)
 re_result = refexp(model, image, current_text)
+ logging.info(f"TEXT: {current_text}")
 draw(img, re_result, current_text)
 current_text = ''
@@ -112,7 +114,8 @@ def main():
 categorize_text = categorize["text"]
 caption_text = caption["text"]
- write(img, f"1: {caption_text}\n2: {categorize_text}")
+ logging.info(f"1: {caption_text}\n2: {categorize_text}")
+# write(img, f"1: {caption_text}\n2: {categorize_text}")
 out_image_path = image_path + '.boxes.png'
 img.save(out_image_path)
@@ -152,13 +155,13 @@ def draw(img, results, token):
 canvas.rectangle(shape, outline="red", width=w)
 text = str(results["text"])
 logging.info(f"DTEXT: {text} TOKEN: {token}")
- font_size = 80 if width > 1000 else 50
- font = ImageFont.truetype("DejaVuSans.ttf", font_size)
- canvas.text((x1-1,y1-1), token, font=font, fill="white")
- canvas.text((x1-1,y1+1), token, font=font, fill="white")
- canvas.text((x1+1,y1-1), token, font=font, fill="white")
- canvas.text((x1+1,y1+1), token, font=font, fill="white")
- canvas.text((x1,y1), token, font=font, fill="red")
+# font_size = 80 if width > 1000 else 50
+# font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+# canvas.text((x1-1,y1-1), token, font=font, fill="white")
+# canvas.text((x1-1,y1+1), token, font=font, fill="white")
+# canvas.text((x1+1,y1-1), token, font=font, fill="white")
+# canvas.text((x1+1,y1+1), token, font=font, fill="white")
+# canvas.text((x1,y1), token, font=font, fill="red")
 
 def write(img, text):
 logging.info(f"WTEXT: {text}")
From ae9dcf56a29f85e55de250a28954b34e5602dd22 Mon Sep 17 00:00:00 2001
From: Liz Lee
Date: Fri, 10 Feb 2023 09:02:44 -0500
Subject: [PATCH 8/8] Add option to run.py to only run captioning

---
 run.py | 107 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 58 insertions(+), 49 deletions(-)

diff --git a/run.py b/run.py
index b828d42..8315948 100644
--- a/run.py
+++ b/run.py
@@ -20,6 +20,7 @@ def main():
 parser.add_argument("model_size", choices=list(CONFIGS))
 parser.add_argument("model_weights")
 parser.add_argument("input_file")
+ parser.add_argument("--captions_only", action="store_true")
 args = parser.parse_args()
 
 model = runner.ModelRunner(args.model_size, args.model_weights)
@@ -50,20 +51,25 @@ def main():
 debug_output = json.dumps(j)
 logging.info((f"DEBUG CAPTION: {debug_output}")[0:1000])
 
- categorize = model.vqa(image, "What is in this image ?")
- j=[]
- for k,v in categorize.items():
- type_v = type(v)
- try:
- j.append({json.dumps(k):json.dumps(v)})
- except:
- j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
- debug_output = json.dumps(j)
- logging.info((f"DEBUG CATEGORIZE: {debug_output}")[0:1000])
-
- categorize_text = categorize["text"]
- caption_text = caption["text"]
- all_text = f"{categorize_text} {caption_text}"
+ # Categorize
+ if not args.captions_only:
+ categorize = model.vqa(image, "What is in this image ?")
+ j=[]
+ for k,v in categorize.items():
+ type_v = type(v)
+ try:
+ j.append({json.dumps(k):json.dumps(v)})
+ except:
+ j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+ debug_output = json.dumps(j)
+ logging.info((f"DEBUG CATEGORIZE: {debug_output}")[0:1000])
+
+ categorize_text = categorize["text"]
+ caption_text = caption["text"]
+ all_text = f"{categorize_text} {caption_text}"
+ else:
+ caption_text = caption["text"]
+ all_text = caption_text
+ categorize_text = ""
 
 phrases = []
 current_text = ''
@@ -93,47 +99,50 @@ def main():
 draw(img, re_result, current_text)
 current_text = ''
 
- output = model.vqa(image, "Locate all objects in the image .")
- token = ''
- ref_tokens = []
- text = output["text"].replace("<"," <")
-
- for tok in text.split(" "):
- if len(tok)>10 and tok.startswith("<extra_id_"):
- ref_tokens.append(tok)
+ if not args.captions_only:
+ output = model.vqa(image, "Locate all objects in the image .")
+ token = ''
+ ref_tokens = []
+ text = output["text"].replace("<"," <")
+
+ for tok in text.split(" "):
+ if len(tok)>10 and tok.startswith("<extra_id_"):
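A closing note on the `<extra_id_N>` location tokens handled above: the commented-out workbook notes at the bottom of run.py sketch how to decode them. Pulled out as a standalone sketch (hedged: `utils.tokens_to_regions` and its `(384, 384)` argument are quoted from that comment and assumed, not verified against the library):

```python
# Sketch of decoding "<extra_id_NNN>" location tokens from the model's text
# output, following the commented workbook example in run.py. The input text
# here is a made-up example, not real model output.
text = "<extra_id_247> <extra_id_103> <extra_id_564> <extra_id_88>"
tokens = []
for tok in text.replace("<", " <").split(" "):
    if len(tok) > 10 and tok.startswith("<extra_id_"):
        tokens.append(int("".join(ch for ch in tok if ch.isdigit())))
print(tokens)  # [247, 103, 564, 88]
# Per the workbook comment, the ids would then map to box coordinates via:
# a, b = utils.tokens_to_regions(tokens, (384, 384))
```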