41 changes: 41 additions & 0 deletions Dockerfile
@@ -0,0 +1,41 @@
FROM nvidia/cuda:11.7.0-devel-ubuntu20.04
LABEL name="unified-io-inference"

# WORKDIR creates the directory if needed: pre-create /root/.conda, then build from /root
WORKDIR /root/.conda
WORKDIR /root
RUN apt-get update && apt-get -y install wget nano
RUN wget \
https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
&& rm -f Miniconda3-latest-Linux-x86_64.sh
ENV PATH=/opt/conda/bin:${PATH}
RUN bash -c "conda update -n base -c defaults conda"

RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/xl_1000k.bin \
-O xl.bin
RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/large_1000k.bin \
-O large.bin
RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/base_1000k.bin \
-O base.bin
RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/small_1000k.bin \
-O small.bin
# debug image: a JPEG saved under a .png name (PIL detects the format from the bytes, not the extension)
RUN wget -nv https://farm2.staticflickr.com/1362/1261465554_95741e918b_z.jpg -O dbg_img.png

COPY uioi.yml .
RUN bash -c "conda env create -f uioi.yml"
COPY requirements.txt .
RUN bash -c ". activate uioi && pip install --upgrade pip \
&& pip install --upgrade "jax[cuda]" \
-f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html \
&& python3 -m pip install -r requirements.txt"

RUN bash -c ". activate uioi && pip install matplotlib notebook"
RUN bash -c ". activate uioi && pip install setuptools wheel && pip install spacy \
&& python3 -m spacy download en_core_web_sm"

ENV PYTHONPATH=/root/uio

COPY . .
RUN bash -c ". activate uioi && export PYTHONPATH=/root:/root/uio && python ./uio/test/check.py"
ENV INPUT_FILE=demo.list
ENTRYPOINT bash -c ". activate uioi && python ./run.py xl xl.bin $INPUT_FILE"
34 changes: 34 additions & 0 deletions README.docker.md
@@ -0,0 +1,34 @@

## Docker
To build a docker image:
```bash
docker build -t unified-io-inference .
```
To run the docker demo:
```
docker run -it --gpus=1 unified-io-inference
INFO:absl:Setting up model...
...
INFO:absl:Model is ready
INFO:absl:Running model text_inputs=['what color is the sofa?']
green
```

To run a list of queries, construct an input file where each line is a file path
and a text input, separated by ':'.
Prepare a directory containing image files and `cd` to that directory.
The steps below write example input files and then run docker with the
host images mounted at the `/image-data` directory.
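Each line pairs an image path, as seen from inside the container, with a prompt. A minimal example of the generated format (the file names here are hypothetical):

```
/image-data/beach.jpg:What does the image describe?
/image-data/street.png:What does the image describe?
```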

```bash
ls -1 | grep -E 'jpg|png' > files.txt
awk '{print "/image-data/" $0 ":What does the image describe?"}' ./files.txt > caption.txt
awk '{print "/image-data/" $0 ":Locate all objects in the image."}' ./files.txt > locate.txt

# Choose an input file to process:
export INPUT_FILE=[caption.txt or locate.txt or other]
export HOSTPATH=$(pwd)
echo ${HOSTPATH}/${INPUT_FILE}
docker run -it --gpus=1 -e INPUT_FILE=/image-data/${INPUT_FILE} \
-v ${HOSTPATH}:/image-data unified-io-inference
```
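Outputs land in the mounted directory, so they persist on the host. Following the naming in `run.py` (answers are appended to `<input_file>.<model_size>.results.txt`; annotated images are saved as `<image>.boxes.png`), a run over the hypothetical files above should leave something like:

```bash
ls ${HOSTPATH}
# beach.jpg  beach.jpg.boxes.png  caption.txt  caption.txt.xl.results.txt  files.txt
```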
2 changes: 2 additions & 0 deletions README.md
@@ -79,6 +79,8 @@ Then it can be run with:
jupyter notebook demo.ipynb
```

## Docker
To build and run a unified-io-inference Docker image, see [README.docker.md](README.docker.md).
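For a quick start, these are the same commands described there:

```bash
docker build -t unified-io-inference .
docker run -it --gpus=1 unified-io-inference
```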

## Just-in-time compilation
By default `ModelRunner` compiles the underlying inference calls the first time they are used,
1 change: 1 addition & 0 deletions demo.list
@@ -0,0 +1 @@
/root/dbg_img.png:what color is the couch?
21 changes: 21 additions & 0 deletions run-saga-demo.sh
@@ -0,0 +1,21 @@
#!/bin/sh
#
#SBATCH --partition=gaia-lg
#SBATCH --account=gaia-lg
#SBATCH --job-name=captioning-unified
#SBATCH --output=captioning-unified.output.%j.txt
#SBATCH --error=captioning-unified.error.%j.txt
#SBATCH --gres=gpu:rtxa6000:4
export SRC=/nas/gaia02/users/napiersk/github/clean/unified-io-inference
export INPUT_FILE=caption-part2.txt
export HOSTPATH=/nas/gaia02/data/phase3/ta1/sample1/
cd $SRC
echo $SRC
docker build -t unified-io-inference .
echo CAPTIONING-UNIFIED START ${HOSTPATH} ${INPUT_FILE}
date
docker run -t --gpus=4 -e INPUT_FILE=/image-data/${INPUT_FILE} -v ${HOSTPATH}:/image-data unified-io-inference:latest
date
#grep -rnIE 'Processing image|BOX|TEXT' ./captioning-unified.output.84938.txt

echo CAPTIONING-UNIFIED DONE
209 changes: 209 additions & 0 deletions run.py
@@ -0,0 +1,209 @@
import argparse
import json
from os.path import exists
from PIL import Image, ImageDraw, ImageFont
from uio import runner
from uio.configs import CONFIGS
from uio import utils
import numpy as np
import spacy
from absl import logging
import warnings
# flax kicks up a lot of future warnings at the moment, ignore them
warnings.simplefilter(action='ignore', category=FutureWarning)

# To see INFO messages from `ModelRunner`
logging.set_verbosity(logging.INFO)

def debug_json(results):
  """Best-effort JSON rendering of a results dict for logging."""
  j = []
  for k, v in results.items():
    try:
      j.append({json.dumps(k): json.dumps(v)})
    except TypeError:
      j.append({json.dumps(k): f"NOT SERIALIZABLE: {type(v)}"})
  return json.dumps(j)

def main():
  parser = argparse.ArgumentParser()
  parser.add_argument("model_size", choices=list(CONFIGS))
  parser.add_argument("model_weights")
  parser.add_argument("input_file")
  # a flag, not a positional: `store_true` actions take no `type` argument
  parser.add_argument("--captions_only", action="store_true")
  args = parser.parse_args()

  model = runner.ModelRunner(args.model_size, args.model_weights)
  logging.info(f"Model: {args.model_size}")
  logging.info(f"Input file: {args.input_file}")
  output_file = f"{args.input_file}.{args.model_size}.results.txt"
  logging.info(f"Output file: {output_file}")
  nlp = spacy.load("en_core_web_sm")

  with open(args.input_file, 'r') as input_file:
    lines = input_file.readlines()
  for line in lines:
    # maxsplit=1 so prompts containing ':' stay intact
    image_path, question = line.strip().split(":", 1)
    logging.info(f"Processing image: {image_path}")
    with Image.open(image_path) as img:
      image = np.array(img.convert('RGB'))
      # the question from the input file is ignored; fixed prompts are used, e.g.
      # /image-data/RTS2P7XB.jpg:What does the image describe?:a swamp is full of reeds that have partially bowed.
      # /image-data/RTS2P7XB.jpg:What is in this image?:water.
      caption = model.vqa(image, "What does the image describe ?")
      logging.info(f"DEBUG CAPTION: {debug_json(caption)}"[0:1000])

      # Categorize
      caption_text = caption["text"]
      if not args.captions_only:
        categorize = model.vqa(image, "What is in this image ?")
        logging.info(f"DEBUG CATEGORIZE: {debug_json(categorize)}"[0:1000])
        categorize_text = categorize["text"]
      else:
        categorize_text = ""
      all_text = f"{categorize_text} {caption_text}".strip()  # currently unused

      # split the caption into rough noun phrases and locate each one
      phrases = []
      current_text = ''

      for tok in caption_text.split(" "):
        if len(tok.strip()) > 0:
          t = tok.strip()
          doc = nlp(t)
          pos = str(doc[0].pos_)
          logging.info(f"{doc[0]} {pos}")

          # a determiner or pronoun may start a phrase; nouns start or extend one
          if (pos in ("DET", "PRON") and current_text == '') \
              or pos in ("NOUN", "PROPN"):
            current_text = f'{current_text} {doc[0]}'.strip()
          elif len(current_text) > 0:
            phrases.append(current_text)
            re_result = refexp(model, image, current_text)
            logging.info(f"TEXT: {current_text}")
            draw(img, re_result, current_text)
            current_text = ''

      # flush a phrase left over at the end of the caption
      if len(current_text) > 0:
        phrases.append(current_text)
        re_result = refexp(model, image, current_text)
        logging.info(f"TEXT: {current_text}")
        draw(img, re_result, current_text)
        current_text = ''

      # Object detection
      output = caption  # fall back to the caption result when detection is skipped
      if not args.captions_only:
        output = model.vqa(image, "Locate all objects in the image .")
        ref_tokens = []
        text = output["text"].replace("<", " <")

        for tok in text.split(" "):
          if len(tok) > 10 and tok.startswith("<extra_id_"):
            ref_tokens.append(tok.strip())
          elif 2 < len(str(tok).strip()):
            token = tok.strip()
            logging.info(f"DEBUG token: {token}, extra_ids: {len(ref_tokens)}")
            ref_output = refexp(model, image, token)
            draw(img, ref_output, token)

            # for i in ref_tokens:
            #   logging.info(f"SKIP: {i}")
            #   ref_output = refexp(model, image, i)
            ref_tokens = []

      logging.info(f"1: {caption_text}\n2: {categorize_text}")
      # write(img, f"1: {caption_text}\n2: {categorize_text}")
      out_image_path = image_path + '.boxes.png'
      img.save(out_image_path)

      if not args.captions_only:
        logging.info(f"DEBUG: {debug_json(output)}"[0:1000])

      output_text = output["text"]
      with open(output_file, 'a') as of:
        of.write(f"{image_path}:{question}:{output_text}\n")
      logging.info(f"Output: {output_text}\n\n")

def log(results):
  text = results["text"]
  logging.info(f"DEBUG text: {text}")
  if "boxes" in results and len(results["boxes"]) > 0:
    box = results["boxes"][0]
    logging.info(f"BOX {box[0]}, {box[1]}, {box[2]}, {box[3]}")
    if len(results["boxes"]) > 1:
      logging.info("[...more boxes...]")

def draw(img, results, token):
  canvas = ImageDraw.Draw(img)
  if "boxes" in results and len(results["boxes"]) > 0:
    width, height = img.size
    w = 10 if width > 1000 else 5
    for box in results["boxes"]:
      x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
      shape = [(x1, y1), (x2, y2)]
      canvas.rectangle(shape, outline="red", width=w)
    text = str(results["text"])
    logging.info(f"DTEXT: {text} TOKEN: {token}")
    # font_size = 80 if width > 1000 else 50
    # font = ImageFont.truetype("DejaVuSans.ttf", font_size)
    # canvas.text((x1-1,y1-1), token, font=font, fill="white")
    # canvas.text((x1-1,y1+1), token, font=font, fill="white")
    # canvas.text((x1+1,y1-1), token, font=font, fill="white")
    # canvas.text((x1+1,y1+1), token, font=font, fill="white")
    # canvas.text((x1,y1), token, font=font, fill="red")

def write(img, text):
  logging.info(f"WTEXT: {text}")
  canvas = ImageDraw.Draw(img)
  width, height = img.size
  font_size = 80 if width > 3500 else 48 if width > 2000 else 24
  font = ImageFont.truetype("DejaVuSans.ttf", font_size)
  x = 25
  y = height / 2
  # draw a thin white outline so the red text stays readable on any background
  canvas.text((x-1, y-1), text, font=font, fill="white")
  canvas.text((x-1, y+1), text, font=font, fill="white")
  canvas.text((x+1, y-1), text, font=font, fill="white")
  canvas.text((x+1, y+1), text, font=font, fill="white")
  canvas.text((x, y), text, font=font, fill="red")

def refexp(model, image, text):
  try:
    results = model.refexp(image, text)
    log(results)
    return results
  except ValueError as arg:
    logging.info(f"ERROR: {arg}")
    return {}

if __name__ == "__main__":
  main()


#Workbook example:
#'Which region does the text " {} " describe ?'
#sportsball=uio.refexp(soccer_img, "<extra_id_617>")
#To extract digit from extra_token
# logging.info(f"TOKEN: {int(''.join(i for i in tok if i.isdigit()))}")
# tokens.append(int(''.join(i for i in tok if i.isdigit())))
# a, b = utils.tokens_to_regions(tokens, (384, 384))
# logging.info(f"{str(a)}, {str(b)}")