diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..7ea31a1
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,41 @@
+FROM nvidia/cuda:11.7.0-devel-ubuntu20.04
+LABEL name="unified-io-inference"
+
+WORKDIR /root/.conda
+WORKDIR /root
+RUN apt-get update && apt-get -y install wget nano
+RUN wget \
+    https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+    && bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
+    && rm -f Miniconda3-latest-Linux-x86_64.sh
+ENV PATH=/opt/conda/bin:${PATH}
+RUN bash -c "conda update -n base -c defaults conda"
+
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/xl_1000k.bin \
+    -O xl.bin
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/large_1000k.bin \
+    -O large.bin
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/base_1000k.bin \
+    -O base.bin
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/small_1000k.bin \
+    -O small.bin
+RUN wget -nv https://farm2.staticflickr.com/1362/1261465554_95741e918b_z.jpg -O dbg_img.png
+
+COPY uioi.yml .
+RUN bash -c "conda env create -f uioi.yml"
+COPY requirements.txt .
+RUN bash -c ". activate uioi && pip install --upgrade pip \
+    && pip install --upgrade 'jax[cuda]' \
+    -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html \
+    && python3 -m pip install -r requirements.txt"
+
+RUN bash -c ". activate uioi && pip install matplotlib notebook"
+RUN bash -c ". activate uioi && pip install setuptools wheel && pip install spacy \
+    && python3 -m spacy download en_core_web_sm"
+
+ENV PYTHONPATH=/root/uio
+
+COPY . .
+RUN bash -c ". activate uioi && export PYTHONPATH=/root:/root/uio && python ./uio/test/check.py"
+ENV INPUT_FILE=demo.list
+ENTRYPOINT bash -c ". activate uioi && python ./run.py xl xl.bin $INPUT_FILE"
diff --git a/README.docker.md b/README.docker.md
new file mode 100644
index 0000000..d53f7b9
--- /dev/null
+++ b/README.docker.md
@@ -0,0 +1,34 @@
+
+## Docker
+To build a docker image:
+```bash
+docker build -t unified-io-inference .
+```
+To run the docker demo:
+```
+docker run -it --gpus=1 unified-io-inference
+INFO:absl:Setting up model...
+...
+INFO:absl:Model is ready
+INFO:absl:Running model text_inputs=['what color is the sofa?']
+green
+```
+
+To run a list of queries, construct an input file where each line is a file path
+and a text input, separated by ':'.
+Prepare a directory containing image files and `cd` to that directory.
+The steps below write example input files and show a docker invocation with the
+host images mounted to the `/image-data` directory.
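+For example, a single line of such an input file (the image name here is only a
+hypothetical placeholder) looks like:
+```
+/image-data/street.jpg:What does the image describe?
+```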
+
+```
+ls -1 | grep -E 'jpg|png' > files.txt
+awk '{print "/image-data/" $0 ":What does the image describe?"}' ./files.txt > caption.txt
+awk '{print "/image-data/" $0 ":Locate all objects in the image."}' ./files.txt > locate.txt
+
+# Choose an input file to process:
+export INPUT_FILE=caption.txt   # or locate.txt, or another input file
+export HOSTPATH=$(pwd)
+echo ${HOSTPATH}/${INPUT_FILE}
+docker run -it --gpus=1 -e INPUT_FILE=/image-data/${INPUT_FILE} \
+  -v ${HOSTPATH}:/image-data unified-io-inference
+```
diff --git a/README.md b/README.md
index e1fb781..27d8143 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,8 @@ Then it can be run with:
 jupyter notebook demo.ipynb
 ```
 
+## Docker
+To build and run a unified-io-inference docker image, see [README.docker.md](README.docker.md).
 ## Just-in-time compilation
 
 By default `ModelRunner` compiles the underlying inference calls the first time they are used,
diff --git a/demo.list b/demo.list
new file mode 100644
index 0000000..8d64f07
--- /dev/null
+++ b/demo.list
@@ -0,0 +1 @@
+/root/dbg_img.png:what color is the couch?
diff --git a/run-saga-demo.sh b/run-saga-demo.sh
new file mode 100644
index 0000000..d407d2b
--- /dev/null
+++ b/run-saga-demo.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+#
+#SBATCH --partition=gaia-lg
+#SBATCH --account=gaia-lg
+#SBATCH --job-name=captioning-unified
+#SBATCH --output=captioning-unified.output.%j.txt
+#SBATCH --error=captioning-unified.error.%j.txt
+#SBATCH --gres=gpu:rtxa6000:4
+export SRC=/nas/gaia02/users/napiersk/github/clean/unified-io-inference
+export INPUT_FILE=caption-part2.txt
+export HOSTPATH=/nas/gaia02/data/phase3/ta1/sample1/
+cd $SRC
+echo $SRC
+docker build -t unified-io-inference .
+echo CAPTIONING-UNIFIED START ${HOSTPATH} ${INPUT_FILE}
+date
+docker run -t --gpus=4 -e INPUT_FILE=/image-data/${INPUT_FILE} -v ${HOSTPATH}:/image-data unified-io-inference:latest
+date
+#grep -rnIE 'Processing image|BOX|TEXT' ./captioning-unified.output.84938.txt
+
+echo CAPTIONING-UNIFIED DONE
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..8315948
--- /dev/null
+++ b/run.py
@@ -0,0 +1,209 @@
+import argparse
+import json
+from os.path import exists
+from PIL import Image, ImageDraw, ImageFont
+from uio import runner
+from uio.configs import CONFIGS
+from uio import utils
+import numpy as np
+import spacy
+from absl import logging
+import warnings
+# flax kicks up a lot of future warnings at the moment, ignore them
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+# To see INFO messages from `ModelRunner`
+logging.set_verbosity(logging.INFO)
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument("model_size", choices=list(CONFIGS))
+  parser.add_argument("model_weights")
+  parser.add_argument("input_file")
+  parser.add_argument("--captions_only", action="store_true")
+  args = parser.parse_args()
+
+  model = runner.ModelRunner(args.model_size, args.model_weights)
+  logging.info(f"Model: {args.model_size}")
+  input_file = open(args.input_file, 'r')
+  logging.info(f"Input file: {args.input_file}")
+  output_file = f"{args.input_file}.{args.model_size}.results.txt"
+  logging.info(f"Output file: {output_file}")
+  nlp = spacy.load("en_core_web_sm")
+
+  lines = input_file.readlines()
+  for line in lines:
+    image_path, question = line.strip().split(":")
+    logging.info(f"Processing image: {image_path}")
+    with Image.open(image_path) as img:
+      image = np.array(img.convert('RGB'))
+# Note: the question read from the input file is ignored; fixed prompts are used below.
+#/image-data/RTS2P7XB.jpg:What does the image describe?:a swamp is full of reeds that have partially bowed.
+#/image-data/RTS2P7XB.jpg:What is in this image?:water.
+      caption = model.vqa(image, "What does the image describe ?")
+      j=[]
+      for k,v in caption.items():
+        type_v = type(v)
+        try:
+          j.append({json.dumps(k):json.dumps(v)})
+        except:
+          j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+      debug_output = json.dumps(j)
+      logging.info((f"DEBUG CAPTION: {debug_output}")[0:1000])
+
+      caption_text = caption["text"]
+
+      # Categorize
+      if not args.captions_only:
+        categorize = model.vqa(image, "What is in this image ?")
+        j=[]
+        for k,v in categorize.items():
+          type_v = type(v)
+          try:
+            j.append({json.dumps(k):json.dumps(v)})
+          except:
+            j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+        debug_output = json.dumps(j)
+        logging.info((f"DEBUG CATEGORIZE: {debug_output}")[0:1000])
+
+        categorize_text = categorize["text"]
+        all_text = f"{categorize_text} {caption_text}"
+      else:
+        all_text = caption_text
+        categorize_text = ""
+
+      phrases = []
+      current_text = ''
+
+      for tok in caption_text.split(" "):
+        if len(tok.strip()) > 0:
+          t = tok.strip()
+          doc = nlp(t)
+          pos = str(doc[0].pos_)
+          logging.info(f"{doc[0]} {pos}")
+
+          if ("DET" == pos and '' == current_text) \
+            or ("PRON" == pos and '' == current_text) \
+            or "NOUN" == pos or "PROPN" == pos:
+            current_text = f'{current_text} {doc[0]}'.strip()
+          elif len(current_text) > 0:
+            phrases.append(current_text)
+            re_result = refexp(model, image, current_text)
+            logging.info(f"TEXT: {current_text}")
+            draw(img, re_result, current_text)
+            current_text = ''
+
+      if len(current_text) > 0:
+        phrases.append(current_text)
+        re_result = refexp(model, image, current_text)
+        logging.info(f"TEXT: {current_text}")
+        draw(img, re_result, current_text)
+        current_text = ''
+
+      # Object detection
+      if not args.captions_only:
+        output = model.vqa(image, "Locate all objects in the image .")
+        token = ''
+        ref_tokens = []
+        text = output["text"].replace("<"," <")
+
+        # Collect the <extra_id_NNN> location tokens from the detection output
+        for tok in text.split(" "):
+          if len(tok)>10 and tok.startswith("<extra_id_"):
+            ref_tokens.append(tok)
+            logging.info(f"TOKEN: {tok}")
+
+def log(results):
+  if "boxes" in results.keys() and len(results["boxes"]) > 0:
+    box = results["boxes"][0]
+    logging.info(f"BOX {box[0]}, {box[1]}, {box[2]}, {box[3]}")
+    if len(results["boxes"]) > 1:
+      logging.info("[...more boxes...]")
+
+def draw(img, results, token):
+  canvas = ImageDraw.Draw(img)
+  if "boxes" in results.keys() and len(results["boxes"]) > 0:
+    for box in results["boxes"]:
+      x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
+      shape = [(x1, y1), (x2, y2)]
+      width, height = img.size
+      w = 10 if width > 1000 else 5
+      canvas.rectangle(shape, outline="red", width=w)
+    text = str(results["text"])
+    logging.info(f"DTEXT: {text} TOKEN: {token}")
+# font_size = 80 if width > 1000 else 50
+# font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+# canvas.text((x1-1,y1-1), token, font=font, fill="white")
+# canvas.text((x1-1,y1+1), token, font=font, fill="white")
+# canvas.text((x1+1,y1-1), token, font=font, fill="white")
+# canvas.text((x1+1,y1+1), token, font=font, fill="white")
+# canvas.text((x1,y1), token, font=font, fill="red")
+
+def write(img, text):
+  logging.info(f"WTEXT: {text}")
+  canvas = ImageDraw.Draw(img)
+  width, height = img.size
+  font_size = 80 if width > 3500 else 48 if width > 2000 else 24
+  font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+  x = 25
+  y = height / 2
+  canvas.text((x-1,y-1), text, font=font, fill="white")
+  canvas.text((x-1,y+1), text, font=font, fill="white")
+  canvas.text((x+1,y-1), text, font=font, fill="white")
+  canvas.text((x+1,y+1), text, font=font, fill="white")
+  canvas.text((x,y), text, font=font, fill="red")
+
+def refexp(model, image, text):
+  try:
+    results = model.refexp(image, text)
+    log(results)
+    return results
+  except ValueError as arg:
+    logging.info(f"ERROR: {arg}")
+    return {}
+
+if __name__ == "__main__":
+  main()
+
+
+#Workbook example:
+#'Which region does the text " {} " describe ?'
+#sportsball=uio.refexp(soccer_img, "")
+#To extract digit from extra_token
+# logging.info(f"TOKEN: {int(''.join(i for i in tok if i.isdigit()))}")
+# tokens.append(int(''.join(i for i in tok if i.isdigit())))
+# a, b = utils.tokens_to_regions(tokens, (384, 384))
+# logging.info(f"{str(a)}, {str(b)}")
diff --git a/uio/report.py b/uio/report.py
new file mode 100644
index 0000000..f5ace46
--- /dev/null
+++ b/uio/report.py
@@ -0,0 +1,82 @@
+import argparse
+import json
+from os.path import exists
+from PIL import Image
+from uio import runner
+from uio.configs import CONFIGS
+from uio import utils
+import numpy as np
+from absl import logging
+import warnings
+# flax kicks up a lot of future warnings at the moment, ignore them
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+# To see INFO messages from `ModelRunner`
+logging.set_verbosity(logging.INFO)
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument("model_size", choices=list(CONFIGS))
+  parser.add_argument("model_weights")
+  parser.add_argument("input_file")
+  args = parser.parse_args()
+
+  model = runner.ModelRunner(args.model_size, args.model_weights)
+  logging.info(f"Model: {args.model_size}")
+  input_file = open(args.input_file, 'r')
+  logging.info(f"Input file: {args.input_file}")
+  output_file = f"{args.input_file}.{args.model_size}.results.txt"
+  logging.info(f"Output file: {output_file}")
+
+  lines = input_file.readlines()
+  for line in lines:
+    image_path, question = line.strip().split(":")
+    logging.info(f"Processing image: {image_path}")
+    with Image.open(image_path) as img:
+      image = np.array(img.convert('RGB'))
+      output = model.vqa(image, question)
+      token = ''
+      ref_tokens = []
+      # Collect the <extra_id_NNN> location tokens from the answer text
+      for tok in output["text"].split(" "):
+        if len(tok)>10 and tok.startswith("<extra_id_"):
+          ref_tokens.append(tok)
+
+      for i in ref_tokens:
+        ref_output = model.vqa(image, f"Which region does the text {i} describe ?")
+        text = ref_output["text"]
+        logging.info(f"{text}")
+        box = ref_output["boxes"][0]
+        logging.info(f"{json.dumps(box)}")
+
+# a, b = utils.tokens_to_regions(tokens, (384, 384))
+# logging.info(f"{str(a)}, {str(b)}")
+
+      j=[]
+      for k,v in output.items():
+        type_v = type(v)
+        try:
+          j.append({json.dumps(k):json.dumps(v)})
+        except:
+          j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+
+      debug_output = json.dumps(j)
+      logging.info((f"DEBUG: {debug_output}")[0:1000])
+
+      output_text = output["text"]
+      with open(output_file, 'a') as of:
+        of.write(f"{image_path}:{question}:{output_text}\n")
+      logging.info(f"Output: {output_text}")
+
+
+if __name__ == "__main__":
+  main()
diff --git a/uio/test/check.py b/uio/test/check.py
new file mode 100644
index 0000000..21179d4
--- /dev/null
+++ b/uio/test/check.py
@@ -0,0 +1,35 @@
+from functools import partial
+from jax import grad, lax
+import jax.numpy as jnp
+import matplotlib.pylab as plt
+import numpy as np
+from torchvision.io import read_image
+import urllib.request
+import spacy
+from PIL import Image
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
+from absl import logging
+logging.set_verbosity(logging.INFO)
+import utils
+import runner
+uio = runner.ModelRunner("xl", "xl.bin")
+
+nlp = spacy.load("en_core_web_sm")
+#a soccer player getting ready to control the ball.
+doc = nlp("soccer players")
+print(f'TAG: {doc[0].tag_}, POS: {doc[0].pos_} {str(doc[0])}')
+print(f'TAG: {doc[1].tag_}, POS: {doc[1].pos_} {str(doc[1])}')
+
+doc = nlp("a soccer player getting ready to control the ball.")
+for item in doc:
+  print(f'{str(item)} TAG: {item.tag_}, POS: {item.pos_}')
+
+#def load_image_from_url(url):
+#  with urllib.request.urlopen(url) as f:
+#    img = Image.open(f)
+#    return np.array(img)
+#hotel_img = load_image_from_url('https://farm2.staticflickr.com/1362/1261465554_95741e918b_z.jpg')
+#tennis_img = load_image_from_url('https://farm9.staticflickr.com/8313/7954229658_03f8e8d855_z.jpg')
+#penguin_img = load_image_from_url('https://i.stack.imgur.com/z9vLx.jpg')
+#uio.caption(hotel_img)["text"]
diff --git a/uio/test/run.py b/uio/test/run.py
new file mode 100644
index 0000000..fba0ff5
--- /dev/null
+++ b/uio/test/run.py
@@ -0,0 +1,14 @@
+from functools import partial
+from jax import grad, lax
+import jax.numpy as jnp
+import jax
+print('<<< jax test >>>')
+print(jax.devices())
+
+def tanh(x):  # Define a function
+  y = jnp.exp(-2.0 * x)
+  return (1.0 - y) / (1.0 + y)
+
+grad_tanh = grad(tanh)  # Obtain its gradient function
+print(grad_tanh(1.0))
+print('<<< end >>>')
diff --git a/uioi.yml b/uioi.yml
new file mode 100644
index 0000000..b721b04
--- /dev/null
+++ b/uioi.yml
@@ -0,0 +1,9 @@
+name: uioi
+channels:
+  - defaults
+  - conda-forge
+  - nvidia
+  - anaconda
+dependencies:
+  - python=3.9
+  - cudnn