From d36d8e8ddeafab11b0ad04c10f3f13febf40bc36 Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Tue, 18 Oct 2022 08:57:28 -0700
Subject: [PATCH 1/8] first docker build using GPU

---
 Dockerfile | 24 ++++++++++++++++++++++++
 uio/test/check.py | 3 +++
 uio/test/run.py | 14 ++++++++++++++
 uioi.yml | 7 +++++++
 4 files changed, 48 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 uio/test/check.py
 create mode 100644 uio/test/run.py
 create mode 100644 uioi.yml

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..333d5db
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,24 @@
+FROM nvidia/cuda:11.7.0-devel-ubuntu20.04
+LABEL name="unified-io-inference"
+
+WORKDIR /root/.conda
+WORKDIR /root
+RUN apt-get update && apt-get -y install wget nano
+RUN wget \
+ https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+ && bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
+ && rm -f Miniconda3-latest-Linux-x86_64.sh
+ENV PATH=/opt/conda/bin:${PATH}
+RUN bash -c "conda update -n base -c defaults conda"
+
+COPY uioi.yml .
+RUN bash -c "conda env create -f uioi.yml"
+COPY requirements.txt .
+RUN bash -c ". activate uioi && pip install --upgrade pip \
+ && pip install --upgrade 'jax[cuda]' \
+ -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html \
+ && python3 -m pip install -r requirements.txt"
+
+COPY . .
+RUN bash -c ". activate uioi && python ./uio/test/check.py"
+ENTRYPOINT bash -c ". activate uioi && python ./uio/test/run.py"
diff --git a/uio/test/check.py b/uio/test/check.py
new file mode 100644
index 0000000..4cd937d
--- /dev/null
+++ b/uio/test/check.py
@@ -0,0 +1,3 @@
+from functools import partial
+from jax import grad, lax
+import jax.numpy as jnp
diff --git a/uio/test/run.py b/uio/test/run.py
new file mode 100644
index 0000000..fba0ff5
--- /dev/null
+++ b/uio/test/run.py
@@ -0,0 +1,14 @@
+from functools import partial
+from jax import grad, lax
+import jax.numpy as jnp
+import jax
+print('<<< jax test >>>')
+print(jax.devices())
+
+def tanh(x):  # Define a function
+ y = jnp.exp(-2.0 * x)
+ return (1.0 - y) / (1.0 + y)
+
+grad_tanh = grad(tanh)  # Obtain its gradient function
+print(grad_tanh(1.0))
+print('<<< end >>>')
diff --git a/uioi.yml b/uioi.yml
new file mode 100644
index 0000000..f175f72
--- /dev/null
+++ b/uioi.yml
@@ -0,0 +1,7 @@
+name: uioi
+channels:
+ - defaults
+ - conda-forge
+ - nvidia
+dependencies:
+ - python=3.9
From 4803fe779d81456cb99b7f26ac95d5c12a1ac3d4 Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Wed, 19 Oct 2022 09:31:35 -0400
Subject: [PATCH 2/8] demo build

---
 Dockerfile | 12 +++++++++++-
 README.md | 14 ++++++++++++++
 uioi.yml | 2 ++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 333d5db..ea7f496 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,6 +11,16 @@ RUN wget \
 ENV PATH=/opt/conda/bin:${PATH}
 RUN bash -c "conda update -n base -c defaults conda"
 
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/xl_1000k.bin \
+ -O xl.bin
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/large_1000k.bin \
+ -O large.bin
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/base_1000k.bin \
+ -O base.bin
+RUN wget -nv https://ai2-prior-uio.s3.us-west-2.amazonaws.com/public/model-weights-bin/small_1000k.bin \
+ -O small.bin
+RUN wget -nv https://farm2.staticflickr.com/1362/1261465554_95741e918b_z.jpg -O dbg_img.png
+
 COPY uioi.yml .
RUN bash -c "conda env create -f uioi.yml" COPY requirements.txt . @@ -21,4 +31,4 @@ RUN bash -c ". activate uioi && pip install --upgrade pip \ COPY . . RUN bash -c ". activate uioi && python ./uio/test/check.py" -ENTRYPOINT bash -c ". activate uioi && python ./uio/test/run.py" +ENTRYPOINT bash -c ". activate uioi && python ./demo_script.py small small.bin" diff --git a/README.md b/README.md index e1fb781..a99315b 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,20 @@ Then it can be run with: jupyter notebook demo.ipynb ``` +## Docker +To build a docker image: +```bash +docker build -t unified-io-inference . +``` +To run the docker demo: +``` +docker run -it --gpus=1 unified-io-inference +INFO:absl:Setting up model... +... +INFO:absl:Model is ready +INFO:absl:Running model text_inputs=['what color is the sofa?'] +green +``` ## Just-in-time compilation By default `ModelRunner` compiles the underlying inference calls the first time they are used, diff --git a/uioi.yml b/uioi.yml index f175f72..b721b04 100644 --- a/uioi.yml +++ b/uioi.yml @@ -3,5 +3,7 @@ channels: - defaults - conda-forge - nvidia + - anaconda dependencies: - python=3.9 + - cudnn From 75a7e684883dcf4277c3fab3696da909ec2f02e9 Mon Sep 17 00:00:00 2001 From: Daniel Napierski Date: Wed, 19 Oct 2022 12:09:09 -0400 Subject: [PATCH 3/8] run from list --- Dockerfile | 3 ++- demo.list | 1 + run.py | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 demo.list create mode 100644 run.py diff --git a/Dockerfile b/Dockerfile index ea7f496..20e7443 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,4 +31,5 @@ RUN bash -c ". activate uioi && pip install --upgrade pip \ COPY . . RUN bash -c ". activate uioi && python ./uio/test/check.py" -ENTRYPOINT bash -c ". activate uioi && python ./demo_script.py small small.bin" +ENV INPUT_FILE=demo.list +ENTRYPOINT bash -c ". activate uioi && python ./run.py small small.bin $INPUT_FILE" diff --git a/demo.list b/demo.list new file mode 100644 index 0000000..8d64f07 --- /dev/null +++ b/demo.list @@ -0,0 +1 @@ +/root/dbg_img.png:what color is the couch? 
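The `demo.list` entry above pairs one image path with one text prompt per line, separated by a colon; the `run.py` added next parses exactly this format. As a minimal standalone sketch of a reader for such files (illustrative only, not part of the patch series): it splits on the first `:` alone, which is the safer parse since later patches show prompts that themselves contain colons.

```python
# Illustrative reader for demo.list-style input files (hypothetical helper,
# not part of this patch series). Splitting on the first ':' only keeps
# prompts with embedded colons intact.
from typing import List, Tuple

def parse_input_list(path: str) -> List[Tuple[str, str]]:
    pairs = []
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue  # skip blank lines
            image_path, question = line.split(":", 1)
            pairs.append((image_path, question))
    return pairs

# parse_input_list("demo.list") == [("/root/dbg_img.png", "what color is the couch?")]
```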
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..ba722e3
--- /dev/null
+++ b/run.py
@@ -0,0 +1,36 @@
+import argparse
+from os.path import exists
+from PIL import Image
+from uio import runner
+from uio.configs import CONFIGS
+import numpy as np
+from absl import logging
+import warnings
+# flax kicks up a lot of future warnings at the moment, ignore them
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+# To see INFO messages from `ModelRunner`
+logging.set_verbosity(logging.INFO)
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("model_size", choices=list(CONFIGS))
+ parser.add_argument("model_weights")
+ parser.add_argument("input_file")
+ args = parser.parse_args()
+
+ model = runner.ModelRunner(args.model_size, args.model_weights)
+ input_file = open(args.input_file, 'r')
+ lines = input_file.readlines()
+ for line in lines:
+ image_path, question = line.strip().split(":")
+ print(image_path)
+ print(question)
+ with Image.open(image_path) as img:
+ image = np.array(img.convert('RGB'))
+ output = model.vqa(image, question)
+ print(output["text"])
+
+
+if __name__ == "__main__":
+ main()
From f98f69d1076bcc1053922b850cc0fc871162c5db Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Mon, 24 Oct 2022 07:04:59 -0700
Subject: [PATCH 4/8] Default to xl model, script captions

---
 Dockerfile | 2 +-
 README.docker.md | 34 ++++++++++++++++++++++++++++++++++
 README.md | 14 +-------------
 3 files changed, 36 insertions(+), 14 deletions(-)
 create mode 100644 README.docker.md

diff --git a/Dockerfile b/Dockerfile
index 20e7443..becfe13 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -32,4 +32,4 @@ RUN bash -c ". activate uioi && pip install --upgrade pip \
 COPY . .
 RUN bash -c ". activate uioi && python ./uio/test/check.py"
 ENV INPUT_FILE=demo.list
-ENTRYPOINT bash -c ". activate uioi && python ./run.py small small.bin $INPUT_FILE"
+ENTRYPOINT bash -c ". activate uioi && python ./run.py xl xl.bin $INPUT_FILE"
diff --git a/README.docker.md b/README.docker.md
new file mode 100644
index 0000000..88df26a
--- /dev/null
+++ b/README.docker.md
@@ -0,0 +1,34 @@
+
+## Docker
+To build a docker image:
+```bash
+docker build -t unified-io-inference .
+```
+To run the docker demo:
+```
+docker run -it --gpus=1 unified-io-inference
+INFO:absl:Setting up model...
+...
+INFO:absl:Model is ready
+INFO:absl:Running model text_inputs=['what color is the sofa?']
+green
+```
+
+To run a list of queries, construct an input file where each line is a file path
+and a text input, separated by ':'.
+Prepare a directory containing image files. `cd` to that directory.
+The steps below write example input files and run docker with the
+host images mounted to the `/image-data` directory.
+
+```
+ls -1 | grep -E 'jpg|png' > files.txt
+awk '{print "/image-data/" $0 ":What-does-the-image-describe?"}' ./files.txt > caption.txt
+awk '{print "/image-data/" $0 ":Locate all objects in the image."}' ./files.txt > locate.txt
+
+#Choose an input file to process:
+export INPUT_FILE=[caption.txt or locate.txt or other]
+export HOSTPATH=$(pwd)
+
+docker run -it --gpus=1 -e INPUT_FILE=/image-data/${INPUT_FILE} \
+ -v /${HOSTPATH}:/image-data unified-io-inference
+```
diff --git a/README.md b/README.md
index a99315b..27d8143 100644
--- a/README.md
+++ b/README.md
@@ -80,19 +80,7 @@ jupyter notebook demo.ipynb
 ```
 
 ## Docker
-To build a docker image:
-```bash
-docker build -t unified-io-inference .
-```
-To run the docker demo:
-```
-docker run -it --gpus=1 unified-io-inference
-INFO:absl:Setting up model...
-...
-INFO:absl:Model is ready
-INFO:absl:Running model text_inputs=['what color is the sofa?']
-green
-```
+To build and run a unified-io-inference docker image, see README.docker.md
 
 ## Just-in-time compilation
 By default `ModelRunner` compiles the underlying inference calls the first time they are used,
From d4a8e03cd2b90d8b87f24c10ce6771bed6da06de Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Tue, 15 Nov 2022 07:30:01 -0800
Subject: [PATCH 5/8] run obj detect

---
 Dockerfile | 8 ++-
 README.docker.md | 8 +--
 run.py | 171 ++++++++++++++++++++++++++++++++++++++++++++--
 uio/test/check.py | 32 +++++++++
 4 files changed, 209 insertions(+), 10 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index becfe13..7ea31a1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -29,7 +29,13 @@ RUN bash -c ". activate uioi && pip install --upgrade pip \
 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html \
 && python3 -m pip install -r requirements.txt"
 
+RUN bash -c ". activate uioi && pip install matplotlib notebook"
+RUN bash -c ". activate uioi && pip install setuptools wheel && pip install spacy \
+ && python3 -m spacy download en_core_web_sm"
+
+ENV PYTHONPATH=/root/uio
+
 COPY . .
-RUN bash -c ". activate uioi && python ./uio/test/check.py"
+RUN bash -c ". activate uioi && export PYTHONPATH=/root:/root/uio && python ./uio/test/check.py"
 ENV INPUT_FILE=demo.list
 ENTRYPOINT bash -c ". activate uioi && python ./run.py xl xl.bin $INPUT_FILE"
diff --git a/README.docker.md b/README.docker.md
index 88df26a..d53f7b9 100644
--- a/README.docker.md
+++ b/README.docker.md
@@ -22,13 +22,13 @@ host images mounted to the `/image-data` directory.
 
 ```
 ls -1 | grep -E 'jpg|png' > files.txt
-awk '{print "/image-data/" $0 ":What-does-the-image-describe?"}' ./files.txt > caption.txt
+awk '{print "/image-data/" $0 ":What does the image describe?"}' ./files.txt > caption.txt
 awk '{print "/image-data/" $0 ":Locate all objects in the image."}' ./files.txt > locate.txt
 
 #Choose an input file to process:
 export INPUT_FILE=[caption.txt or locate.txt or other]
 export HOSTPATH=$(pwd)
-
-docker run -it --gpus=1 -e INPUT_FILE=/image-data/${INPUT_FILE} \
- -v /${HOSTPATH}:/image-data unified-io-inference
+echo ${HOSTPATH}${INPUT_FILE}
+docker run -it --gpus=1 -e INPUT_FILE=/image-data/${INPUT_FILE} \
+ -v ${HOSTPATH}:/image-data unified-io-inference
 ```
diff --git a/run.py b/run.py
index ba722e3..fa0809f 100644
--- a/run.py
+++ b/run.py
@@ -1,9 +1,12 @@
 import argparse
+import json
 from os.path import exists
-from PIL import Image
+from PIL import Image, ImageDraw, ImageFont
 from uio import runner
 from uio.configs import CONFIGS
+from uio import utils
 import numpy as np
+import spacy
 from absl import logging
 import warnings
 # flax kicks up a lot of future warnings at the moment, ignore them
@@ -20,17 +23,175 @@ def main():
 args = parser.parse_args()
 
 model = runner.ModelRunner(args.model_size, args.model_weights)
+ logging.info(f"Model: {args.model_size}")
 input_file = open(args.input_file, 'r')
+ logging.info(f"Input file: {args.input_file}")
+ output_file = f"{args.input_file}.{args.model_size}.results.txt"
+ logging.info(f"Output file: {output_file}")
+ nlp = spacy.load("en_core_web_sm")
+
 lines = input_file.readlines()
 for line in lines:
 image_path, question = line.strip().split(":")
- print(image_path)
- print(question)
+ logging.info(f"Processing image: {image_path}")
 with Image.open(image_path) as img:
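+ # img.convert('RGB') forces a decode and normalizes grayscale/RGBA inputs to three channels before the numpy conversion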
image = np.array(img.convert('RGB'))
- output = model.vqa(image, question)
- print(output["text"])
+#ignore question
+#/image-data/RTS2P7XB.jpg:What does the image describe?:a swamp is full of reeds that have partially bowed.
+#/image-data/RTS2P7XB.jpg:What is in this image?:water.
+ caption = model.vqa(image, "What does the image describe ?")
+ j=[]
+ for k,v in caption.items():
+ type_v = type(v)
+ try:
+ j.append({json.dumps(k):json.dumps(v)})
+ except:
+ j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+ debug_output = json.dumps(j)
+ logging.info((f"DEBUG CAPTION: {debug_output}")[0:1000])
+
+ categorize = model.vqa(image, "What is in this image ?")
+ j=[]
+ for k,v in categorize.items():
+ type_v = type(v)
+ try:
+ j.append({json.dumps(k):json.dumps(v)})
+ except:
+ j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+ debug_output = json.dumps(j)
+ logging.info((f"DEBUG CATEGORIZE: {debug_output}")[0:1000])
+
+ categorize_text = categorize["text"]
+ caption_text = caption["text"]
+ all_text = f"{categorize_text} {caption_text}"
+
+ phrases = []
+ current_text = ''
+
+ for tok in caption_text.split(" "):
+ if len(tok.strip()) > 0:
+ t = tok.strip()
+ doc = nlp(t)
+ pos = str(doc[0].pos_)
+ logging.info(f"{doc[0]} {pos}")
+
+ if ("DET" == pos and '' == current_text) \
+ or ("PRON" == pos and '' == current_text) \
+ or "NOUN" == pos or "PROPN" == pos:
+ current_text = f'{current_text} {doc[0]}'.strip()
+ elif len(current_text) > 0:
+ phrases.append(current_text)
+ re_result = refexp(model, image, current_text)
+ draw(img, re_result, current_text)
+ current_text = ''
+
+ if len(current_text) > 0:
+ phrases.append(current_text)
+ re_result = refexp(model, image, current_text)
+ draw(img, re_result, current_text)
+ current_text = ''
+
+ output = model.vqa(image, "Locate all objects in the image .")
+ token = ''
+ ref_tokens = []
+ text = output["text"].replace("<"," <")
+
+ for tok in text.split(" "):
+ if len(tok)>10 and tok.startswith("<extra_id_"):
+ ref_tokens.append(tok)
+
+ categorize_text = categorize["text"]
+ caption_text = caption["text"]
+ write(img, f"1: {caption_text}\n2: {categorize_text}")
+ out_image_path = image_path + '.boxes.png'
+ img.save(out_image_path)
+
+def log(results):
+ if "boxes" in results.keys() and len(results["boxes"]) > 0:
+ box = results["boxes"][0]
+ logging.info(f"BOX {box[0]}, {box[1]}, {box[2]}, {box[3]}")
+ if len(results["boxes"]) > 1:
+ logging.info(f"[...more boxes...]")
+
+def draw(img, results, token):
+ canvas = ImageDraw.Draw(img)
+ if "boxes" in results.keys() and len(results["boxes"]) > 0:
+ for box in results["boxes"]:
+ x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
+ shape = [(x1, y1), (x2, y2)]
+ width, height = img.size
+ w = 10 if width > 1000 else 5
+ canvas.rectangle(shape, outline="red", width=w)
+ text = str(results["text"])
+ logging.info(f"DTEXT: {text} TOKEN: {token}")
+ font_size = 80 if width > 1000 else 50
+ font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+ canvas.text((x1-1,y1-1), token, font=font, fill="white")
+ canvas.text((x1-1,y1+1), token, font=font, fill="white")
+ canvas.text((x1+1,y1-1), token, font=font, fill="white")
+ canvas.text((x1+1,y1+1), token, font=font, fill="white")
+ canvas.text((x1,y1), token, font=font, fill="red")
+
+def write(img, text):
+ logging.info(f"WTEXT: {text}")
+ canvas = ImageDraw.Draw(img)
+ width, height = img.size
+ font_size = 80 if width > 3500 else 48 if width > 2000 else 24
+ font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+ x = 25
+ y = height / 2
+ canvas.text((x-1,y-1), text, font=font, fill="white")
+ canvas.text((x-1,y+1), text, font=font, fill="white")
+ canvas.text((x+1,y-1), text, font=font, fill="white")
+ canvas.text((x+1,y+1), text, font=font, fill="white")
+ canvas.text((x,y), text, font=font, fill="red")
+
+def refexp(model, image, text):
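+ # wraps model.refexp: logs the returned boxes via log() and returns {} when the expression cannot be grounded (ValueError)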
+ try: + results = model.refexp(image, text) + log(results) + return results + except ValueError as arg: + logging.info(f"ERROR: {arg}") + return {} if __name__ == "__main__": main() + + +#Workbook example: +#'Which region does the text " {} " describe ?' +#sportsball=uio.refexp(soccer_img, "") +#To extract digit from extra_token +# logging.info(f"TOKEN: {int(''.join(i for i in tok if i.isdigit()))}") +# tokens.append(int(''.join(i for i in tok if i.isdigit()))) +# a, b = utils.tokens_to_regions(tokens, (384, 384)) +# logging.info(f"{str(a)}, {str(b)}") diff --git a/uio/test/check.py b/uio/test/check.py index 4cd937d..21179d4 100644 --- a/uio/test/check.py +++ b/uio/test/check.py @@ -1,3 +1,35 @@ from functools import partial from jax import grad, lax import jax.numpy as jnp +import matplotlib.pylab as plt +import numpy as np +from torchvision.io import read_image +import urllib.request +import spacy +from PIL import Image +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) +from absl import logging +logging.set_verbosity(logging.INFO) +import utils +import runner +uio = runner.ModelRunner("xl", "xl.bin") + +nlp = spacy.load("en_core_web_sm") +#a soccer player getting ready to control the ball. +doc = nlp("soccer players") +print(f'TAG: {doc[0].tag_}, POS: {doc[0].pos_} {str(doc[0])}') +print(f'TAG: {doc[1].tag_}, POS: {doc[1].pos_} {str(doc[1])}') + +doc = nlp("a soccer player getting ready to control the ball.") +for item in doc: + print(f'{str(item)} TAG: {item.tag_}, POS: {item.pos_}') + +#def load_image_from_url(url): +# with urllib.request.urlopen(url) as f: +# img = Image.open(f) +# return np.array(img) +#hotel_img = load_image_from_url('https://farm2.staticflickr.com/1362/1261465554_95741e918b_z.jpg') +#tennis_img = load_image_from_url('https://farm9.staticflickr.com/8313/7954229658_03f8e8d855_z.jpg') +#penguin_img = load_image_from_url('https://i.stack.imgur.com/z9vLx.jpg') +#uio.caption(hotel_img)["text"] From a00465928ec7f65e6c0d14ed176dd83b275265d0 Mon Sep 17 00:00:00 2001 From: Daniel Napierski Date: Tue, 17 Jan 2023 07:22:31 -0800 Subject: [PATCH 6/8] add report --- uio/report.py | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 uio/report.py diff --git a/uio/report.py b/uio/report.py new file mode 100644 index 0000000..f5ace46 --- /dev/null +++ b/uio/report.py @@ -0,0 +1,82 @@ +import argparse +import json +from os.path import exists +from PIL import Image +from uio import runner +from uio.configs import CONFIGS +from uio import utils +import numpy as np +from absl import logging +import warnings +# flax kicks up a lot of future warnings at the moment, ignore them +warnings.simplefilter(action='ignore', category=FutureWarning) + +# To see INFO messages from `ModelRunner` +logging.set_verbosity(logging.INFO) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_size", choices=list(CONFIGS)) + parser.add_argument("model_weights") + parser.add_argument("input_file") + args = parser.parse_args() + + model = runner.ModelRunner(args.model_size, args.model_weights) + logging.info(f"Model: {args.model_size}") + input_file = open(args.input_file, 'r') + logging.info(f"Input file: {args.input_file}") + output_file = f"{args.input_file}.{args.model_size}.results.txt" + logging.info(f"Output file: {output_file}") + + lines = input_file.readlines() + for line in lines: + image_path, question = line.strip().split(":") + logging.info(f"Processing image: {image_path}") + with 
Image.open(image_path) as img:
+ image = np.array(img.convert('RGB'))
+ output = model.vqa(image, question)
+ token = ''
+ ref_tokens = []
+ for tok in output["text"].split(" "):
+ if len(tok)>10 and tok.startswith("<extra_id_"):
+ ref_tokens.append(tok)
+
+ for i in ref_tokens:
+ ref_output = model.vqa(image, f"Which region does the text {i} describe ?")
+ text = ref_output["text"]
+ logging.info(f"{text}")
+ box = ref_output["boxes"][0]
+ logging.info(f"{json.dumps(box)}")
+
+# a, b = utils.tokens_to_regions(tokens, (384, 384))
+# logging.info(f"{str(a)}, {str(b)}")
+
+ j=[]
+ for k,v in output.items():
+ type_v = type(v)
+ try:
+ j.append({json.dumps(k):json.dumps(v)})
+ except:
+ j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+
+ debug_output = json.dumps(j)
+ logging.info((f"DEBUG: {debug_output}")[0:1000])
+
+ output_text = output["text"]
+ with open(output_file, 'a') as of:
+ of.write(f"{image_path}:{question}:{output_text}\n")
+ logging.info(f"Output: {output_text}")
+
+
+if __name__ == "__main__":
+ main()
From 7261cb19ab64036566cea50df43ef429be98e193 Mon Sep 17 00:00:00 2001
From: Daniel Napierski
Date: Mon, 23 Jan 2023 06:31:20 -0800
Subject: [PATCH 7/8] run at saga

---
 run-saga-demo.sh | 21 +++++++++++++++++++++
 run.py | 19 +++++++++++--------
 2 files changed, 32 insertions(+), 8 deletions(-)
 create mode 100644 run-saga-demo.sh

diff --git a/run-saga-demo.sh b/run-saga-demo.sh
new file mode 100644
index 0000000..d407d2b
--- /dev/null
+++ b/run-saga-demo.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+#
+#SBATCH --partition=gaia-lg
+#SBATCH --account=gaia-lg
+#SBATCH --job-name=captioning-unified
+#SBATCH --output=captioning-unified.output.%j.txt
+#SBATCH --error=captioning-unified.error.%j.txt
+#SBATCH --gres=gpu:rtxa6000:4
+export SRC=/nas/gaia02/users/napiersk/github/clean/unified-io-inference
+export INPUT_FILE=caption-part2.txt
+export HOSTPATH=/nas/gaia02/data/phase3/ta1/sample1/
+cd $SRC
+echo $SRC
+docker build -t unified-io-inference .
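+# (the build above reuses docker's layer cache, so it is fast when nothing has changed)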
+
+echo CAPTIONING-UNIFIED START ${HOSTPATH} ${INPUT_FILE}
+date
+docker run -t --gpus=4 -e INPUT_FILE=/image-data/${INPUT_FILE} -v ${HOSTPATH}:/image-data unified-io-inference:latest
+date
+#grep -rnIE 'Processing image|BOX|TEXT' ./captioning-unified.output.84938.txt
+
+echo CAPTIONING-UNIFIED DONE
diff --git a/run.py b/run.py
index fa0809f..b828d42 100644
--- a/run.py
+++ b/run.py
@@ -82,12 +82,14 @@ def main():
 elif len(current_text) > 0:
 phrases.append(current_text)
 re_result = refexp(model, image, current_text)
+ logging.info(f"TEXT: {current_text}")
 draw(img, re_result, current_text)
 current_text = ''
 
 if len(current_text) > 0:
 phrases.append(current_text)
 re_result = refexp(model, image, current_text)
+ logging.info(f"TEXT: {current_text}")
 draw(img, re_result, current_text)
 current_text = ''
@@ -112,7 +114,8 @@ def main():
 categorize_text = categorize["text"]
 caption_text = caption["text"]
- write(img, f"1: {caption_text}\n2: {categorize_text}")
+ logging.info(f"1: {caption_text}\n2: {categorize_text}")
+# write(img, f"1: {caption_text}\n2: {categorize_text}")
 out_image_path = image_path + '.boxes.png'
 img.save(out_image_path)
@@ -152,13 +155,13 @@ def draw(img, results, token):
 canvas.rectangle(shape, outline="red", width=w)
 text = str(results["text"])
 logging.info(f"DTEXT: {text} TOKEN: {token}")
- font_size = 80 if width > 1000 else 50
- font = ImageFont.truetype("DejaVuSans.ttf", font_size)
- canvas.text((x1-1,y1-1), token, font=font, fill="white")
- canvas.text((x1-1,y1+1), token, font=font, fill="white")
- canvas.text((x1+1,y1-1), token, font=font, fill="white")
- canvas.text((x1+1,y1+1), token, font=font, fill="white")
- canvas.text((x1,y1), token, font=font, fill="red")
+# font_size = 80 if width > 1000 else 50
+# font = ImageFont.truetype("DejaVuSans.ttf", font_size)
+# canvas.text((x1-1,y1-1), token, font=font, fill="white")
+# canvas.text((x1-1,y1+1), token, font=font, fill="white")
+# canvas.text((x1+1,y1-1), token, font=font, fill="white")
+# canvas.text((x1+1,y1+1), token, font=font, fill="white")
+# canvas.text((x1,y1), token, font=font, fill="red")
 
 def write(img, text):
 logging.info(f"WTEXT: {text}")
From ae9dcf56a29f85e55de250a28954b34e5602dd22 Mon Sep 17 00:00:00 2001
From: Liz Lee
Date: Fri, 10 Feb 2023 09:02:44 -0500
Subject: [PATCH 8/8] Add option to run.py to only run captioning

---
 run.py | 107 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 58 insertions(+), 49 deletions(-)

diff --git a/run.py b/run.py
index b828d42..8315948 100644
--- a/run.py
+++ b/run.py
@@ -20,6 +20,7 @@ def main():
 parser.add_argument("model_size", choices=list(CONFIGS))
 parser.add_argument("model_weights")
 parser.add_argument("input_file")
+ parser.add_argument("--captions_only", action="store_true")
 args = parser.parse_args()
 
 model = runner.ModelRunner(args.model_size, args.model_weights)
@@ -50,20 +51,25 @@ def main():
 debug_output = json.dumps(j)
 logging.info((f"DEBUG CAPTION: {debug_output}")[0:1000])
 
- categorize = model.vqa(image, "What is in this image ?")
- j=[]
- for k,v in categorize.items():
- type_v = type(v)
- try:
- j.append({json.dumps(k):json.dumps(v)})
- except:
- j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
- debug_output = json.dumps(j)
- logging.info((f"DEBUG CATEGORIZE: {debug_output}")[0:1000])
-
- categorize_text = categorize["text"]
- caption_text = caption["text"]
- all_text = f"{categorize_text} {caption_text}"
+ # Categorize
+ if not args.captions_only:
+ categorize = model.vqa(image, "What is in this image ?")
+ j=[]
+ for k,v in categorize.items():
+ type_v = type(v)
+ try:
+ j.append({json.dumps(k):json.dumps(v)})
+ except:
+ j.append({json.dumps(k):f"NOT SERIALIZABLE: {type_v}"})
+ debug_output = json.dumps(j)
+ logging.info((f"DEBUG CATEGORIZE: {debug_output}")[0:1000])
+
+ categorize_text = categorize["text"]
+ caption_text = caption["text"]
+ all_text = f"{categorize_text} {caption_text}"
+ else:
+ caption_text = caption["text"]
+ all_text = caption_text
+ categorize_text = ""
 
 phrases = []
 current_text = ''
@@ -93,47 +99,50 @@ def main():
 draw(img, re_result, current_text)
 current_text = ''
 
- output = model.vqa(image, "Locate all objects in the image .")
- token = ''
- ref_tokens = []
- text = output["text"].replace("<"," <")
-
- for tok in text.split(" "):
- if len(tok)>10 and tok.startswith("<extra_id_"):
- ref_tokens.append(tok)
+ if not args.captions_only:
+ output = model.vqa(image, "Locate all objects in the image .")
+ token = ''
+ ref_tokens = []
+ text = output["text"].replace("<"," <")
+
+ for tok in text.split(" "):
+ if len(tok)>10 and tok.startswith("<extra_id_"):
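A closing note on the `<extra_id_N>` location tokens handled above: the commented-out workbook notes at the bottom of run.py sketch how to decode them. Pulled out as a standalone sketch (hedged: `utils.tokens_to_regions` and its `(384, 384)` argument are quoted from that comment and assumed, not verified against the library):

```python
# Sketch of decoding "<extra_id_NNN>" location tokens from the model's text
# output, following the commented workbook example in run.py. The input text
# here is a made-up example, not real model output.
text = "<extra_id_247> <extra_id_103> <extra_id_564> <extra_id_88>"
tokens = []
for tok in text.replace("<", " <").split(" "):
    if len(tok) > 10 and tok.startswith("<extra_id_"):
        tokens.append(int("".join(ch for ch in tok if ch.isdigit())))
print(tokens)  # [247, 103, 564, 88]
# Per the workbook comment, the ids would then map to box coordinates via:
# a, b = utils.tokens_to_regions(tokens, (384, 384))
```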