@@ -1,7 +1,5 @@
 import streamlit as st
 
-from clip.utils import get_images
-
 # this caches the output so that it is stored and the function is not called
 # again and again, which saves time. `allow_output_mutation=True` tells
 # Streamlit not to hash the output of this function, and we can get away with it
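Aside (not part of the diff): the comment above refers to Streamlit's legacy `st.cache` decorator. A minimal, self-contained sketch of the behaviour it relies on — the decorated loader runs once, later reruns reuse the cached object, and `allow_output_mutation=True` lets that object be returned without being hashed:

```python
import streamlit as st

# Toy cached loader: the body runs only on the first call; later reruns of the
# script get the same object back from the cache.
@st.cache(allow_output_mutation=True, show_spinner=False)
def load_state():
    return {"reruns_seen": 0}  # stand-in for an expensive model load

state = load_state()
state["reruns_seen"] += 1       # mutation is fine because the output is not hashed
st.write(state["reruns_seen"])  # keeps counting up: the same dict survives reruns
```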
@@ -10,59 +8,67 @@
 @st.cache(allow_output_mutation=True, show_spinner=False)
 def get_cross_modal_search_models():
     from clip.clip import CLIP
-    return {
-        'CLIP': CLIP()
-    }
+    return CLIP()
 
 # load all the models before the app starts
-with st.spinner('Downloading and Loading Model with Vocabulary...'):
-    MODELS = get_cross_modal_search_models()
+with st.spinner('Loading Model with Vocabulary ... (might take some time)'):
+    model = get_cross_modal_search_models()
+
+st.write(f'''
+# Image Searching App
 
-st.write('''
-# NL-Images
-CLIP is used to perform Cross Modal Search:
-- CLIP: CLIP (Contrastive Language-Image Pre-Training) is a neural network that
-consists of a image encoder and a text encoder. It predicts the similarity between
-the given images and textual descriptions.
+Find images using text and yes, there's an easter egg.
 ''')
 
-model_name = st.sidebar.selectbox(
-    'Please select your app',
-    ["CLIP"]
+app_mode = st.sidebar.selectbox(
+    'Please select a task',
+    ["Text Search", "Image Search", "Text to Text Similarity"]
 )
 
-if model_name != "CLIP":
-    st.write("Use `CLIP` model!")
-    model = MODELS['CLIP']
+st.write('''Upload more images to the cache if you want to add more!''')
+images = st.file_uploader("Images", accept_multiple_files=True, type=['png', 'jpg', 'jpeg'])
+
+if st.button("Upload") and len(images):
+    out = model.upload_images(images)
+    st.write(out)
+    st.write(f'''{model.n_images}''')
 
-if model_name == "CLIP":
-    st.write("### `CLIP` Model")
-    st.write("Please upload images and write text of your choice")
-    st.write("Note: Write each description in a new line")
-    model = MODELS['CLIP']
+# slider to select the number of images to display
+n_images = st.slider('Number of images to see', min_value=1, max_value=model.n_images)
 
-images = st.file_uploader("Images", accept_multiple_files=True, type=['png', 'jpg'])
+if app_mode == "Image Search":
+    st.write('''### Image Search''')
+    st.write(f"Upload any image for similarity search. Searching {n_images} images!")
+    image = st.file_uploader("Images", accept_multiple_files=False, type=['png', 'jpg', 'jpeg'])
+    if st.button("Process") and image:
+        out = model.visual_search(image, n_images)
+        for x in out:
+            st.image(x)
 
-if len(images) != 0:
-    images, image_grid = get_images(images)
-    st.image(image_grid)
+elif app_mode == "Text Search":
+    st.write('''### Text Search''')
+    text = st.text_input(f"Add the text to search. Searching {n_images} images!")
+    if st.button("Process") and text:
+        out = model.text_search(text, n_images)
+        for x in out:
+            st.image(x)
 
-default_ = "a person stuck in traffic\na apple on the table\na garden of sunflowers"
-text = st.text_area("Text", value=default_, key="Text")
-text = text.splitlines()
+elif app_mode == "Text to Text Similarity":
+    st.write('''### Text to Text Similarity
+
+This requires two inputs: the first is the memory against which the second
+input, the query, is checked.''')
 
-# `transpose_flag` tells against which input, should softmax be calculated
-# ie. if transpose_flag = False -> sum(text[i]) == 1 but sum(images[i]) != 1
-# ie. if transpose_flag = True -> sum(text[i]) != 1 but sum(images[i]) == 1
-transpose_flag = st.radio('Priority', ['Image', 'Text'])
-if len(images) == 1:
-    transpose_flag = True
-elif len(text) == 1:
-    transpose_flag = False
-else:
-    transpose_flag = True if transpose_flag == 'Image' else False
+    default_ = '''How can I sample from the EMNIST letters dataset?
+Simple, efficient way to create Dataset?
+How to use multiple models to perform inference on same data in parallel?
+Get target list from Dataset
+Sparse dataset and dataloader
+Element-Wise Max Between Two Tensors?'''
+    memory = st.text_area("Memory", value=default_)
+    query = st.text_input("Query", value="Can I run multiple models in parallel?")
+    matches = model.text_to_text_similarity(memory.split("\n"), query)
 
-if st.button("Predict"):
-    with st.spinner('Predicting...'):
-        output = model.eval(images, text, transpose_flag)
-        st.write(output)
+    if st.button("Process"):
+        st.write("**Query**: " + query)
+        st.write("\n".join([f"- {m}" for m in matches]))
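The removed `transpose_flag` comment above describes which axis the softmax is taken over. An illustrative PyTorch sketch of that distinction — the layout (rows as texts, columns as images) and the values are assumptions made for the example, not code from the repository:

```python
import torch
import torch.nn.functional as F

# Made-up similarity logits; assume rows index texts and columns index images.
logits = torch.tensor([[2.0, 0.5, 0.1],
                       [0.3, 1.5, 0.2]])

# transpose_flag = False: each text gets a distribution over the images,
# so every row sums to 1 (sum(text[i]) == 1) while columns generally do not.
per_text = F.softmax(logits, dim=1)

# transpose_flag = True: each image gets a distribution over the texts,
# so every column sums to 1 (sum(images[i]) == 1) while rows generally do not.
per_image = F.softmax(logits, dim=0)

print(per_text.sum(dim=1))   # tensor([1., 1.])
print(per_image.sum(dim=0))  # tensor([1., 1., 1.])
```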
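The new `Text to Text Similarity` mode calls `model.text_to_text_similarity(memory, query)`, whose implementation is not shown in this diff. A rough sketch of how such a method could work on top of OpenAI's CLIP text encoder — the function name, the `top_k` parameter, and the ranking logic are illustrative assumptions, not the repository's actual wrapper:

```python
import torch
import clip  # OpenAI's CLIP package: https://github.com/openai/CLIP

def text_to_text_similarity_sketch(memory, query, top_k=3, device="cpu"):
    """Rank the `memory` sentences by cosine similarity to `query`.
    Illustrative only; the app's CLIP wrapper may differ."""
    model, _ = clip.load("ViT-B/32", device=device)
    with torch.no_grad():
        mem_emb = model.encode_text(clip.tokenize(memory).to(device)).float()
        qry_emb = model.encode_text(clip.tokenize([query]).to(device)).float()
    # Normalise so the dot product equals cosine similarity.
    mem_emb = mem_emb / mem_emb.norm(dim=-1, keepdim=True)
    qry_emb = qry_emb / qry_emb.norm(dim=-1, keepdim=True)
    scores = (mem_emb @ qry_emb.T).squeeze(1)   # one score per memory line
    best = scores.topk(min(top_k, len(memory))).indices.tolist()
    return [memory[i] for i in best]
```

The image modes (`text_search`, `visual_search`) presumably follow the same pattern, with `encode_image` on one side of the comparison.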