Skip to content

Feature: Query Search Images in a Dataset #388

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions roboflow/core/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,91 @@ def search_all(

offset += limit

def query(
    self,
    query_str: Optional[str] = "",
    page_size: int = 100,
    fields: Optional[List[str]] = None,
    continuation_token: Optional[str] = None,
):
    """
    Fetch one page of images matching a semantic search query string.

    Args:
        query_str (str, optional): Search query string, e.g.
            'filename:"example.jpg"' or 'project:foo night'.
            Omitted from the request when None.
        page_size (int, optional): Number of results to return per page
            (default is 100). Omitted from the request when None.
        fields (list, optional): Fields to return in results. Defaults to
            ["tags", "width", "height", "filename", "aspectRatio", "split"].
        continuation_token (str, optional): Token returned by a previous
            call, used to fetch the next page of results.

    Returns:
        tuple: A tuple containing:
            - list: The images that match the query criteria (empty when
              the response carries no "results" key).
            - str or None: A continuation token if the response includes
              one, otherwise None.

    Raises:
        requests.HTTPError: If the search endpoint answers with a 4xx/5xx
            status code.

    Example:
        >>> results, token = project.query(query_str='project:example', page_size=10)
    """
    if fields is None:
        fields = ["tags", "width", "height", "filename", "aspectRatio", "split"]

    # Only send the parameters the caller actually provided.
    payload: Dict[str, Union[str, int, List[str]]] = {}

    if query_str is not None:
        payload["query"] = query_str

    if page_size is not None:
        payload["pageSize"] = page_size

    if continuation_token is not None:
        payload["continuationToken"] = continuation_token

    payload["fields"] = fields

    response = requests.post(
        f"{API_URL}/{self.__workspace}/search/v1?api_key={self.__api_key}",
        json=payload,
    )
    # Surface HTTP failures instead of reporting them as an empty result page.
    response.raise_for_status()

    # Decode the body once; the token may be absent on the final page, so it
    # is looked up with .get() rather than indexed.
    body = response.json()
    return body.get("results", []), body.get("continuationToken")

def query_all(
    self,
    query_str: Optional[str] = "",
    page_size: int = 100,
    fields: Optional[List[str]] = None,
):
    """
    Iterate over every page of semantic search results for a query.

    Repeatedly calls ``self.query``, feeding the continuation token from
    each response into the next request.

    Args:
        query_str (str): Search query string, e.g.
            'filename:"example.jpg"' or 'project:foo night'.
        page_size (int): Number of results to return per page (default: 100).
        fields (list): Fields to return in results (default:
            ["tags", "width", "height", "filename", "aspectRatio", "split"]).

    Returns:
        Generator that yields pages (lists) of images that match the query
        criteria. The first page is yielded even when it is empty.

    Example:
        >>> results = project.query_all(query_str="filename:image.png")
        >>> for result in results:
        ...     print(result)
    """
    continuation_token = None

    while True:
        page, continuation_token = self.query(
            query_str=query_str,
            page_size=page_size,
            fields=fields,
            continuation_token=continuation_token,
        )

        yield page

        # Stop on the token, not on page length: a full final page carries no
        # token, and re-querying with None would restart from the first page.
        # An empty page also ends the loop so a server that keeps returning a
        # token cannot spin it forever.
        if not continuation_token or not page:
            break

def __str__(self):
"""
Show a string representation of a Project object.
Expand Down