Commit 21138e5

fix: support multi-subfolder downloads for Z-Image Qwen3 encoder (#8692)
* fix(model-install): support multi-subfolder downloads for Z-Image Qwen3 encoder

  The Z-Image Qwen3 text encoder requires both text_encoder and tokenizer subfolders from the
  HuggingFace repo, but the previous implementation only downloaded the text_encoder subfolder,
  causing model identification to fail.

  Changes:
  - Add subfolders property to HFModelSource supporting '+' separated paths
  - Extend filter_files() and download_urls() to handle multiple subfolders
  - Update _multifile_download() to preserve subfolder structure
  - Make Qwen3Encoder probe check both nested and direct config.json paths
  - Update Qwen3EncoderLoader to handle both directory structures
  - Change starter model source to text_encoder+tokenizer

* ruff format

* fix schema description

* fix schema description

---------

Co-authored-by: Lincoln Stein <[email protected]>
1 parent 39114b0 commit 21138e5
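Concretely, a starter-model source can now name more than one repo subfolder. A minimal before/after illustration (the on-disk layout shown is the expected result of the _multifile_download change below, not captured output):

# Before this commit: only the text_encoder subfolder was fetched.
old_source = "Tongyi-MAI/Z-Image-Turbo::text_encoder"

# After this commit: both subfolders are fetched into one combined model directory.
new_source = "Tongyi-MAI/Z-Image-Turbo::text_encoder+tokenizer"

# Expected download layout for new_source:
#   Z-Image-Turbo_text_encoder_tokenizer/
#       text_encoder/   <- config.json and encoder weights
#       tokenizer/      <- tokenizer files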

8 files changed: +154, -37 lines

invokeai/app/services/model_install/model_install_common.py

Lines changed: 14 additions & 1 deletion
@@ -85,9 +85,12 @@ def __str__(self) -> str:

 class HFModelSource(StringLikeSource):
     """
-    A HuggingFace repo_id with optional variant, sub-folder and access token.
+    A HuggingFace repo_id with optional variant, sub-folder(s) and access token.
     Note that the variant option, if not provided to the constructor, will default to fp16, which is
     what people (almost) always want.
+
+    The subfolder can be a single path or multiple paths joined by '+' (e.g., "text_encoder+tokenizer").
+    When multiple subfolders are specified, all of them will be downloaded and combined into the model directory.
     """

     repo_id: str
@@ -103,6 +106,16 @@ def proper_repo_id(cls, v: str) -> str:  # noqa D102
             raise ValueError(f"{v}: invalid repo_id format")
         return v

+    @property
+    def subfolders(self) -> list[Path]:
+        """Return list of subfolders (supports '+' separated multiple subfolders)."""
+        if self.subfolder is None:
+            return []
+        subfolder_str = self.subfolder.as_posix()
+        if "+" in subfolder_str:
+            return [Path(s.strip()) for s in subfolder_str.split("+")]
+        return [self.subfolder]
+
     def __str__(self) -> str:
         """Return string version of repoid when string rep needed."""
         base: str = self.repo_id
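A hedged usage sketch of the new subfolders property (the source is constructed directly here for illustration; in practice the installer builds it from the source string):

from pathlib import Path

from invokeai.app.services.model_install.model_install_common import HFModelSource

src = HFModelSource(repo_id="Tongyi-MAI/Z-Image-Turbo", subfolder=Path("text_encoder+tokenizer"))
print(src.subfolders)  # expected: [Path("text_encoder"), Path("tokenizer")]

single = HFModelSource(repo_id="Tongyi-MAI/Z-Image-Turbo", subfolder=Path("text_encoder"))
print(single.subfolders)  # expected: [Path("text_encoder")]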

invokeai/app/services/model_install/model_install_default.py

Lines changed: 65 additions & 16 deletions
@@ -417,10 +417,15 @@ def download_and_cache_model(
         model_path.mkdir(parents=True, exist_ok=True)
         model_source = self._guess_source(str(source))
         remote_files, _ = self._remote_files_from_source(model_source)
+        # Handle multiple subfolders for HFModelSource
+        subfolders = model_source.subfolders if isinstance(model_source, HFModelSource) else []
         job = self._multifile_download(
             dest=model_path,
             remote_files=remote_files,
-            subfolder=model_source.subfolder if isinstance(model_source, HFModelSource) else None,
+            subfolder=model_source.subfolder
+            if isinstance(model_source, HFModelSource) and len(subfolders) <= 1
+            else None,
+            subfolders=subfolders if len(subfolders) > 1 else None,
         )
         files_string = "file" if len(remote_files) == 1 else "files"
         self._logger.info(f"Queuing model download: {source} ({len(remote_files)} {files_string})")
@@ -438,10 +443,13 @@ def _remote_files_from_source(
         if isinstance(source, HFModelSource):
             metadata = HuggingFaceMetadataFetch(self._session).from_id(source.repo_id, source.variant)
             assert isinstance(metadata, ModelMetadataWithFiles)
+            # Use subfolders property which handles '+' separated multiple subfolders
+            subfolders = source.subfolders
             return (
                 metadata.download_urls(
                     variant=source.variant or self._guess_variant(),
-                    subfolder=source.subfolder,
+                    subfolder=source.subfolder if len(subfolders) <= 1 else None,
+                    subfolders=subfolders if len(subfolders) > 1 else None,
                     session=self._session,
                 ),
                 metadata,
@@ -741,10 +749,13 @@ def _import_remote_model(
         install_job._install_tmpdir = destdir
         install_job.total_bytes = sum((x.size or 0) for x in remote_files)

+        # Handle multiple subfolders for HFModelSource
+        subfolders = source.subfolders if isinstance(source, HFModelSource) else []
         multifile_job = self._multifile_download(
             remote_files=remote_files,
             dest=destdir,
-            subfolder=source.subfolder if isinstance(source, HFModelSource) else None,
+            subfolder=source.subfolder if isinstance(source, HFModelSource) and len(subfolders) <= 1 else None,
+            subfolders=subfolders if len(subfolders) > 1 else None,
             access_token=source.access_token,
             submit_job=False,  # Important! Don't submit the job until we have set our _download_cache dict
         )
@@ -771,31 +782,69 @@ def _multifile_download(
         remote_files: List[RemoteModelFile],
         dest: Path,
         subfolder: Optional[Path] = None,
+        subfolders: Optional[List[Path]] = None,
         access_token: Optional[str] = None,
         submit_job: bool = True,
     ) -> MultiFileDownloadJob:
         # HuggingFace repo subfolders are a little tricky. If the name of the model is "sdxl-turbo", and
         # we are installing the "vae" subfolder, we do not want to create an additional folder level, such
         # as "sdxl-turbo/vae", nor do we want to put the contents of the vae folder directly into "sdxl-turbo".
         # So what we do is to synthesize a folder named "sdxl-turbo_vae" here.
-        if subfolder:
+        #
+        # For multiple subfolders (e.g., text_encoder+tokenizer), we create a combined folder name
+        # (e.g., sdxl-turbo_text_encoder_tokenizer) and keep each subfolder's contents in its own
+        # subdirectory within the model folder.
+
+        if subfolders and len(subfolders) > 1:
+            # Multiple subfolders: create combined name and keep subfolder structure
+            top = Path(remote_files[0].path.parts[0])  # e.g. "Z-Image-Turbo/"
+            subfolder_names = [sf.name.replace("/", "_").replace("\\", "_") for sf in subfolders]
+            combined_name = "_".join(subfolder_names)
+            path_to_add = Path(f"{top}_{combined_name}")
+
+            parts: List[RemoteModelFile] = []
+            for model_file in remote_files:
+                assert model_file.size is not None
+                # Determine which subfolder this file belongs to
+                file_path = model_file.path
+                new_path: Optional[Path] = None
+                for sf in subfolders:
+                    try:
+                        # Try to get relative path from this subfolder
+                        relative = file_path.relative_to(top / sf)
+                        # Keep the subfolder name as a subdirectory
+                        new_path = path_to_add / sf.name / relative
+                        break
+                    except ValueError:
+                        continue
+
+                if new_path is None:
+                    # File doesn't match any subfolder, keep original path structure
+                    new_path = path_to_add / file_path.relative_to(top)
+
+                parts.append(RemoteModelFile(url=model_file.url, path=new_path))
+        elif subfolder:
+            # Single subfolder: flatten into renamed folder
             top = Path(remote_files[0].path.parts[0])  # e.g. "sdxl-turbo/"
             path_to_remove = top / subfolder  # sdxl-turbo/vae/
             subfolder_rename = subfolder.name.replace("/", "_").replace("\\", "_")
             path_to_add = Path(f"{top}_{subfolder_rename}")
-        else:
-            path_to_remove = Path(".")
-            path_to_add = Path(".")
-
-        parts: List[RemoteModelFile] = []
-        for model_file in remote_files:
-            assert model_file.size is not None
-            parts.append(
-                RemoteModelFile(
-                    url=model_file.url,  # if a subfolder, then sdxl-turbo_vae/config.json
-                    path=path_to_add / model_file.path.relative_to(path_to_remove),
+
+            parts = []
+            for model_file in remote_files:
+                assert model_file.size is not None
+                parts.append(
+                    RemoteModelFile(
+                        url=model_file.url,
+                        path=path_to_add / model_file.path.relative_to(path_to_remove),
+                    )
                 )
-            )
+        else:
+            # No subfolder specified - pass through unchanged
            parts = []
+            for model_file in remote_files:
+                assert model_file.size is not None
+                parts.append(RemoteModelFile(url=model_file.url, path=model_file.path))

         return self._download_queue.multifile_download(
             parts=parts,
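The multi-subfolder branch above only rewrites destination paths. A standalone sketch of that remapping (hypothetical helper using plain pathlib, no InvokeAI imports), showing where files from two subfolders end up:

from pathlib import Path

def remap_multi_subfolder(paths: list[Path], subfolders: list[Path]) -> list[Path]:
    # Mirror of the multi-subfolder logic: combined top-level folder, one subdirectory per subfolder.
    top = Path(paths[0].parts[0])                      # e.g. "Z-Image-Turbo"
    combined = "_".join(sf.name for sf in subfolders)  # e.g. "text_encoder_tokenizer"
    dest_root = Path(f"{top}_{combined}")
    remapped: list[Path] = []
    for p in paths:
        for sf in subfolders:
            try:
                remapped.append(dest_root / sf.name / p.relative_to(top / sf))
                break
            except ValueError:
                continue
        else:
            # File outside every requested subfolder: keep its structure under the combined folder.
            remapped.append(dest_root / p.relative_to(top))
    return remapped

files = [Path("Z-Image-Turbo/text_encoder/config.json"), Path("Z-Image-Turbo/tokenizer/tokenizer_config.json")]
print(remap_multi_subfolder(files, [Path("text_encoder"), Path("tokenizer")]))
# expected:
#   Z-Image-Turbo_text_encoder_tokenizer/text_encoder/config.json
#   Z-Image-Turbo_text_encoder_tokenizer/tokenizer/tokenizer_config.json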

invokeai/backend/model_manager/configs/qwen3_encoder.py

Lines changed: 17 additions & 2 deletions
@@ -94,8 +94,23 @@ def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -

         raise_for_override_fields(cls, override_fields)

-        # Check for text_encoder config
-        expected_config_path = mod.path / "text_encoder" / "config.json"
+        # Check for text_encoder config - support both:
+        # 1. Full model structure: model_root/text_encoder/config.json
+        # 2. Standalone text_encoder download: model_root/config.json (when text_encoder subfolder is downloaded separately)
+        config_path_nested = mod.path / "text_encoder" / "config.json"
+        config_path_direct = mod.path / "config.json"
+
+        if config_path_nested.exists():
+            expected_config_path = config_path_nested
+        elif config_path_direct.exists():
+            expected_config_path = config_path_direct
+        else:
+            from invokeai.backend.model_manager.configs.identification_utils import NotAMatchError
+
+            raise NotAMatchError(
+                f"unable to load config file(s): {{PosixPath('{config_path_nested}'): 'file does not exist'}}"
+            )
+
         # Qwen3 uses Qwen2VLForConditionalGeneration or similar
         raise_for_class_name(
             expected_config_path,

invokeai/backend/model_manager/load/model_loaders/z_image.py

Lines changed: 17 additions & 2 deletions
@@ -367,15 +367,30 @@ def _load_model(
         if not isinstance(config, Qwen3Encoder_Qwen3Encoder_Config):
             raise ValueError("Only Qwen3Encoder_Qwen3Encoder_Config models are supported here.")

+        model_path = Path(config.path)
+
+        # Support both structures:
+        # 1. Full model: model_root/text_encoder/ and model_root/tokenizer/
+        # 2. Standalone download: model_root/ contains text_encoder files directly
+        text_encoder_path = model_path / "text_encoder"
+        tokenizer_path = model_path / "tokenizer"
+
+        # Check if this is a standalone text_encoder download (no nested text_encoder folder)
+        is_standalone = not text_encoder_path.exists() and (model_path / "config.json").exists()
+
+        if is_standalone:
+            text_encoder_path = model_path
+            tokenizer_path = model_path  # Tokenizer files should also be in root
+
         match submodel_type:
             case SubModelType.Tokenizer:
-                return AutoTokenizer.from_pretrained(Path(config.path) / "tokenizer")
+                return AutoTokenizer.from_pretrained(tokenizer_path)
             case SubModelType.TextEncoder:
                 # Determine safe dtype based on target device capabilities
                 target_device = TorchDevice.choose_torch_device()
                 model_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device)
                 return Qwen3ForCausalLM.from_pretrained(
-                    Path(config.path) / "text_encoder",
+                    text_encoder_path,
                     torch_dtype=model_dtype,
                     low_cpu_mem_usage=True,
                 )
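Both the probe in qwen3_encoder.py above and this loader reduce to the same layout check. A small sketch of that resolution (hypothetical helper, not part of the codebase):

from pathlib import Path

def resolve_qwen3_dirs(model_root: Path) -> tuple[Path, Path]:
    # Return (text_encoder_dir, tokenizer_dir) for either supported layout.
    if (model_root / "text_encoder").exists():
        # Full layout: model_root/text_encoder/ and model_root/tokenizer/
        return model_root / "text_encoder", model_root / "tokenizer"
    if (model_root / "config.json").exists():
        # Standalone layout: encoder config and tokenizer files sit directly in model_root/
        return model_root, model_root
    raise FileNotFoundError(f"no Qwen3 text encoder config found under {model_root}")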

invokeai/backend/model_manager/metadata/metadata_base.py

Lines changed: 12 additions & 5 deletions
@@ -95,13 +95,15 @@ def download_urls(
         self,
         variant: Optional[ModelRepoVariant] = None,
         subfolder: Optional[Path] = None,
+        subfolders: Optional[List[Path]] = None,
         session: Optional[Session] = None,
     ) -> List[RemoteModelFile]:
         """
-        Return list of downloadable files, filtering by variant and subfolder, if any.
+        Return list of downloadable files, filtering by variant and subfolder(s), if any.

         :param variant: Return model files needed to reconstruct the indicated variant
-        :param subfolder: Return model files from the designated subfolder only
+        :param subfolder: Return model files from the designated subfolder only (deprecated, use subfolders)
+        :param subfolders: Return model files from the designated subfolders
         :param session: A request.Session object used for internet-free testing

         Note that there is special variant-filtering behavior here:
@@ -111,10 +113,15 @@ def download_urls(
         session = session or Session()
         configure_http_backend(backend_factory=lambda: session)  # used in testing

-        paths = filter_files([x.path for x in self.files], variant, subfolder)  # all files in the model
-        prefix = f"{subfolder}/" if subfolder else ""
+        paths = filter_files([x.path for x in self.files], variant, subfolder, subfolders)  # all files in the model
+
+        # Determine prefix for model_index.json check - only applies for single subfolder
+        prefix = ""
+        if subfolder and not subfolders:
+            prefix = f"{subfolder}/"
+
         # the next step reads model_index.json to determine which subdirectories belong
-        # to the model
+        # to the model (only for single subfolder case)
         if Path(f"{prefix}model_index.json") in paths:
             url = hf_hub_url(self.id, filename="model_index.json", subfolder=str(subfolder) if subfolder else None)
             resp = session.get(url)

invokeai/backend/model_manager/starter_models.py

Lines changed: 2 additions & 2 deletions
@@ -694,8 +694,8 @@ class StarterModelBundle(BaseModel):
 z_image_qwen3_encoder = StarterModel(
     name="Z-Image Qwen3 Text Encoder",
     base=BaseModelType.Any,
-    source="Tongyi-MAI/Z-Image-Turbo::text_encoder",
-    description="Qwen3 4B text encoder for Z-Image (full precision). ~8GB",
+    source="Tongyi-MAI/Z-Image-Turbo::text_encoder+tokenizer",
+    description="Qwen3 4B text encoder with tokenizer for Z-Image (full precision). ~8GB",
     type=ModelType.Qwen3Encoder,
 )

invokeai/backend/model_manager/util/select_hf_files.py

Lines changed: 23 additions & 8 deletions
@@ -24,24 +24,39 @@ def filter_files(
     files: List[Path],
     variant: Optional[ModelRepoVariant] = None,
     subfolder: Optional[Path] = None,
+    subfolders: Optional[List[Path]] = None,
 ) -> List[Path]:
     """
     Take a list of files in a HuggingFace repo root and return paths to files needed to load the model.

     :param files: List of files relative to the repo root.
-    :param subfolder: Filter by the indicated subfolder.
+    :param subfolder: Filter by the indicated subfolder (deprecated, use subfolders instead).
+    :param subfolders: Filter by multiple subfolders. Files from any of these subfolders will be included.
     :param variant: Filter by files belonging to a particular variant, such as fp16.

     The file list can be obtained from the `files` field of HuggingFaceMetadata,
     as defined in `invokeai.backend.model_manager.metadata.metadata_base`.
     """
     variant = variant or ModelRepoVariant.Default
     paths: List[Path] = []
-    root = files[0].parts[0]
+
+    if not files:
+        return []
+
+    root = files[0].parts[0] if files[0].parts else Path(".")
+
+    # Build list of subfolders to filter by
+    filter_subfolders: List[Path] = []
+    if subfolders:
+        filter_subfolders = subfolders
+    elif subfolder:
+        filter_subfolders = [subfolder]

     # if the subfolder is a single file, then bypass the selection and just return it
-    if subfolder and subfolder.suffix in [".safetensors", ".bin", ".onnx", ".xml", ".pth", ".pt", ".ckpt", ".msgpack"]:
-        return [root / subfolder]
+    if len(filter_subfolders) == 1:
+        sf = filter_subfolders[0]
+        if sf.suffix in [".safetensors", ".bin", ".onnx", ".xml", ".pth", ".pt", ".ckpt", ".msgpack"]:
+            return [root / sf]

     # Start by filtering on model file extensions, discarding images, docs, etc
     for file in files:
@@ -66,10 +81,10 @@
         elif re.search(r"model.*\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name):
             paths.append(file)

-    # limit search to subfolder if requested
-    if subfolder:
-        subfolder = root / subfolder
-        paths = [x for x in paths if Path(subfolder) in x.parents]
+    # limit search to subfolder(s) if requested
+    if filter_subfolders:
+        absolute_subfolders = [root / sf for sf in filter_subfolders]
+        paths = [x for x in paths if any(Path(sf) in x.parents for sf in absolute_subfolders)]

     # _filter_by_variant uniquifies the paths and returns a set
     return sorted(_filter_by_variant(paths, variant))
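A hedged usage sketch of filter_files with the new argument. The repo file list is invented for illustration, and exact keep/drop decisions for individual files also depend on the extension filtering that is not shown in this hunk:

from pathlib import Path

from invokeai.backend.model_manager.util.select_hf_files import filter_files

repo_files = [
    Path("Z-Image-Turbo/model_index.json"),
    Path("Z-Image-Turbo/text_encoder/config.json"),
    Path("Z-Image-Turbo/text_encoder/model.safetensors"),
    Path("Z-Image-Turbo/tokenizer/tokenizer_config.json"),
    Path("Z-Image-Turbo/transformer/diffusion_model.safetensors"),
]
selected = filter_files(repo_files, subfolders=[Path("text_encoder"), Path("tokenizer")])
# expected: only the paths under Z-Image-Turbo/text_encoder/ and Z-Image-Turbo/tokenizer/ survive the subfolder filter.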

invokeai/frontend/web/src/services/api/schema.ts

Lines changed: 4 additions & 1 deletion
@@ -9557,9 +9557,12 @@ export type components = {
         };
         /**
          * HFModelSource
-         * @description A HuggingFace repo_id with optional variant, sub-folder and access token.
+         * @description A HuggingFace repo_id with optional variant, sub-folder(s) and access token.
          * Note that the variant option, if not provided to the constructor, will default to fp16, which is
          * what people (almost) always want.
+         *
+         * The subfolder can be a single path or multiple paths joined by '+' (e.g., "text_encoder+tokenizer").
+         * When multiple subfolders are specified, all of them will be downloaded and combined into the model directory.
          */
         HFModelSource: {
             /** Repo Id */
