
Commit 62923fe (1 parent: 1987115)

fixup! refactor: removed outlines from vllm/hf backend, use latest vllm, V1 API, use llguidance

1 file changed: 6 additions, 15 deletions

mellea/backends/huggingface.py

@@ -653,24 +653,15 @@ async def _generate_from_context_with_kv_cache(
 
         format_kwargs = {}
         if _format:
-            # outlines.generate.json always parses the resulting json into a python dict.
-            # We however want to keep it as a json string for later storing it in ModelOutputThunk
             schema: dict[str, Any] = _format.model_json_schema()
-            schema_json: str = json.dumps(schema)
-            regex_str: str = outlines_core.fsm.json_schema.build_regex_from_schema(  # type: ignore
-                schema_json
+            grammar: str = llguidance.LLMatcher.grammar_from_json_schema(
+                schema, defaults={"whitespace_flexible": False}
+            )
+            logits_processor = _GuidanceLogitsProcessor(
+                grammar, self._llguidance_tokenizer
             )
-
-            from outlines.models.transformers import TransformerTokenizer
-            from outlines.processors.structured import RegexLogitsProcessor
-            from transformers import LogitsProcessorList
-
             format_kwargs["logits_processor"] = LogitsProcessorList(
-                [
-                    RegexLogitsProcessor(
-                        regex_str, tokenizer=TransformerTokenizer(self._tokenizer)
-                    )
-                ]
+                [logits_processor]
             )
 
         streaming_kwargs = {}
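
For context: the old path compiled the Pydantic model's JSON schema into a regular expression via outlines_core and constrained decoding with outlines' RegexLogitsProcessor; the new path compiles the schema into an llguidance grammar once and enforces it with a dedicated logits processor. The commit does not show the bodies of _GuidanceLogitsProcessor or how self._llguidance_tokenizer is constructed, so the sketch below is only a hypothetical, minimal reconstruction of that wiring using llguidance's public Python helpers as I understand them (llguidance.hf.from_tokenizer, LLMatcher, and the llguidance.torch bitmask utilities); the model id, prompt, and Person schema are placeholders, batch size 1 is assumed, and none of this is mellea's actual implementation.

# Minimal sketch (NOT mellea's code): a grammar-constrained logits
# processor in the spirit of the _GuidanceLogitsProcessor used above.
from typing import Any

import llguidance
import llguidance.hf
import llguidance.torch
import torch
from pydantic import BaseModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LogitsProcessor,
    LogitsProcessorList,
)


class _GuidanceLogitsProcessor(LogitsProcessor):  # hypothetical reconstruction
    """Masks logits of tokens that would violate an llguidance grammar."""

    def __init__(self, grammar: str, ll_tokenizer: llguidance.LLTokenizer) -> None:
        self.matcher = llguidance.LLMatcher(ll_tokenizer, grammar)
        # int32 bitmask with one bit per vocabulary entry (batch size 1 assumed).
        self.bitmask = llguidance.torch.allocate_token_bitmask(1, ll_tokenizer.vocab_size)
        self._seen = -1  # tokens already fed to the matcher; -1 = prompt not seen yet

    def __call__(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor
    ) -> torch.FloatTensor:
        if self._seen < 0:
            self._seen = input_ids.shape[-1]  # first call: everything so far is prompt
        else:
            # Advance the matcher over tokens sampled since the last call.
            for tok in input_ids[0, self._seen:].tolist():
                self.matcher.consume_token(tok)
            self._seen = input_ids.shape[-1]
        # Mask out tokens the grammar disallows at the next position.
        llguidance.torch.fill_next_token_bitmask(self.matcher, self.bitmask)
        llguidance.torch.apply_token_bitmask_inplace(scores, self.bitmask.to(scores.device))
        return scores


class Person(BaseModel):  # placeholder schema standing in for _format
    name: str
    age: int


model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Plays the role of self._llguidance_tokenizer in the diff.
ll_tokenizer = llguidance.hf.from_tokenizer(tokenizer)

schema: dict[str, Any] = Person.model_json_schema()
grammar: str = llguidance.LLMatcher.grammar_from_json_schema(
    schema, defaults={"whitespace_flexible": False}
)

inputs = tokenizer("Reply with a JSON person object: ", return_tensors="pt")
out = model.generate(
    **inputs,
    max_new_tokens=64,
    logits_processor=LogitsProcessorList(
        [_GuidanceLogitsProcessor(grammar, ll_tokenizer)]
    ),
)
print(tokenizer.decode(out[0, inputs["input_ids"].shape[-1]:]))

Note that this approach also preserves the intent of the removed comment: the constrained output stays a raw JSON string (suitable for storing in ModelOutputThunk) instead of being parsed into a dict, and "whitespace_flexible": False makes the grammar forbid arbitrary whitespace between JSON tokens, so the model emits compact JSON.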
