|
| 1 | +//! # Outlines_core |
| 2 | +//! |
| 3 | +//! `outlines_core` crate provides a convenient way to: |
| 4 | +//! |
| 5 | +//! - build regular expressions from JSON schemas |
| 6 | +//! |
| 7 | +//! - construct an [`index::Index`] object by combining a [`vocabulary::Vocabulary`] and regular |
| 8 | +//! expression to efficiently map tokens from a given `Vocabulary` to state transitions in a |
| 9 | +//! finite-state automaton |
| 10 | +//! |
| 11 | +//! ## `json_schema` |
| 12 | +//! |
| 13 | +//! [`json_schema`] module provides interfaces to generate a regular expression based on a given JSON schema, depending on its type: |
| 14 | +//! - [`json_schema::regex_from_str`] |
| 15 | +//! - [`json_schema::regex_from_value`] |
| 16 | +//! |
| 17 | +//! The whitespace pattern can be customized; otherwise the default [`json_schema::WHITESPACE`] pattern is used. |
| 18 | +//! |
| 19 | +//! Note that not all features of JSON schema are supported for regex generation: [Supported Features](json_schema#supported-features) |
| 20 | +//! |
| 21 | +//! ## `Index` |
| 22 | +//! |
| 23 | +//! Once [`index::Index`] is built, it can be used to evaluate or validate token sequences. |
| 24 | +//! |
| 25 | +//! ### Complexity and construction cost |
| 26 | +//! |
| 27 | +//! `Index` can accommodate large vocabularies and complex regular expressions. However, its size **may** grow |
| 28 | +//! significantly with the complexity of the input, as may the time and computational resources needed to build it. |
| 29 | +//! |
| 30 | +//! ## Python bindings |
| 31 | +//! |
| 32 | +//! Additionally, the crate provides interfaces to integrate its functionality with Python. |
| 33 | +//! |
| 34 | +//! ## Support |
| 35 | +//! |
| 36 | +//! `Outlines_core` is primarily used in the structured text generation project [`outlines`](https://github.com/dottxt-ai/outlines). |
| 37 | +//! |
| 38 | +//! ## Example |
| 39 | +//! |
| 40 | +//! Basic example of how it all fits together. |
| 41 | +//! |
| 42 | +//! ```rust |
| 43 | +//! # use outlines_core::Error; |
| 44 | +//! use outlines_core::prelude::*; |
| 45 | +//! |
| 46 | +//! # fn main() -> Result<(), Error> { |
| 47 | +//! // Define a JSON schema |
| 48 | +//! let schema = r#"{ |
| 49 | +//! "type": "object", |
| 50 | +//! "properties": { |
| 51 | +//! "name": { "type": "string" }, |
| 52 | +//! "age": { "type": "integer" } |
| 53 | +//! }, |
| 54 | +//! "required": ["name", "age"] |
| 55 | +//! }"#; |
| 56 | +//! |
| 57 | +//! // Generate a regular expression from it |
| 58 | +//! let regex = json_schema::regex_from_str(&schema, None)?; |
| 59 | +//! println!("Generated regex: {}", regex); |
| 60 | +//! |
| 61 | +//! // Create `Vocabulary` from pretrained large language model (but manually is also possible) |
| 62 | +//! let vocabulary = Vocabulary::from_pretrained("openai-community/gpt2", None); |
| 63 | +//! |
| 64 | +//! // Create new `Index` from regex and a given `Vocabulary` |
| 65 | +//! let index = Index::new(regex, &vocabulary)?; |
| 66 | +//! |
| 67 | +//! let initial_state = index.initial_state(); |
| 68 | +//! println!("Is initial state {} a final state? {}", initial_state, index.is_final_state(&initial_state)); |
| 69 | +//! |
| 70 | +//! let allowed_tokens = index.allowed_tokens(&initial_state).expect("Some allowed tokens"); |
| 71 | +//! println!("Allowed tokens at initial state are {:?}", allowed_tokens); |
| 72 | +//! |
| 73 | +//! let token_id = allowed_tokens.first().expect("First token"); |
| 74 | +//! println!("Next state for the token_id {} is {:?}", token_id, index.next_state(&initial_state, token_id)); |
| 75 | +//! println!("Final states are {:?}", index.final_states()); |
| 76 | +//! println!("Index has exactly {} transitions", index.transitions().len()); |
| 77 | +//! # Ok(()) |
| 78 | +//! } |
| 79 | +//! ``` |
| 80 | +
|
1 | 81 | pub mod error; |
2 | 82 | pub mod index; |
3 | 83 | pub mod json_schema; |
|
0 commit comments