@@ -32,44 +32,6 @@ class AppSettings(BaseModel):
     model_config = ConfigDict(protected_namespaces=(), extra="forbid")


-# class AppSettings(BaseModel):
-#     hf_model_name: str = Field(
-#         description="The model to use when constructing the LLM Chat client. This should match the model name running on the vLLM backend",
-#     )
-#     backend_url: HttpUrl = Field(
-#         description="The address of the OpenAI compatible API server (either in-cluster or externally hosted)"
-#     )
-#     page_title: str = Field(default="Large Language Model")
-#     page_description: Optional[str] = Field(default=None)
-#     hf_model_instruction: str = Field(
-#         default="You are a helpful and cheerful AI assistant. Please respond appropriately."
-#     )
-
-#     # Model settings
-
-#     # For available parameters, see https://docs.vllm.ai/en/latest/dev/sampling_params.html
-#     # which is based on https://platform.openai.com/docs/api-reference/completions/create
-#     llm_max_tokens: int = Field(default=500)
-#     llm_temperature: float = Field(default=0)
-#     llm_top_p: float = Field(default=1)
-#     llm_top_k: float = Field(default=-1)
-#     llm_presence_penalty: float = Field(default=0, ge=-2, le=2)
-#     llm_frequency_penalty: float = Field(default=0, ge=-2, le=2)
-
-#     # UI theming
-
-#     # Variables explicitly passed to gradio.theme.Default()
-#     # For example:
-#     # {"primary_hue": "red"}
-#     theme_params: dict[str, Union[str, List[str]]] = Field(default_factory=dict)
-#     # Overrides for theme.body_background_fill property
-#     theme_background_colour: Optional[str] = Field(default=None)
-#     # Provides arbitrary CSS and JS overrides to the UI,
-#     # see https://www.gradio.app/guides/custom-CSS-and-JS
-#     css_overrides: Optional[str] = Field(default=None)
-#     custom_javascript: Optional[str] = Field(default=None)
-
-
 settings = AppSettings(**load_settings())
 logger.info(settings)

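
Note on the retained settings model: `extra="forbid"` makes unrecognised config keys a hard validation error rather than letting them be silently dropped, and `protected_namespaces=()` lifts Pydantic's reservation of `model_`-prefixed field names. A minimal sketch of that behaviour (the field name and values below are illustrative, not from this commit):

# Minimal sketch of the validation the retained model_config line provides;
# the field and values are illustrative, not taken from this commit.
from pydantic import BaseModel, ConfigDict, Field, ValidationError

class DemoSettings(BaseModel):
    # extra="forbid": unknown config keys raise instead of being ignored;
    # protected_namespaces=(): field names may start with "model_"
    model_config = ConfigDict(protected_namespaces=(), extra="forbid")

    hf_model_name: str = Field(description="Model name served by the vLLM backend")

try:
    DemoSettings(hf_model_name="some-model", temprature=0)  # note the typo
except ValidationError as err:
    print(err)  # reports "Extra inputs are not permitted" for the misspelled key
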
@@ -102,7 +64,6 @@ class PossibleSystemPromptException(Exception):
     },
     streaming=True,
 )
-logger.info(llm)


 def inference(latest_message, history):
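
The kept lines above construct the chat client with `streaming=True` against the OpenAI-compatible vLLM endpoint. A hypothetical sketch of that pattern, assuming a LangChain `ChatOpenAI` client (the commit does not show which client class is used; the model name and URL are placeholders):

# Hypothetical sketch: an OpenAI-compatible client pointed at vLLM with
# streaming enabled. ChatOpenAI is an assumption about the client in use.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="mistralai/Mistral-7B-Instruct-v0.2",  # placeholder model name
    base_url="http://vllm-backend:8000/v1",      # placeholder backend URL
    api_key="EMPTY",                             # vLLM typically ignores the key
    streaming=True,                              # matches the kept streaming=True
)

# Stream tokens as they arrive rather than waiting for the full completion.
for chunk in llm.stream("Hello!"):
    print(chunk.content, end="", flush=True)
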
@@ -176,7 +137,6 @@ def inference(latest_message, history):
 # UI theming
 theme = gr.themes.Default(**settings.theme_params)
 theme.set(**settings.theme_params_extended)
-# theme.set(text)


 def inference_wrapper(*args):
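
The retained theming code applies settings in two steps: constructor kwargs to `gr.themes.Default()`, then CSS-variable overrides via `theme.set()`. A small sketch using the `primary_hue` and `body_background_fill` examples mentioned in the removed comments (the values themselves are illustrative):

# Sketch of the two-step theming pattern kept above; hue and background
# values echo the examples in the removed comments, not live config.
import gradio as gr

theme = gr.themes.Default(primary_hue="red")       # constructor-level params
theme = theme.set(body_background_fill="#0d1117")  # CSS-variable override
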
@@ -221,44 +181,5 @@ def inference_wrapper(*args):
 )
 logger.debug("Gradio chat interface config: %s", app.config)
 app.queue(
-    # Allow 10 concurrent requests to backend
-    # vLLM backend should be clever enough to
-    # batch these requests appropriately.
     default_concurrency_limit=10,
 ).launch(server_name=settings.host_address)
-
-# with gr.ChatInterface(
-#     inference_wrapper,
-#     chatbot=gr.Chatbot(
-#         # Height of conversation window in CSS units (string) or pixels (int)
-#         height="68vh",
-#         show_copy_button=True,
-#     ),
-#     textbox=gr.Textbox(
-#         placeholder="Ask me anything...",
-#         container=False,
-#         # Ratio of text box to submit button width
-#         scale=7,
-#     ),
-#     title=settings.page_title,
-#     description=settings.page_description,
-#     retry_btn="Retry",
-#     undo_btn="Undo",
-#     clear_btn="Clear",
-#     analytics_enabled=False,
-#     theme=theme,
-#     css=settings.css_overrides,
-#     js=settings.custom_javascript,
-# ) as app:
-#     logger.debug("Gradio chat interface config: %s", app.config)
-#     # For running locally in tilt dev setup
-#     if len(sys.argv) > 2 and sys.argv[2] == "localhost":
-#         app.launch()
-#     # For running on cluster
-#     else:
-#         app.queue(
-#             # Allow 10 concurrent requests to backend
-#             # vLLM backend should be clever enough to
-#             # batch these requests appropriately.
-#             default_concurrency_limit=10,
-#         ).launch(server_name=settings.host_address)
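
For reference, the launch pattern this commit keeps, restated without the deleted comments (they noted that up to 10 concurrent requests may reach the backend and that vLLM batches them). The echo handler below is a stand-in for the app's real `inference_wrapper`:

# Sketch of the kept queue/launch pattern; the echo handler and bind
# address are placeholders, not values from this commit.
import gradio as gr

def echo(message, history):
    return message

app = gr.ChatInterface(echo)
app.queue(
    default_concurrency_limit=10,   # cap on concurrent backend requests
).launch(server_name="0.0.0.0")     # stand-in for settings.host_address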