@@ -1875,4 +1875,107 @@ describe.skip("InferenceClient", () => {
 		},
 		TIMEOUT
 	);
+	describe.concurrent(
+		"OVHcloud",
+		() => {
+			const client = new HfInference(env.HF_OVHCLOUD_KEY ?? "dummy");
+
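+			// Hardcode the mapping from the HF model ID to OVHcloud's own model ID, so the
+			// test does not rely on the provider mapping published on the Hub for this model.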
+			HARDCODED_MODEL_INFERENCE_MAPPING["ovhcloud"] = {
+				"meta-llama/llama-3.1-8b-instruct": {
+					hfModelId: "meta-llama/llama-3.1-8b-instruct",
+					providerId: "Llama-3.1-8B-Instruct",
+					status: "live",
+					task: "conversational",
+				},
+			};
+
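+			// Deterministic sampling (fixed seed, temperature 0, low top_p) with a single
+			// output token, so the model is expected to continue the sequence with "D".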
+			it("chatCompletion", async () => {
+				const res = await client.chatCompletion({
+					model: "meta-llama/llama-3.1-8b-instruct",
+					provider: "ovhcloud",
+					messages: [{ role: "user", content: "A, B, C, " }],
+					seed: 42,
+					temperature: 0,
+					top_p: 0.01,
+					max_tokens: 1,
+				});
+				expect(res.choices && res.choices.length > 0).toBeTruthy();
+				const completion = res.choices[0].message?.content;
+				expect(completion).toContain("D");
+			});
+
+			it("chatCompletion stream", async () => {
+				const stream = client.chatCompletionStream({
+					model: "meta-llama/llama-3.1-8b-instruct",
+					provider: "ovhcloud",
+					messages: [{ role: "user", content: "A, B, C, " }],
+					stream: true,
+					seed: 42,
+					temperature: 0,
+					top_p: 0.01,
+					max_tokens: 1,
+				}) as AsyncGenerator<ChatCompletionStreamOutput>;
+
+				let fullResponse = "";
+				for await (const chunk of stream) {
+					if (chunk.choices && chunk.choices.length > 0) {
+						const content = chunk.choices[0].delta?.content;
+						if (content) {
+							fullResponse += content;
+						}
+					}
+				}
+
+				// Verify we got a meaningful response
+				expect(fullResponse).toBeTruthy();
+				expect(fullResponse).toContain("D");
+			});
+
+			it("textGeneration", async () => {
+				const res = await client.textGeneration({
+					model: "meta-llama/llama-3.1-8b-instruct",
+					provider: "ovhcloud",
+					inputs: "A B C ",
+					parameters: {
+						seed: 42,
+						temperature: 0,
+						top_p: 0.01,
+						max_new_tokens: 1,
+					},
+				});
+				expect(res.generated_text.length).toBeGreaterThan(0);
+				expect(res.generated_text).toContain("D");
+			});
+
+			it("textGeneration stream", async () => {
+				const stream = client.textGenerationStream({
+					model: "meta-llama/llama-3.1-8b-instruct",
+					provider: "ovhcloud",
+					inputs: "A B C ",
+					stream: true,
+					parameters: {
+						seed: 42,
+						temperature: 0,
+						top_p: 0.01,
+						max_new_tokens: 1,
+					},
+				}) as AsyncGenerator<ChatCompletionStreamOutput>;
+
+				let fullResponse = "";
+				for await (const chunk of stream) {
+					if (chunk.choices && chunk.choices.length > 0) {
+						const content = chunk.choices[0].text;
+						if (content) {
+							fullResponse += content;
+						}
+					}
+				}
+
+				// Verify we got a meaningful response
+				expect(fullResponse).toBeTruthy();
+				expect(fullResponse).toContain("D");
+			});
+		},
+		TIMEOUT
+	);
 });