Skip to content

Commit 8469983

Browse files
authored
Add Contract Tests for LLM Attributes and Models (#952)
*Description of changes:* Add new contract tests for Gen AI attributes and models.

*Test Plan:* <img width="1511" alt="contract-tests-pr" src="https://github.com/user-attachments/assets/78c861af-a2f4-48e6-96e7-5463b16ec69b">

By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent b58b94f commit 8469983

File tree

8 files changed

+1750
-2048
lines changed

8 files changed

+1750
-2048
lines changed

.github/patches/opentelemetry-java-instrumentation.patch

Lines changed: 917 additions & 2030 deletions
Large diffs are not rendered by default.

appsignals-tests/contract-tests/src/test/java/software/amazon/opentelemetry/appsignals/test/awssdk/base/AwsSdkBaseTest.java

Lines changed: 369 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1798,8 +1798,8 @@ protected void doTestBedrockAgentDataSourceId() {
17981798
0.0);
17991799
}
18001800

1801-
protected void doTestBedrockRuntimeModelId() {
1802-
var response = appClient.get("/bedrockruntime/invokeModel").aggregate().join();
1801+
protected void doTestBedrockRuntimeAi21Jamba() {
1802+
var response = appClient.get("/bedrockruntime/invokeModel/ai21Jamba").aggregate().join();
18031803
var traces = mockCollectorClient.getTraces();
18041804
var metrics =
18051805
mockCollectorClient.getMetrics(
@@ -1809,9 +1809,9 @@ protected void doTestBedrockRuntimeModelId() {
18091809
AppSignalsConstants.LATENCY_METRIC));
18101810

18111811
var localService = getApplicationOtelServiceName();
1812-
var localOperation = "GET /bedrockruntime/invokeModel";
1812+
var localOperation = "GET /bedrockruntime/invokeModel/ai21Jamba";
18131813
String type = "AWS::Bedrock::Model";
1814-
String identifier = "anthropic.claude-v2";
1814+
String identifier = "ai21.jamba-1-5-mini-v1:0";
18151815
assertSpanClientAttributes(
18161816
traces,
18171817
bedrockRuntimeSpanName("InvokeModel"),
@@ -1828,7 +1828,371 @@ protected void doTestBedrockRuntimeModelId() {
18281828
200,
18291829
List.of(
18301830
assertAttribute(
1831-
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL, "anthropic.claude-v2")));
1831+
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL, "ai21.jamba-1-5-mini-v1:0"),
1832+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.7"),
1833+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.8"),
1834+
assertAttribute(SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[stop]"),
1835+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "5"),
1836+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "42")));
1837+
assertMetricClientAttributes(
1838+
metrics,
1839+
AppSignalsConstants.LATENCY_METRIC,
1840+
localService,
1841+
localOperation,
1842+
getBedrockRuntimeServiceName(),
1843+
"InvokeModel",
1844+
type,
1845+
identifier,
1846+
5000.0);
1847+
assertMetricClientAttributes(
1848+
metrics,
1849+
AppSignalsConstants.FAULT_METRIC,
1850+
localService,
1851+
localOperation,
1852+
getBedrockRuntimeServiceName(),
1853+
"InvokeModel",
1854+
type,
1855+
identifier,
1856+
0.0);
1857+
assertMetricClientAttributes(
1858+
metrics,
1859+
AppSignalsConstants.ERROR_METRIC,
1860+
localService,
1861+
localOperation,
1862+
getBedrockRuntimeServiceName(),
1863+
"InvokeModel",
1864+
type,
1865+
identifier,
1866+
0.0);
1867+
}
1868+
1869+
protected void doTestBedrockRuntimeAmazonTitan() {
1870+
var response = appClient.get("/bedrockruntime/invokeModel/amazonTitan").aggregate().join();
1871+
var traces = mockCollectorClient.getTraces();
1872+
var metrics =
1873+
mockCollectorClient.getMetrics(
1874+
Set.of(
1875+
AppSignalsConstants.ERROR_METRIC,
1876+
AppSignalsConstants.FAULT_METRIC,
1877+
AppSignalsConstants.LATENCY_METRIC));
1878+
1879+
var localService = getApplicationOtelServiceName();
1880+
var localOperation = "GET /bedrockruntime/invokeModel/amazonTitan";
1881+
String type = "AWS::Bedrock::Model";
1882+
String identifier = "amazon.titan-text-premier-v1:0";
1883+
assertSpanClientAttributes(
1884+
traces,
1885+
bedrockRuntimeSpanName("InvokeModel"),
1886+
getBedrockRuntimeRpcServiceName(),
1887+
localService,
1888+
localOperation,
1889+
getBedrockRuntimeServiceName(),
1890+
"InvokeModel",
1891+
type,
1892+
identifier,
1893+
"bedrock.test",
1894+
8080,
1895+
"http://bedrock.test:8080",
1896+
200,
1897+
List.of(
1898+
assertAttribute(
1899+
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL,
1900+
"amazon.titan-text-premier-v1:0"),
1901+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "100"),
1902+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.7"),
1903+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.9"),
1904+
assertAttribute(
1905+
SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[FINISHED]"),
1906+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "10"),
1907+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "15")));
1908+
assertMetricClientAttributes(
1909+
metrics,
1910+
AppSignalsConstants.LATENCY_METRIC,
1911+
localService,
1912+
localOperation,
1913+
getBedrockRuntimeServiceName(),
1914+
"InvokeModel",
1915+
type,
1916+
identifier,
1917+
5000.0);
1918+
assertMetricClientAttributes(
1919+
metrics,
1920+
AppSignalsConstants.FAULT_METRIC,
1921+
localService,
1922+
localOperation,
1923+
getBedrockRuntimeServiceName(),
1924+
"InvokeModel",
1925+
type,
1926+
identifier,
1927+
0.0);
1928+
assertMetricClientAttributes(
1929+
metrics,
1930+
AppSignalsConstants.ERROR_METRIC,
1931+
localService,
1932+
localOperation,
1933+
getBedrockRuntimeServiceName(),
1934+
"InvokeModel",
1935+
type,
1936+
identifier,
1937+
0.0);
1938+
}
1939+
1940+
protected void doTestBedrockRuntimeAnthropicClaude() {
1941+
var response = appClient.get("/bedrockruntime/invokeModel/anthropicClaude").aggregate().join();
1942+
1943+
var traces = mockCollectorClient.getTraces();
1944+
var metrics =
1945+
mockCollectorClient.getMetrics(
1946+
Set.of(
1947+
AppSignalsConstants.ERROR_METRIC,
1948+
AppSignalsConstants.FAULT_METRIC,
1949+
AppSignalsConstants.LATENCY_METRIC));
1950+
1951+
var localService = getApplicationOtelServiceName();
1952+
var localOperation = "GET /bedrockruntime/invokeModel/anthropicClaude";
1953+
String type = "AWS::Bedrock::Model";
1954+
String identifier = "anthropic.claude-3-haiku-20240307-v1:0";
1955+
1956+
assertSpanClientAttributes(
1957+
traces,
1958+
bedrockRuntimeSpanName("InvokeModel"),
1959+
getBedrockRuntimeRpcServiceName(),
1960+
localService,
1961+
localOperation,
1962+
getBedrockRuntimeServiceName(),
1963+
"InvokeModel",
1964+
type,
1965+
identifier,
1966+
"bedrock.test",
1967+
8080,
1968+
"http://bedrock.test:8080",
1969+
200,
1970+
List.of(
1971+
assertAttribute(
1972+
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL,
1973+
"anthropic.claude-3-haiku-20240307-v1:0"),
1974+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "512"),
1975+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.6"),
1976+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.53"),
1977+
assertAttribute(
1978+
SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[end_turn]"),
1979+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "2095"),
1980+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "503")));
1981+
assertMetricClientAttributes(
1982+
metrics,
1983+
AppSignalsConstants.LATENCY_METRIC,
1984+
localService,
1985+
localOperation,
1986+
getBedrockRuntimeServiceName(),
1987+
"InvokeModel",
1988+
type,
1989+
identifier,
1990+
5000.0);
1991+
assertMetricClientAttributes(
1992+
metrics,
1993+
AppSignalsConstants.FAULT_METRIC,
1994+
localService,
1995+
localOperation,
1996+
getBedrockRuntimeServiceName(),
1997+
"InvokeModel",
1998+
type,
1999+
identifier,
2000+
0.0);
2001+
assertMetricClientAttributes(
2002+
metrics,
2003+
AppSignalsConstants.ERROR_METRIC,
2004+
localService,
2005+
localOperation,
2006+
getBedrockRuntimeServiceName(),
2007+
"InvokeModel",
2008+
type,
2009+
identifier,
2010+
0.0);
2011+
}
2012+
2013+
protected void doTestBedrockRuntimeCohereCommandR() {
2014+
var response = appClient.get("/bedrockruntime/invokeModel/cohereCommandR").aggregate().join();
2015+
2016+
var traces = mockCollectorClient.getTraces();
2017+
var metrics =
2018+
mockCollectorClient.getMetrics(
2019+
Set.of(
2020+
AppSignalsConstants.ERROR_METRIC,
2021+
AppSignalsConstants.FAULT_METRIC,
2022+
AppSignalsConstants.LATENCY_METRIC));
2023+
2024+
var localService = getApplicationOtelServiceName();
2025+
var localOperation = "GET /bedrockruntime/invokeModel/cohereCommandR";
2026+
String type = "AWS::Bedrock::Model";
2027+
String identifier = "cohere.command-r-v1:0";
2028+
2029+
assertSpanClientAttributes(
2030+
traces,
2031+
bedrockRuntimeSpanName("InvokeModel"),
2032+
getBedrockRuntimeRpcServiceName(),
2033+
localService,
2034+
localOperation,
2035+
getBedrockRuntimeServiceName(),
2036+
"InvokeModel",
2037+
type,
2038+
identifier,
2039+
"bedrock.test",
2040+
8080,
2041+
"http://bedrock.test:8080",
2042+
200,
2043+
List.of(
2044+
assertAttribute(
2045+
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL, "cohere.command-r-v1:0"),
2046+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "4096"),
2047+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.8"),
2048+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.45"),
2049+
assertAttribute(
2050+
SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[COMPLETE]"),
2051+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "9"),
2052+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "16")));
2053+
assertMetricClientAttributes(
2054+
metrics,
2055+
AppSignalsConstants.LATENCY_METRIC,
2056+
localService,
2057+
localOperation,
2058+
getBedrockRuntimeServiceName(),
2059+
"InvokeModel",
2060+
type,
2061+
identifier,
2062+
5000.0);
2063+
assertMetricClientAttributes(
2064+
metrics,
2065+
AppSignalsConstants.FAULT_METRIC,
2066+
localService,
2067+
localOperation,
2068+
getBedrockRuntimeServiceName(),
2069+
"InvokeModel",
2070+
type,
2071+
identifier,
2072+
0.0);
2073+
assertMetricClientAttributes(
2074+
metrics,
2075+
AppSignalsConstants.ERROR_METRIC,
2076+
localService,
2077+
localOperation,
2078+
getBedrockRuntimeServiceName(),
2079+
"InvokeModel",
2080+
type,
2081+
identifier,
2082+
0.0);
2083+
}
2084+
2085+
protected void doTestBedrockRuntimeMetaLlama() {
2086+
var response = appClient.get("/bedrockruntime/invokeModel/metaLlama").aggregate().join();
2087+
2088+
var traces = mockCollectorClient.getTraces();
2089+
var metrics =
2090+
mockCollectorClient.getMetrics(
2091+
Set.of(
2092+
AppSignalsConstants.ERROR_METRIC,
2093+
AppSignalsConstants.FAULT_METRIC,
2094+
AppSignalsConstants.LATENCY_METRIC));
2095+
2096+
var localService = getApplicationOtelServiceName();
2097+
var localOperation = "GET /bedrockruntime/invokeModel/metaLlama";
2098+
String type = "AWS::Bedrock::Model";
2099+
String identifier = "meta.llama3-70b-instruct-v1:0";
2100+
2101+
assertSpanClientAttributes(
2102+
traces,
2103+
bedrockRuntimeSpanName("InvokeModel"),
2104+
getBedrockRuntimeRpcServiceName(),
2105+
localService,
2106+
localOperation,
2107+
getBedrockRuntimeServiceName(),
2108+
"InvokeModel",
2109+
type,
2110+
identifier,
2111+
"bedrock.test",
2112+
8080,
2113+
"http://bedrock.test:8080",
2114+
200,
2115+
List.of(
2116+
assertAttribute(
2117+
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL, "meta.llama3-70b-instruct-v1:0"),
2118+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "128"),
2119+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.1"),
2120+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.9"),
2121+
assertAttribute(SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[stop]"),
2122+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "2095"),
2123+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "503")));
2124+
assertMetricClientAttributes(
2125+
metrics,
2126+
AppSignalsConstants.LATENCY_METRIC,
2127+
localService,
2128+
localOperation,
2129+
getBedrockRuntimeServiceName(),
2130+
"InvokeModel",
2131+
type,
2132+
identifier,
2133+
5000.0);
2134+
assertMetricClientAttributes(
2135+
metrics,
2136+
AppSignalsConstants.FAULT_METRIC,
2137+
localService,
2138+
localOperation,
2139+
getBedrockRuntimeServiceName(),
2140+
"InvokeModel",
2141+
type,
2142+
identifier,
2143+
0.0);
2144+
assertMetricClientAttributes(
2145+
metrics,
2146+
AppSignalsConstants.ERROR_METRIC,
2147+
localService,
2148+
localOperation,
2149+
getBedrockRuntimeServiceName(),
2150+
"InvokeModel",
2151+
type,
2152+
identifier,
2153+
0.0);
2154+
}
2155+
2156+
protected void doTestBedrockRuntimeMistral() {
2157+
var response = appClient.get("/bedrockruntime/invokeModel/mistralAi").aggregate().join();
2158+
2159+
var traces = mockCollectorClient.getTraces();
2160+
var metrics =
2161+
mockCollectorClient.getMetrics(
2162+
Set.of(
2163+
AppSignalsConstants.ERROR_METRIC,
2164+
AppSignalsConstants.FAULT_METRIC,
2165+
AppSignalsConstants.LATENCY_METRIC));
2166+
2167+
var localService = getApplicationOtelServiceName();
2168+
var localOperation = "GET /bedrockruntime/invokeModel/mistralAi";
2169+
String type = "AWS::Bedrock::Model";
2170+
String identifier = "mistral.mistral-large-2402-v1:0";
2171+
2172+
assertSpanClientAttributes(
2173+
traces,
2174+
bedrockRuntimeSpanName("InvokeModel"),
2175+
getBedrockRuntimeRpcServiceName(),
2176+
localService,
2177+
localOperation,
2178+
getBedrockRuntimeServiceName(),
2179+
"InvokeModel",
2180+
type,
2181+
identifier,
2182+
"bedrock.test",
2183+
8080,
2184+
"http://bedrock.test:8080",
2185+
200,
2186+
List.of(
2187+
assertAttribute(
2188+
SemanticConventionsConstants.GEN_AI_REQUEST_MODEL,
2189+
"mistral.mistral-large-2402-v1:0"),
2190+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_MAX_TOKENS, "4096"),
2191+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TEMPERATURE, "0.75"),
2192+
assertAttribute(SemanticConventionsConstants.GEN_AI_REQUEST_TOP_P, "0.25"),
2193+
assertAttribute(SemanticConventionsConstants.GEN_AI_RESPONSE_FINISH_REASONS, "[stop]"),
2194+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_INPUT_TOKENS, "15"),
2195+
assertAttribute(SemanticConventionsConstants.GEN_AI_USAGE_OUTPUT_TOKENS, "24")));
18322196
assertMetricClientAttributes(
18332197
metrics,
18342198
AppSignalsConstants.LATENCY_METRIC,

0 commit comments

Comments (0)