Skip to content

Commit e9bb3e1

Browse files
committed
feat: writing effective tools
1 parent afb6dad commit e9bb3e1

File tree

2 files changed

+1359
-0
lines changed

2 files changed

+1359
-0
lines changed

tool_evaluation/evaluation.xml

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
<evaluation>
2+
<!-- Date/Time Calculations -->
3+
<task>
4+
<prompt>How many days are between March 15, 2024 and September 22, 2025? Include both start and end dates in your count.</prompt>
5+
<response>557</response>
6+
</task>
7+
8+
<task>
9+
<prompt>If a meeting starts at 11:45 AM and lasts for 2 hours and 37 minutes, what time does it end? Express in 24-hour format as HH:MM.</prompt>
10+
<response>14:22</response>
11+
</task>
12+
13+
<!-- Modular Arithmetic -->
14+
<task>
15+
<prompt>What is 2^100 mod 7? Give the exact integer result.</prompt>
16+
<response>2</response>
17+
</task>
18+
19+
<task>
20+
<prompt>What day of the week will it be 1000 days from Monday?</prompt>
21+
<response>Sunday</response>
22+
</task>
23+
24+
<!-- Factorial/Combinatorics -->
25+
<task>
26+
<prompt>Calculate 15! (15 factorial). Give the exact integer result.</prompt>
27+
<response>1307674368000</response>
28+
</task>
29+
30+
<task>
31+
<prompt>How many different ways can you choose 5 items from a set of 12 items? (Calculate C(12,5))</prompt>
32+
<response>792</response>
33+
</task>
34+
35+
<!-- Trigonometric Functions -->
36+
<task>
37+
<prompt>Calculate sin(π/6) + cos(π/3) + tan(π/4). Give the exact value.</prompt>
38+
<response>2</response>
39+
</task>
40+
41+
<!-- Logarithms/Exponentials -->
42+
<task>
43+
<prompt>Solve for x: 2^x = 128. Give the exact integer value.</prompt>
44+
<response>7</response>
45+
</task>
46+
47+
<task>
48+
<prompt>Calculate ln(e^3) + log₁₀(1000) - log₂(8). Give the exact value.</prompt>
49+
<response>3</response>
50+
</task>
51+
52+
<!-- Matrix Operations -->
53+
<task>
54+
<prompt>Calculate the determinant of the 2x2 matrix [[3, 7], [2, 5]].</prompt>
55+
<response>1</response>
56+
</task>
57+
58+
<!-- Number Theory -->
59+
<task>
60+
<prompt>What is the greatest common divisor (GCD) of 1071 and 462?</prompt>
61+
<response>21</response>
62+
</task>
63+
64+
<task>
65+
<prompt>Is 97 a prime number? Answer 'true' or 'false'.</prompt>
66+
<response>true</response>
67+
</task>
68+
69+
<!-- Bitwise Operations -->
70+
<task>
71+
<prompt>Calculate 42 XOR 15 (bitwise exclusive OR).</prompt>
72+
<response>37</response>
73+
</task>
74+
75+
<!-- Floor/Ceiling Functions -->
76+
<task>
77+
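<prompt>Calculate floor(7.8) × ceiling(2.1) + round(4.5), where round() rounds halves to the nearest even integer (banker's rounding). Give the exact integer result.</prompt>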
78+
<response>25</response>
79+
</task>
80+
81+
<!-- Complex Numbers -->
82+
<task>
83+
<prompt>Calculate the magnitude of the complex number 3 + 4i.</prompt>
84+
<response>5</response>
85+
</task>
86+
87+
<!-- Base Conversions -->
88+
<task>
89+
<prompt>Convert the hexadecimal number FF to decimal.</prompt>
90+
<response>255</response>
91+
</task>
92+
93+
<!-- Statistical Functions -->
94+
<task>
95+
<prompt>Calculate the median of this dataset: [3, 7, 2, 9, 1, 5, 8].</prompt>
96+
<response>5</response>
97+
</task>
98+
99+
<!-- Recursive Calculations -->
100+
<task>
101+
<prompt>Calculate the 10th Fibonacci number (where F(1)=1, F(2)=1).</prompt>
102+
<response>55</response>
103+
</task>
104+
105+
<!-- Percentage of Percentage -->
106+
<task>
107+
<prompt>What is 25% of 40% of 80% of 500?</prompt>
108+
<response>40</response>
109+
</task>
110+
111+
<!-- Unit Conversions -->
112+
<task>
113+
<prompt>Convert 72 degrees Fahrenheit to Celsius. Round to 1 decimal place.</prompt>
114+
<response>22.2</response>
115+
</task>
116+
</evaluation>

0 commit comments

Comments
 (0)