|
22 | 22 | from websocket import create_connection
|
23 | 23 |
|
24 | 24 |
|
def test_generator():
    """
    Stream several chat queries and validate every chunk the generator yields.

    Four queries are sent through ``interpreter.chat(..., stream=True)``; the
    ``display`` argument comes from each test case, so the checks run with
    display both on and off. For every query the test asserts that:

    * each chunk has "role" and "type", has "content" unless it is a
      start/end flag, and has a string "format" whenever one is present;
    * exactly one start flag and one end flag is emitted for the assistant's
      python code, the assistant's message, and the computer's console output;
    * an assistant message chunk, a computer console chunk, and a console
      chunk with format "active_line" are each seen at least once.
    """

    test_cases = [
        {"query": "What's 38023*40334? Use Python", "display": True},
        {"query": "What's 2334*34335555? Use Python", "display": True},
        {"query": "What's 3545*22? Use Python", "display": False},
        {"query": "What's 0.0021*3433335555? Use Python", "display": False},
    ]

    # (label used in the failure message, flag chunk that must appear exactly once)
    expected_flags = [
        (
            "assistant code start",
            {"role": "assistant", "type": "code", "format": "python", "start": True},
        ),
        (
            "assistant code end",
            {"role": "assistant", "type": "code", "format": "python", "end": True},
        ),
        (
            "assistant message start",
            {"role": "assistant", "type": "message", "start": True},
        ),
        (
            "assistant message end",
            {"role": "assistant", "type": "message", "end": True},
        ),
        (
            "computer console output start",
            {"role": "computer", "type": "console", "start": True},
        ),
        (
            "computer console output end",
            {"role": "computer", "type": "console", "end": True},
        ),
    ]

    for test_case in test_cases:
        assistant_message_found = False
        console_output_found = False
        active_line_found = False
        flag_checker = []

        for chunk in interpreter.chat(
            test_case["query"]
            + "\nNo talk or plan, just immediately code, then tell me the answer.",
            stream=True,
            # Use the per-case value; hard-coding True here would leave the
            # display-off cases untested.
            display=test_case["display"],
        ):
            print(chunk)
            # Check if chunk has the right schema
            assert "role" in chunk, "Chunk missing 'role'"
            assert "type" in chunk, "Chunk missing 'type'"
            if "start" not in chunk and "end" not in chunk:
                assert "content" in chunk, "Chunk missing 'content'"
            if "format" in chunk:
                assert isinstance(chunk["format"], str), "'format' should be a string"

            # Keep every chunk so the start/end flags can be counted afterwards
            flag_checker.append(chunk)

            # Check if assistant message, console output, and active line are found
            if chunk["role"] == "assistant" and chunk["type"] == "message":
                assistant_message_found = True
            if chunk["role"] == "computer" and chunk["type"] == "console":
                console_output_found = True
                if chunk.get("format") == "active_line":
                    active_line_found = True

        # Ensure all flags are proper: each must match a yielded chunk exactly once
        for label, flag in expected_flags:
            assert (
                flag_checker.count(flag) == 1
            ), f"Incorrect number of '{label}' flags"

        # Assert that assistant message, console output, and active line were found
        assert assistant_message_found, "No assistant message was found"
        assert console_output_found, "No console output was found"
        assert active_line_found, "No active line was found"
| 105 | + |
| 106 | + |
25 | 107 | @pytest.mark.skip(reason="Requires uvicorn, which we don't require by default")
|
26 | 108 | def test_server():
|
27 | 109 | # Start the server in a new thread
|
@@ -483,87 +565,6 @@ def setup_function():
|
483 | 565 | interpreter.verbose = False
|
484 | 566 |
|
485 | 567 |
|
486 |
| -def test_generator(): |
487 |
| - """ |
488 |
| - Sends two messages, makes sure everything is correct with display both on and off. |
489 |
| - """ |
490 |
| - |
491 |
| - for tests in [ |
492 |
| - {"query": "What's 38023*40334? Use Python", "display": True}, |
493 |
| - {"query": "What's 2334*34335555? Use Python", "display": True}, |
494 |
| - {"query": "What's 3545*22? Use Python", "display": False}, |
495 |
| - {"query": "What's 0.0021*3433335555? Use Python", "display": False}, |
496 |
| - ]: |
497 |
| - assistant_message_found = False |
498 |
| - console_output_found = False |
499 |
| - active_line_found = False |
500 |
| - flag_checker = [] |
501 |
| - for chunk in interpreter.chat( |
502 |
| - tests["query"] |
503 |
| - + "\nNo talk or plan, just immediately code, then tell me the answer.", |
504 |
| - stream=True, |
505 |
| - display=tests["display"], |
506 |
| - ): |
507 |
| - print(chunk) |
508 |
| - # Check if chunk has the right schema |
509 |
| - assert "role" in chunk, "Chunk missing 'role'" |
510 |
| - assert "type" in chunk, "Chunk missing 'type'" |
511 |
| - if "start" not in chunk and "end" not in chunk: |
512 |
| - assert "content" in chunk, "Chunk missing 'content'" |
513 |
| - if "format" in chunk: |
514 |
| - assert isinstance(chunk["format"], str), "'format' should be a string" |
515 |
| - |
516 |
| - flag_checker.append(chunk) |
517 |
| - |
518 |
| - # Check if assistant message, console output, and active line are found |
519 |
| - if chunk["role"] == "assistant" and chunk["type"] == "message": |
520 |
| - assistant_message_found = True |
521 |
| - if chunk["role"] == "computer" and chunk["type"] == "console": |
522 |
| - console_output_found = True |
523 |
| - if "format" in chunk: |
524 |
| - if ( |
525 |
| - chunk["role"] == "computer" |
526 |
| - and chunk["type"] == "console" |
527 |
| - and chunk["format"] == "active_line" |
528 |
| - ): |
529 |
| - active_line_found = True |
530 |
| - |
531 |
| - # Ensure all flags are proper |
532 |
| - assert ( |
533 |
| - flag_checker.count( |
534 |
| - {"role": "assistant", "type": "code", "format": "python", "start": True} |
535 |
| - ) |
536 |
| - == 1 |
537 |
| - ), "Incorrect number of 'assistant code start' flags" |
538 |
| - assert ( |
539 |
| - flag_checker.count( |
540 |
| - {"role": "assistant", "type": "code", "format": "python", "end": True} |
541 |
| - ) |
542 |
| - == 1 |
543 |
| - ), "Incorrect number of 'assistant code end' flags" |
544 |
| - assert ( |
545 |
| - flag_checker.count({"role": "assistant", "type": "message", "start": True}) |
546 |
| - == 1 |
547 |
| - ), "Incorrect number of 'assistant message start' flags" |
548 |
| - assert ( |
549 |
| - flag_checker.count({"role": "assistant", "type": "message", "end": True}) |
550 |
| - == 1 |
551 |
| - ), "Incorrect number of 'assistant message end' flags" |
552 |
| - assert ( |
553 |
| - flag_checker.count({"role": "computer", "type": "console", "start": True}) |
554 |
| - == 1 |
555 |
| - ), "Incorrect number of 'computer console output start' flags" |
556 |
| - assert ( |
557 |
| - flag_checker.count({"role": "computer", "type": "console", "end": True}) |
558 |
| - == 1 |
559 |
| - ), "Incorrect number of 'computer console output end' flags" |
560 |
| - |
561 |
| - # Assert that assistant message, console output, and active line were found |
562 |
| - assert assistant_message_found, "No assistant message was found" |
563 |
| - assert console_output_found, "No console output was found" |
564 |
| - assert active_line_found, "No active line was found" |
565 |
| - |
566 |
| - |
567 | 568 | @pytest.mark.skip(
|
568 | 569 | reason="Not working consistently, I think GPT related changes? It worked recently"
|
569 | 570 | )
|
|
0 commit comments