diff --git a/run_inference.py b/run_inference.py
index f3ab727b..c8e8f565 100644
--- a/run_inference.py
+++ b/run_inference.py
@@ -45,7 +45,7 @@ def signal_handler(sig, frame):
     # Usage: python run_inference.py -p "Microsoft Corporation is an American multinational corporation and technology company headquartered in Redmond, Washington."
     parser = argparse.ArgumentParser(description='Run inference')
     parser.add_argument("-m", "--model", type=str, help="Path to model file", required=False, default="models/bitnet_b1_58-3B/ggml-model-i2_s.gguf")
-    parser.add_argument("-n", "--n-predict", type=int, help="Number of tokens to predict when generating text", required=False, default=128)
+    parser.add_argument("-n", "--n-predict", type=int, help="Number of tokens to predict when generating text", required=False, default=-1)
     parser.add_argument("-p", "--prompt", type=str, help="Prompt to generate text from", required=True)
     parser.add_argument("-t", "--threads", type=int, help="Number of threads to use", required=False, default=2)
     parser.add_argument("-c", "--ctx-size", type=int, help="Size of the prompt context", required=False, default=2048)
@@ -53,4 +53,4 @@ def signal_handler(sig, frame):
     parser.add_argument("-cnv", "--conversation", action='store_true', help="Whether to enable chat mode or not (for instruct models.)")
     args = parser.parse_args()

-    run_inference()
\ No newline at end of file
+    run_inference()