Skip to content

Commit e8250bc

Browse files
authored
Merge pull request #4 from Node0/main
Added true tokenizer stats
2 parents 405b153 + e2fcbc9 commit e8250bc

File tree

3 files changed

+237
-31
lines changed

3 files changed

+237
-31
lines changed
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import json
5+
import http.client
6+
import sys
7+
8+
def list_models(hostname, port):
    """List available models from the Ollama server."""
    conn = http.client.HTTPConnection(hostname, port)
    try:
        # GET /api/tags is the endpoint that enumerates locally installed models.
        conn.request("GET", "/api/tags")
        resp = conn.getresponse()

        if resp.status != 200:
            print(f"Error: HTTP {resp.status} - {resp.reason}")
            return

        catalog = json.loads(resp.read().decode("utf-8"))

        print("Available Models:\n")
        for entry in catalog.get("models", []):
            meta = entry.get("details", {})
            # One human-readable record per model.
            print(f"Name: {entry.get('name', 'Unknown')}")
            print(f" Size: {entry.get('size', 0) / 1_000_000_000:.2f} GB")  # bytes -> GB
            print(f" Modified At: {entry.get('modified_at', 'Unknown')}")
            print(f" Parameters: {meta.get('parameter_size', 'Unknown')}")
            print(f" Quantization Level: {meta.get('quantization_level', 'Unknown')}")
            print(f" Format: {meta.get('format', 'Unknown')}")
            print("-" * 40)
    finally:
        conn.close()
44+
45+
46+
def stream_ollama_response(hostname, port, model, prompt, system_prompt, temperature, max_tokens):
    """Stream responses from the Ollama server.

    Sends a generate request and prints each streamed token as it arrives,
    then prints the fully concatenated response.

    Args:
        hostname: Hostname of the Ollama server.
        port: Port number of the Ollama server.
        model: Name of the model to generate with.
        prompt: User prompt text.
        system_prompt: System prompt defining the model's behavior.
        temperature: Sampling temperature.
        max_tokens: Maximum number of tokens to generate.
    """
    endpoint = "/api/generate"
    headers = {"Content-Type": "application/json"}
    # FIX: Ollama's /api/generate ignores unknown top-level keys, so the
    # original top-level "temperature"/"max_tokens" were silently dropped.
    # Sampling parameters must be nested under "options", where the token
    # cap is named "num_predict".
    payload = json.dumps({
        "model": model,
        "prompt": prompt,
        "system": system_prompt,
        "options": {
            "temperature": temperature,
            "num_predict": max_tokens
        }
    })

    connection = http.client.HTTPConnection(hostname, port)
    try:
        connection.request("POST", endpoint, body=payload, headers=headers)
        response = connection.getresponse()

        if response.status != 200:
            print(f"Error: HTTP {response.status} - {response.reason}")
            return

        concatenated_response = ""
        # The server streams one JSON object per line; each carries a
        # "response" text fragment until the final object with "done": true.
        for line in response:
            line = line.decode("utf-8").strip()
            if not line:
                continue
            try:
                response_data = json.loads(line)
            except json.JSONDecodeError:
                print(f"\nInvalid JSON: {line}")
                continue
            word = response_data.get("response", "")
            concatenated_response += word
            print(word, end="", flush=True)
            if response_data.get("done"):
                # Final chunk carries timing stats, no further text.
                break

        print("\n\nFinal Response:")
        print(concatenated_response)

    finally:
        connection.close()
84+
85+
def main():
    """Parse CLI arguments and dispatch to model listing or streaming generation.

    Exits with status 1 (message on stderr) when --prompt or --model is
    missing in generation mode.
    """
    parser = argparse.ArgumentParser(description="Interact with Ollama server.")
    parser.add_argument("--ollama-hostname", required=True, help="Hostname of the Ollama server")
    parser.add_argument("--ollama-port", type=int, required=True, help="Port number of the Ollama server")

    # Mutually exclusive arguments: either list models or pick one to run.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--list-models", action="store_true",
                       help="List all available models on the Ollama server")
    group.add_argument("--model", help="Specify the model to use")

    # Other arguments
    parser.add_argument("--prompt", help="Prompt to send to the Ollama server")
    parser.add_argument("--system-prompt", default="You are a helpful assistant.",
                        help="System prompt to define the model's behavior")
    parser.add_argument("--temperature", type=float, default=0.7,
                        help="Temperature for response randomness")
    parser.add_argument("--max-tokens", type=int, default=100,
                        help="Maximum number of tokens to generate")

    args = parser.parse_args()

    # If --list-models is specified, list models and exit
    if args.list_models:
        list_models(args.ollama_hostname, args.ollama_port)
        return

    # Validate other required arguments.
    # FIX: diagnostics go to stderr so piped stdout stays clean
    # (the original printed these errors to stdout).
    if not args.prompt:
        print("Error: --prompt is required when --list-models is not specified.", file=sys.stderr)
        sys.exit(1)

    if not args.model:
        print("Error: --model is required when --list-models is not specified.", file=sys.stderr)
        sys.exit(1)

    # Stream response from the server
    stream_ollama_response(
        args.ollama_hostname,
        args.ollama_port,
        args.model,
        args.prompt,
        args.system_prompt,
        args.temperature,
        args.max_tokens
    )
131+
132+
# Script entry point: run the CLI only when executed directly,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()
134+

python-ideas/tkps_vs_gpu_count_opimizer.py renamed to python-tools/tkps_vs_gpu_count_opimizer.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,24 @@
11
#!/usr/bin/env python3
22

3+
# This program is free software: you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License as published by
5+
# the Free Software Foundation, either version 3 of the License, or
6+
# (at your option) any later version.
7+
#
8+
# This program is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
# GNU General Public License for more details.
12+
#
13+
# You should have received a copy of the GNU General Public License
14+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
15+
#
16+
# Copyright (C) 2025 Joe Hacobian
17+
18+
319
import argparse
420

521
# Constants
6-
CONTINENTS = 7
722
WORDS_PER_ARTICLE = 2500
823
TOKENS_PER_WORD = 1.88
924
SECONDS_PER_HOUR = 3600
@@ -13,13 +28,13 @@
1328
debug = False
1429

1530
# Function to calculate processing time
16-
def calculate_time(tkps, gpu_count, articles_per_continent):
31+
def calculate_time(tkps, gpu_count, articles_per_continent, continents):
1732
if tkps <= 0 or gpu_count <= 0:
1833
if debug:
1934
print(f"[DEBUG] Invalid tkps ({tkps}) or gpu_count ({gpu_count}), returning infinity.")
2035
return float('inf') # Avoid division by zero
2136

22-
total_tokens = articles_per_continent * CONTINENTS * WORDS_PER_ARTICLE * TOKENS_PER_WORD
37+
total_tokens = articles_per_continent * continents * WORDS_PER_ARTICLE * TOKENS_PER_WORD
2338
tokens_processed_per_second = tkps * gpu_count
2439
tokens_processed_per_hour = tokens_processed_per_second * SECONDS_PER_HOUR
2540
time_hours = total_tokens / tokens_processed_per_hour
@@ -30,17 +45,17 @@ def calculate_time(tkps, gpu_count, articles_per_continent):
3045
return time_minutes
3146

3247
# Optimized function to find the optimal configuration
33-
def find_optimal_configuration(target_time, max_tkps, max_gpus, articles_per_continent):
48+
def find_optimal_configuration(target_time, max_tkps, max_gpus, articles_per_continent, continents):
3449
best_configuration = None
3550
closest_time = float('inf')
3651

3752
if debug:
38-
print(f"[DEBUG] Starting optimization: target_time={target_time}, max_tkps={max_tkps}, max_gpus={max_gpus}, articles_per_continent={articles_per_continent}")
53+
print(f"[DEBUG] Starting optimization: target_time={target_time}, max_tkps={max_tkps}, max_gpus={max_gpus}, articles_per_continent={articles_per_continent}, continents={continents}")
3954

4055
# Start by maximizing tkps per GPU and minimizing GPU count
4156
for tkps in range(max_tkps, 0, -1): # Start with the highest feasible tkps
4257
for gpu_count in range(1, max_gpus + 1): # Start with one GPU
43-
current_time = calculate_time(tkps, gpu_count, articles_per_continent)
58+
current_time = calculate_time(tkps, gpu_count, articles_per_continent, continents)
4459

4560
if debug:
4661
print(f"[DEBUG] Checking configuration -> tkps: {tkps}, gpu_count: {gpu_count}, current_time: {current_time:.2f}")
@@ -91,6 +106,12 @@ def main():
91106
default=70,
92107
help="Maximum number of GPUs available (default: 70)."
93108
)
109+
parser.add_argument(
110+
"--continents",
111+
type=int,
112+
default=7,
113+
help="Number of continents of newsfeeds (default: 7)."
114+
)
94115
parser.add_argument(
95116
"--debug",
96117
action="store_true",
@@ -101,19 +122,25 @@ def main():
101122
debug = args.debug
102123

103124
if debug:
104-
print(f"[DEBUG] Input arguments -> articles_per_continent: {args.articles_per_continent}, target_time: {args.target_time}, max_tkps: {args.max_tkps}, max_gpus: {args.max_gpus}")
125+
print(f"[DEBUG] Input arguments -> articles_per_continent: {args.articles_per_continent}, target_time: {args.target_time}, max_tkps: {args.max_tkps}, max_gpus: {args.max_gpus}, continents: {args.continents}")
105126

106127
# Execute optimization
107128
optimal_config, achieved_time = find_optimal_configuration(
108129
target_time=args.target_time,
109130
max_tkps=args.max_tkps,
110131
max_gpus=args.max_gpus,
111-
articles_per_continent=args.articles_per_continent
132+
articles_per_continent=args.articles_per_continent,
133+
continents=args.continents
112134
)
113135

114136
if optimal_config:
115137
optimal_tkps, optimal_gpus = optimal_config
116-
print(f"Optimal Configuration to achieve ~{args.target_time} minutes:")
138+
print("Optimal Configuration to achieve:")
139+
print(f"Target max processing time: <= {args.target_time} minutes")
140+
print(f"Number of continents of newsfeeds: {args.continents}")
141+
print(f"Number of articles per continent per 3 hours: {args.articles_per_continent}")
142+
print()
143+
print("Optimized compute resources estimated for processing time goal:")
117144
print(f"Tokens per second (tkps) per GPU: {optimal_tkps}")
118145
print(f"Number of GPUs: {optimal_gpus}")
119146
print(f"Achieved Processing Time: {achieved_time:.2f} minutes")

0 commit comments

Comments
 (0)