diff --git a/tkyo-drift/README.md b/tkyo-drift/README.md index f833a78..baaebb2 100644 --- a/tkyo-drift/README.md +++ b/tkyo-drift/README.md @@ -114,9 +114,13 @@ tkyoDrift(userSubmission, 'input') 5. Enjoy the benefits of having drift detection: +```bash +npx tkyo cos +npx tkyo scalar +🏎️☁️☁️☁️ ← THAT GUY IS DRIFTING ``` -🏎️☁️☁️☁️ <- THAT GUY IS DRIFTING -``` + +This library will create a tkyoData folder at the project root! Don't forget to add it to your `.gitignore` as it may contain large files depending on your throughput. All logs, scalars, and binary files tkyoDrift needs to operate will be placed there. # How do you use this thing? diff --git a/tkyo-drift/util/downloadTrainingData.py b/tkyo-drift/getHFTrainingData.py similarity index 100% rename from tkyo-drift/util/downloadTrainingData.py rename to tkyo-drift/getHFTrainingData.py diff --git a/tkyo-drift/package-lock.json b/tkyo-drift/package-lock.json index 725ce30..e8397e8 100644 --- a/tkyo-drift/package-lock.json +++ b/tkyo-drift/package-lock.json @@ -1,21 +1,23 @@ { - "name": "tkyodrifttest1", - "version": "1.0.0", + "name": "tkyodrift", + "version": "1.0.6", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "tkyodrifttest1", - "version": "1.0.0", - "license": "ISC", + "name": "tkyodrift", + "version": "1.0.6", + "license": "MIT", "dependencies": { "@xenova/transformers": "^2.17.2", "chalk": "^5.4.1", "cli-table3": "^0.6.5", "fs": "^0.0.1-security", "path": "^0.12.7", - "tkyodrifttest1": "^1.0.0", "uuid": "^11.1.0" + }, + "bin": { + "tkyo": "tkyoDrift.js" } }, "node_modules/@colors/colors": { @@ -938,20 +940,6 @@ "b4a": "^1.6.4" } }, - "node_modules/tkyodrifttest1": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/tkyodrifttest1/-/tkyodrifttest1-1.0.0.tgz", - "integrity": "sha512-475elQaD3QMC4zrVPba9k6783lVTBXm2wmjl0SGB7+S/zzw+fi+P2SmrJyzr9IDNA3DpwBNb+o6kamxFiscT+A==", - "license": "ISC", - "dependencies": { - "@xenova/transformers": "^2.17.2", - "chalk": "^5.4.1", - 
"cli-table3": "^0.6.5", - "fs": "^0.0.1-security", - "path": "^0.12.7", - "uuid": "^11.1.0" - } - }, "node_modules/tunnel-agent": { "version": "0.6.0", "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", diff --git a/tkyo-drift/package.json b/tkyo-drift/package.json index f0d5892..72d96db 100644 --- a/tkyo-drift/package.json +++ b/tkyo-drift/package.json @@ -3,7 +3,7 @@ "version": "1.0.7", "description": "Lightweight CLI tool and library for detecting AI model drift using embeddings and scalar metrics. Tracks semantic, conceptual, and lexical change over time.", "main": "./tkyoDrift.js", - "bin":{ + "bin": { "tkyo": "./tkyoDrift.js" }, "types": "./tkyo.d.ts", @@ -16,9 +16,6 @@ "ai-monitoring", "embedding", "model-drift", - "semantic-drift", - "concept-drift", - "lexical-drift", "ai-evaluation", "machine-learning", "transformers", diff --git a/tkyo-drift/tkyoDrift.js b/tkyo-drift/tkyoDrift.js index eb25e72..8d5f668 100755 --- a/tkyo-drift/tkyoDrift.js +++ b/tkyo-drift/tkyoDrift.js @@ -42,9 +42,9 @@ @@@@@@@@@@@@@@@@@%+:--::=****=:..::-. ...... ...:::::.......................... . @%%%####******+++++++++=============------:::::............. 
...............................::::::::::::::::::::::------=====+++++++*******#######%%%%%%@@@@@@@ @@@@@@@@@@@@@@@@@@%%%##############%%%%%%%%%%%%%%%%%%%%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/ -import tkyoDriftSetTrainingHook from './util/tkyoDriftSetTrainingHook.js'; -import printScalarCLI from './util/printScalarCLI.js'; -import printLogCLI from './util/printLogCLI.js'; +import tkyoDriftSetTrainingHook from './util/batchPythonHook.js'; +import printScalarCLI from './util/logPrintScalarCLI.js'; +import printLogCLI from './util/logPrintCosCLI.js'; import tkyoDrift from './util/oneOffEmb.js'; import chalk from 'chalk'; import path from 'path'; @@ -106,14 +106,14 @@ if (process.argv[1].endsWith('tkyo')) { default: console.log( chalk.gray(` -↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ ↑↑↑ ↗↓↓↓↗ ↓↓↓ ↓↓↓ ↓↓↓↓↓↓↓↓↓↓↓↓↖ - ↑↑↑ ↑↑↑ ↗↑↑↑ ↑↑↑ ↑↑↑ ↑↑↑↑ ↖↑↑ - ↑↑↑ ↑↑↑ ↗↑↑↑ ↑↑↑ ↑↑↑ ↑↑↑ ↖↑↑ - ↑↑↑ ↑↑↑↑↑↑↑↘ ↑↑↑ ↑↑↑↑ ↑↑↑ ↖↑↑ - ↖↑↑ →↑↑ ↑↑↑↘ ↑↑↑↑↑↑↑↑↑↑↑↑↑ ←↑↑ ↑↑↑↗ - ↑↑↑ ↑↑↑ ↑↑↑↘ ↑↑↑ ↑↑↑ ↗↑↑↓ - ↑↑↑ ↑↑↑ ↑↑↑↘ ↑↑↑ ↑↑↑↑ ↗↑↑↑ - ↑↑↑ ↑↑↑ ↑↑↑↘ ↑↑↑ ↑↑↑↑↑↑↑↑↑↑↑↑↑↗ +↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ ↗↑↑ ↗↓↓↓↗ ↓↓↓ ↓↓↓ ↓↓↓↓↓↓↓↓↓↓↓↓↖ + ↗↑↑ ↗↑↑ ↗↑↑↑ ↗↑↑ ↗↑↑ ↗↑↑↑ ↖↑↑ + ↗↑↑ ↗↑↑ ↗↑↑↑ ↗↑↑ ↗↑↑ ↗↑↑ ↖↑↑ + ↗↑↑ ↑↑↑↑↑↑↑↘ ↗↑↑ ↗↑↑↑ ↗↑↑ ↖↑↑ + ↖↑↑ →↑↑ ↑↑↑↘ ↑↑↑↑↑↑↑↑↑↑↑↑↑ ←↑↑ ↗↑↑↓ + ↗↑↑ ↗↑↑ ↑↑↑↘ ↑↑↑ ↖↑↑ ↗↑↑↓ + ↗↑↑ ↗↑↑ ↑↑↑↘ ↑↑↑ ↖↑↑↑ ↗↑↑↗ + ↗↑↑ ↗↑↑ ↑↑↑↘ ↑↑↑ ↖↑↑↑↑↑↑↑↑↑↑↑↑↗ Usage: ${chalk.yellowBright('tkyo')} ${chalk.white('cos')} ${chalk.blueBright( @@ -126,7 +126,7 @@ Usage: ' ' )} Embed dataset and update training baseline -Readme docs in the node package or at ${chalk.blueBright( +Readme docs are in the node package or at ${chalk.blueBright( 'https://github.com/oslabs-beta/tkyo-drift' )} `) diff --git a/tkyo-drift/util/tkyoDriftSetTraining.py b/tkyo-drift/util/batchEmbController.py similarity index 94% rename from tkyo-drift/util/tkyoDriftSetTraining.py rename to tkyo-drift/util/batchEmbController.py index 1955380..623747f 100644 --- 
a/tkyo-drift/util/tkyoDriftSetTraining.py +++ b/tkyo-drift/util/batchEmbController.py @@ -2,8 +2,8 @@ import sys sys.dont_write_bytecode = True # Import helper function to load and embed the data -import pythonTrainingEmb -from writeSharedScalars import write_shared_scalar_metrics +import batchEmbWriter +from batchScalarWriteShared import write_shared_scalar_metrics # Allows the use of time functions @@ -30,7 +30,7 @@ def tkyoDriftSetTraining(data_set_Path, io_type, io_type_name): # Iterate through models dictionary for model_type, model_name in MODELS.items(): - pythonTrainingEmb.trainingEmb( + batchEmbWriter.trainingEmb( model_type=model_type, model_name=model_name, data_path=data_set_Path, diff --git a/tkyo-drift/util/pythonTrainingEmb.py b/tkyo-drift/util/batchEmbWriter.py similarity index 98% rename from tkyo-drift/util/pythonTrainingEmb.py rename to tkyo-drift/util/batchEmbWriter.py index 9a6ec73..7fa659a 100644 --- a/tkyo-drift/util/pythonTrainingEmb.py +++ b/tkyo-drift/util/batchEmbWriter.py @@ -3,7 +3,7 @@ sys.dont_write_bytecode = True # Import helper function to create kmeans of data -import pythonKMeans +import batchMakeKMeans # This is good for vectors/matrices import numpy as np @@ -150,7 +150,7 @@ def chunk_text(text, tokenizer, max_length=512, stride=256): return chunks # Embed Data - print(f"Embedding {io_type}s using {model_name} for {model_type} knowledge...") + print(f"Embedding {io_type}s using {model_name}") # Initialize an empty list to store all input embeddings embeddings = [] # Set the number of examples to process at once (smaller = less memory, larger = faster) @@ -250,7 +250,7 @@ def chunk_text(text, tokenizer, max_length=512, stride=256): embeddings.astype(np.float32).tofile(f) else: print(f"You have >= 100000 {io_type} embeddings: Performing K Means analysis to filter embeddings.") - kMeansEmbedding = pythonKMeans.kMeansClustering(embeddings) + kMeansEmbedding = batchMakeKMeans.kMeansClustering(embeddings) # Assign the number of 
vectors for the training data num_vectors = kMeansEmbedding.shape[0] diff --git a/tkyo-drift/util/pythonKMeans.py b/tkyo-drift/util/batchMakeKMeans.py similarity index 100% rename from tkyo-drift/util/pythonKMeans.py rename to tkyo-drift/util/batchMakeKMeans.py diff --git a/tkyo-drift/util/tkyoDriftSetTrainingHook.js b/tkyo-drift/util/batchPythonHook.js similarity index 96% rename from tkyo-drift/util/tkyoDriftSetTrainingHook.js rename to tkyo-drift/util/batchPythonHook.js index 16c2482..51ca538 100644 --- a/tkyo-drift/util/tkyoDriftSetTrainingHook.js +++ b/tkyo-drift/util/batchPythonHook.js @@ -26,7 +26,7 @@ export default async function tkyoDriftSetTraining( ); } // Ensures we are running tkyoDriftSetTraining.py correctly - const scriptPath = path.join(__dirname, './tkyoDriftSetTraining.py'); + const scriptPath = path.join(__dirname, './batchEmbController.py'); const pyProg = spawn('python3', [ '-u', scriptPath, diff --git a/tkyo-drift/util/writeSharedScalars.py b/tkyo-drift/util/batchScalarWriteShared.py similarity index 98% rename from tkyo-drift/util/writeSharedScalars.py rename to tkyo-drift/util/batchScalarWriteShared.py index cd906b5..d76ce5f 100644 --- a/tkyo-drift/util/writeSharedScalars.py +++ b/tkyo-drift/util/batchScalarWriteShared.py @@ -4,7 +4,7 @@ import numpy as np import time from datetime import datetime -from pythonTrainingEmb import resolve_io_column +from batchEmbWriter import resolve_io_column # * Writes shared scalar metrics (like character length, entropy, etc.) 
for training data # * One file is created per metric (e.g., ioTypeName.characterLength.training.scalar.jsonl) diff --git a/tkyo-drift/util/makeLogEntry.js b/tkyo-drift/util/logMakeEntry.js similarity index 100% rename from tkyo-drift/util/makeLogEntry.js rename to tkyo-drift/util/logMakeEntry.js diff --git a/tkyo-drift/util/makeErrorLogEntry.js b/tkyo-drift/util/logMakeError.js similarity index 100% rename from tkyo-drift/util/makeErrorLogEntry.js rename to tkyo-drift/util/logMakeError.js diff --git a/tkyo-drift/util/printLogCLI.js b/tkyo-drift/util/logPrintCosCLI.js similarity index 100% rename from tkyo-drift/util/printLogCLI.js rename to tkyo-drift/util/logPrintCosCLI.js diff --git a/tkyo-drift/util/printScalarCLI.js b/tkyo-drift/util/logPrintScalarCLI.js similarity index 97% rename from tkyo-drift/util/printScalarCLI.js rename to tkyo-drift/util/logPrintScalarCLI.js index 1ed7ec0..14e8c43 100644 --- a/tkyo-drift/util/printScalarCLI.js +++ b/tkyo-drift/util/logPrintScalarCLI.js @@ -2,8 +2,8 @@ import fs from 'fs'; import path from 'path'; import chalk from 'chalk'; import Table from 'cli-table3'; -import { compareScalarDistributions } from './compareScalarDistributions.js'; -import { loadScalarMetrics } from './loadScalarMetrics.js'; +import { compareScalarDistributions } from './scalarCompare.js'; +import { loadScalarMetrics } from './scalarLoadMetrics.js'; import { OUTPUT_DIR } from './oneOffEmb.js'; export default async function printScalarCLI() { diff --git a/tkyo-drift/util/oneOffEmb.js b/tkyo-drift/util/oneOffEmb.js index 1af6b12..504957e 100644 --- a/tkyo-drift/util/oneOffEmb.js +++ b/tkyo-drift/util/oneOffEmb.js @@ -1,10 +1,10 @@ import fs from 'fs'; import path from 'path'; import { v4 } from 'uuid'; -import { DriftModel } from './DriftModel.js'; -import makeLogEntry from './makeLogEntry.js'; -import makeErrorLogEntry from './makeErrorLogEntry.js'; -import captureSharedScalarMetrics from './captureSharedScalarMetrics.js'; +import { DriftModel } from 
'./oneOffModel.js'; +import makeLogEntry from './logMakeEntry.js'; +import makeErrorLogEntry from './logMakeError.js'; +import captureSharedScalarMetrics from './scalarCaptureShared.js'; // * Global Variables for the utilities // Embedding Models diff --git a/tkyo-drift/util/DriftModel.js b/tkyo-drift/util/oneOffModel.js similarity index 99% rename from tkyo-drift/util/DriftModel.js rename to tkyo-drift/util/oneOffModel.js index 16e9822..4838025 100644 --- a/tkyo-drift/util/DriftModel.js +++ b/tkyo-drift/util/oneOffModel.js @@ -294,7 +294,7 @@ export class DriftModel { ); } // Ensures we are running pythonHNSW.py correctly - const scriptPath = path.join(__dirname, 'pythonHNSW.py'); + const scriptPath = path.join(__dirname, 'sharedHNSW.py'); try { return new Promise((resolve, reject) => { diff --git a/tkyo-drift/util/captureSharedScalarMetrics.js b/tkyo-drift/util/scalarCaptureShared.js similarity index 100% rename from tkyo-drift/util/captureSharedScalarMetrics.js rename to tkyo-drift/util/scalarCaptureShared.js diff --git a/tkyo-drift/util/compareScalarDistributions.js b/tkyo-drift/util/scalarCompare.js similarity index 100% rename from tkyo-drift/util/compareScalarDistributions.js rename to tkyo-drift/util/scalarCompare.js diff --git a/tkyo-drift/util/loadScalarMetrics.js b/tkyo-drift/util/scalarLoadMetrics.js similarity index 100% rename from tkyo-drift/util/loadScalarMetrics.js rename to tkyo-drift/util/scalarLoadMetrics.js diff --git a/tkyo-drift/util/pythonHNSW.py b/tkyo-drift/util/sharedHNSW.py similarity index 100% rename from tkyo-drift/util/pythonHNSW.py rename to tkyo-drift/util/sharedHNSW.py