Skip to content

Refactor/semanticnaming #45

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions tkyo-drift/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,13 @@ tkyoDrift(userSubmission, 'input')

5. Enjoy the benefits of having drift detection:

```bash
npx tkyo cos
npx tkyo scalar
🏎️☁️☁️☁️ ← THAT GUY IS DRIFTING
```
🏎️☁️☁️☁️ <- THAT GUY IS DRIFTING
```

This library will create a `tkyoData` folder at the project root! Don't forget to add it to your `.gitignore`, as it may contain large files depending on your throughput. All logs, scalars, and binary files tkyoDrift needs to operate will be placed there.

# How do you use this thing?

Expand Down
28 changes: 8 additions & 20 deletions tkyo-drift/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 1 addition & 4 deletions tkyo-drift/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"version": "1.0.7",
"description": "Lightweight CLI tool and library for detecting AI model drift using embeddings and scalar metrics. Tracks semantic, conceptual, and lexical change over time.",
"main": "./tkyoDrift.js",
"bin":{
"bin": {
"tkyo": "./tkyoDrift.js"
},
"types": "./tkyo.d.ts",
Expand All @@ -16,9 +16,6 @@
"ai-monitoring",
"embedding",
"model-drift",
"semantic-drift",
"concept-drift",
"lexical-drift",
"ai-evaluation",
"machine-learning",
"transformers",
Expand Down
24 changes: 12 additions & 12 deletions tkyo-drift/tkyoDrift.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@
@@@@@@@@@@@@@@@@@%+:--::=****=:..::-. ...... ...:::::.......................... .
@%%%####******+++++++++=============------:::::............. ...............................::::::::::::::::::::::------=====+++++++*******#######%%%%%%@@@@@@@
@@@@@@@@@@@@@@@@@@%%%##############%%%%%%%%%%%%%%%%%%%%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
import tkyoDriftSetTrainingHook from './util/tkyoDriftSetTrainingHook.js';
import printScalarCLI from './util/printScalarCLI.js';
import printLogCLI from './util/printLogCLI.js';
import tkyoDriftSetTrainingHook from './util/batchPythonHook.js';
import printScalarCLI from './util/logPrintScalarCLI.js';
import printLogCLI from './util/logPrintCosCLI.js';
import tkyoDrift from './util/oneOffEmb.js';
import chalk from 'chalk';
import path from 'path';
Expand Down Expand Up @@ -106,14 +106,14 @@ if (process.argv[1].endsWith('tkyo')) {
default:
console.log(
chalk.gray(`
↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ ↑↑↑ ↗↓↓↓↗ ↓↓↓ ↓↓↓ ↓↓↓↓↓↓↓↓↓↓↓↓↖
↑↑↑ ↑↑↑ ↗↑↑↑ ↑↑↑ ↑↑↑ ↑↑↑ ↖↑↑
↑↑↑ ↑↑↑ ↗↑↑↑ ↑↑↑ ↑↑↑ ↑↑ ↖↑↑
↑↑↑ ↑↑↑↑↑↑↑↘ ↑↑↑ ↑↑↑ ↑↑ ↖↑↑
↖↑↑ →↑↑ ↑↑↑↘ ↑↑↑↑↑↑↑↑↑↑↑↑↑ ←↑↑ ↑↑↑↗
↑↑↑ ↑↑↑ ↑↑↑↘ ↑↑↑ ↑↑ ↗↑↑↓
↑↑↑ ↑↑↑ ↑↑↑↘ ↑↑↑ ↑↑↑ ↗↑↑
↑↑↑ ↑↑↑ ↑↑↑↘ ↑↑↑ ↑↑↑↑↑↑↑↑↑↑↑↑↗
↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ ↗↑↑ ↗↓↓↓↗ ↓↓↓ ↓↓↓ ↓↓↓↓↓↓↓↓↓↓↓↓↖
↗↑↑ ↗↑↑ ↗↑↑↑ ↗↑↑ ↗↑↑ ↑↑↑ ↖↑↑
↗↑↑ ↗↑↑ ↗↑↑↑ ↗↑↑ ↗↑↑ ↑↑ ↖↑↑
↗↑↑ ↑↑↑↑↑↑↑↘ ↗↑↑ ↑↑↑ ↑↑ ↖↑↑
↖↑↑ →↑↑ ↑↑↑↘ ↑↑↑↑↑↑↑↑↑↑↑↑↑ ←↑↑ ↗↑↑↓
↗↑↑ ↗↑↑ ↑↑↑↘ ↑↑↑ ↑↑ ↗↑↑↓
↗↑↑ ↗↑↑ ↑↑↑↘ ↑↑↑ ↑↑↑ ↗↑↑
↗↑↑ ↗↑↑ ↑↑↑↘ ↑↑↑ ↑↑↑↑↑↑↑↑↑↑↑↑↗

Usage:
${chalk.yellowBright('tkyo')} ${chalk.white('cos')} ${chalk.blueBright(
Expand All @@ -126,7 +126,7 @@ Usage:
'<path to data> <column name> <ioType>'
)} Embed dataset and update training baseline

Readme docs in the node package or at ${chalk.blueBright(
Readme docs are in the node package or at ${chalk.blueBright(
'https://github.com/oslabs-beta/tkyo-drift'
)}
`)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import sys
sys.dont_write_bytecode = True
# Import helper function to load and embed the data
import pythonTrainingEmb
from writeSharedScalars import write_shared_scalar_metrics
import batchEmbWriter
from batchScalarWriteShared import write_shared_scalar_metrics


# Allows the use of time functions
Expand All @@ -30,7 +30,7 @@ def tkyoDriftSetTraining(data_set_Path, io_type, io_type_name):

# Iterate through models dictionary
for model_type, model_name in MODELS.items():
pythonTrainingEmb.trainingEmb(
batchEmbWriter.trainingEmb(
model_type=model_type,
model_name=model_name,
data_path=data_set_Path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
sys.dont_write_bytecode = True

# Import helper function to create kmeans of data
import pythonKMeans
import batchMakeKMeans

# This is good for vectors/matrices
import numpy as np
Expand Down Expand Up @@ -150,7 +150,7 @@ def chunk_text(text, tokenizer, max_length=512, stride=256):
return chunks

# Embed Data
print(f"Embedding {io_type}s using {model_name} for {model_type} knowledge...")
print(f"Embedding {io_type}s using {model_name}")
# Initialize an empty list to store all input embeddings
embeddings = []
# Set the number of examples to process at once (smaller = less memory, larger = faster)
Expand Down Expand Up @@ -250,7 +250,7 @@ def chunk_text(text, tokenizer, max_length=512, stride=256):
embeddings.astype(np.float32).tofile(f)
else:
print(f"You have >= 100000 {io_type} embeddings: Performing K Means analysis to filter embeddings.")
kMeansEmbedding = pythonKMeans.kMeansClustering(embeddings)
kMeansEmbedding = batchMakeKMeans.kMeansClustering(embeddings)

# Assign the number of vectors for the training data
num_vectors = kMeansEmbedding.shape[0]
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export default async function tkyoDriftSetTraining(
);
}
// Ensures we are running batchEmbController.py correctly
const scriptPath = path.join(__dirname, './tkyoDriftSetTraining.py');
const scriptPath = path.join(__dirname, './batchEmbController.py');
const pyProg = spawn('python3', [
'-u',
scriptPath,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import time
from datetime import datetime
from pythonTrainingEmb import resolve_io_column
from batchEmbWriter import resolve_io_column

# * Writes shared scalar metrics (like character length, entropy, etc.) for training data
# * One file is created per metric (e.g., ioTypeName.characterLength.training.scalar.jsonl)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ import fs from 'fs';
import path from 'path';
import chalk from 'chalk';
import Table from 'cli-table3';
import { compareScalarDistributions } from './compareScalarDistributions.js';
import { loadScalarMetrics } from './loadScalarMetrics.js';
import { compareScalarDistributions } from './scalarCompare.js';
import { loadScalarMetrics } from './scalarLoadMetrics.js';
import { OUTPUT_DIR } from './oneOffEmb.js';

export default async function printScalarCLI() {
Expand Down
8 changes: 4 additions & 4 deletions tkyo-drift/util/oneOffEmb.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import fs from 'fs';
import path from 'path';
import { v4 } from 'uuid';
import { DriftModel } from './DriftModel.js';
import makeLogEntry from './makeLogEntry.js';
import makeErrorLogEntry from './makeErrorLogEntry.js';
import captureSharedScalarMetrics from './captureSharedScalarMetrics.js';
import { DriftModel } from './oneOffModel.js';
import makeLogEntry from './logMakeEntry.js';
import makeErrorLogEntry from './logMakeError.js';
import captureSharedScalarMetrics from './scalarCaptureShared.js';

// * Global Variables for the utilities
// Embedding Models
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ export class DriftModel {
);
}
// Ensures we are running sharedHNSW.py correctly
const scriptPath = path.join(__dirname, 'pythonHNSW.py');
const scriptPath = path.join(__dirname, 'sharedHNSW.py');

try {
return new Promise((resolve, reject) => {
Expand Down
File renamed without changes.