From 03cb8f3e71ff13e8a2288e2052ce6b6d352d24bb Mon Sep 17 00:00:00 2001
From: Jorge Cortes
Date: Fri, 23 May 2025 17:13:26 -0500
Subject: [PATCH] [Components] lamini - new components

---
 .../create-fine-tune-job.mjs                  | 103 +++++++++++
 .../actions/evaluate-job/evaluate-job.mjs     |  43 +++++
 .../generate-completion.mjs                   |  75 ++++++++
 .../get-batch-completions.mjs                 |  88 +++++++++
 .../actions/upload-dataset/upload-dataset.mjs | 117 ++++++++++++
 components/lamini/common/constants.mjs        |  14 ++
 components/lamini/common/utils.mjs            |  34 ++++
 components/lamini/lamini.app.mjs              | 171 +++++++++++++++++-
 components/lamini/package.json                |   7 +-
 .../sources/get-job-status/get-job-status.mjs |  65 +++++++
 pnpm-lock.yaml                                |  11 +-
 11 files changed, 717 insertions(+), 11 deletions(-)
 create mode 100644 components/lamini/actions/create-fine-tune-job/create-fine-tune-job.mjs
 create mode 100644 components/lamini/actions/evaluate-job/evaluate-job.mjs
 create mode 100644 components/lamini/actions/generate-completion/generate-completion.mjs
 create mode 100644 components/lamini/actions/get-batch-completions/get-batch-completions.mjs
 create mode 100644 components/lamini/actions/upload-dataset/upload-dataset.mjs
 create mode 100644 components/lamini/common/constants.mjs
 create mode 100644 components/lamini/common/utils.mjs
 create mode 100644 components/lamini/sources/get-job-status/get-job-status.mjs

diff --git a/components/lamini/actions/create-fine-tune-job/create-fine-tune-job.mjs b/components/lamini/actions/create-fine-tune-job/create-fine-tune-job.mjs
new file mode 100644
index 0000000000000..2767721143c0b
--- /dev/null
+++ b/components/lamini/actions/create-fine-tune-job/create-fine-tune-job.mjs
@@ -0,0 +1,103 @@
+import app from "../../lamini.app.mjs";
+import constants from "../../common/constants.mjs";
+import utils from "../../common/utils.mjs";
+
+export default {
+  key: "lamini-create-fine-tune-job",
+  name: "Create Fine-Tune Job",
+  description: "Create a fine-tuning job with a dataset. [See the documentation](https://docs.lamini.ai/api/).",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    app,
+    modelName: {
+      description: "Base model to be fine-tuned.",
+      propDefinition: [
+        app,
+        "modelName",
+        () => ({
+          includeFineTunedModels: false,
+        }),
+      ],
+    },
+    datasetId: {
+      type: "string",
+      label: "Dataset ID",
+      description: "Previously uploaded dataset to use for training. Please use the **Upload Dataset** action to upload a dataset.",
+    },
+    fineTuneArgs: {
+      type: "object",
+      label: "Finetune Arguments",
+      description: "Optional hyperparameters for fine-tuning. Each property is optional:\n- `index_pq_m`: Number of subquantizers for PQ (e.g. 8)\n- `index_max_size`: Maximum index size (e.g. 65536)\n- `max_steps`: Maximum number of training steps (e.g. 60)\n- `batch_size`: Training batch size (e.g. 1)\n- `learning_rate`: Learning rate (e.g. 0.0003)\n- `index_pq_nbits`: Number of bits per subquantizer (e.g. 8)\n- `max_length`: Maximum sequence length (e.g. 2048)\n- `index_ivf_nlist`: Number of IVF lists (e.g. 2048)\n- `save_steps`: Steps between checkpoints (e.g. 60)\n- `args_name`: Name for the argument set (e.g. \"demo\")\n- `r_value`: R value for LoRA (e.g. 32)\n- `index_hnsw_m`: Number of neighbors in HNSW (e.g. 32)\n- `index_method`: Indexing method (e.g. \"IndexIVFPQ\")\n- `optim`: Optimizer to use (e.g. \"adafactor\")\n- `index_hnsw_efConstruction`: HNSW construction parameter (e.g. 16)\n- `index_hnsw_efSearch`: HNSW search parameter (e.g. 8)\n- `index_k`: Number of nearest neighbors (e.g. 2)\n- `index_ivf_nprobe`: Number of IVF probes (e.g. 48)\n- `eval_steps`: Steps between evaluations (e.g. 30)\n[See the documentation](https://docs.lamini.ai/tuning/hyperparameters/#finetune_args).",
+      optional: true,
+    },
+    gpuConfig: {
+      type: "object",
+      label: "GPU Config",
+      description: "Optional GPU configuration for fine-tuning. [See the documentation](https://docs.lamini.ai/tuning/hyperparameters/#gpu_config).",
+      optional: true,
+    },
+    isPublic: {
+      type: "boolean",
+      label: "Is Public",
+      description: "Whether this fine-tuning job and dataset should be publicly accessible.",
+      optional: true,
+    },
+    customModelName: {
+      type: "string",
+      label: "Custom Model Name",
+      description: "A human-readable name for the fine-tuned model.",
+      optional: true,
+    },
+  },
+  methods: {
+    createFineTuneJob(args = {}) {
+      return this.app.post({
+        versionPath: constants.VERSION_PATH.V1,
+        path: "/train",
+        ...args,
+      });
+    },
+  },
+  async run({ $ }) {
+    const {
+      app,
+      createFineTuneJob,
+      modelName,
+      datasetId,
+      fineTuneArgs,
+      gpuConfig,
+      isPublic,
+      customModelName,
+    } = this;
+
+    const { upload_base_path: uploadBasePath } =
+      await app.getUploadBasePath({
+        $,
+      });
+
+    await app.getExistingDataset({
+      $,
+      data: {
+        dataset_id: datasetId,
+        upload_base_path: uploadBasePath,
+      },
+    });
+
+    const response = await createFineTuneJob({
+      $,
+      data: {
+        model_name: modelName,
+        dataset_id: datasetId,
+        upload_file_path: `${uploadBasePath}/${datasetId}.jsonlines`,
+        finetune_args: utils.parseJson(fineTuneArgs),
+        gpu_config: utils.parseJson(gpuConfig),
+        is_public: isPublic,
+        custom_model_name: customModelName,
+      },
+    });
+
+    $.export("$summary", `Successfully created a fine-tune job with ID \`${response.job_id}\`.`);
+    return response;
+  },
+};
diff --git a/components/lamini/actions/evaluate-job/evaluate-job.mjs b/components/lamini/actions/evaluate-job/evaluate-job.mjs
new file mode 100644
index 0000000000000..53979fb75b51d
--- /dev/null
+++ b/components/lamini/actions/evaluate-job/evaluate-job.mjs
@@ -0,0 +1,43 @@
+import app from "../../lamini.app.mjs";
+
+export default {
+  key: "lamini-evaluate-job",
+  name: "Evaluate Job",
+  description: "Evaluate a fine-tuning job by job ID. [See the documentation](https://docs.lamini.ai/api/).",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    app,
+    jobId: {
+      propDefinition: [
+        app,
+        "jobId",
+      ],
+      description: "The ID of the fine-tuning job to evaluate.",
+    },
+  },
+  methods: {
+    evaluateJob({
+      jobId, ...args
+    } = {}) {
+      return this.app.makeRequest({
+        path: `/finetune_eval/jobs/${jobId}`,
+        ...args,
+      });
+    },
+  },
+  async run({ $ }) {
+    const {
+      evaluateJob,
+      jobId,
+    } = this;
+
+    const response = await evaluateJob({
+      $,
+      jobId,
+    });
+
+    $.export("$summary", `Successfully evaluated job with ID \`${jobId}\`.`);
+    return response;
+  },
+};
diff --git a/components/lamini/actions/generate-completion/generate-completion.mjs b/components/lamini/actions/generate-completion/generate-completion.mjs
new file mode 100644
index 0000000000000..e4e8510de22a0
--- /dev/null
+++ b/components/lamini/actions/generate-completion/generate-completion.mjs
@@ -0,0 +1,75 @@
+import app from "../../lamini.app.mjs";
+import utils from "../../common/utils.mjs";
+
+export default {
+  key: "lamini-generate-completion",
+  name: "Generate Completion",
+  description: "Generate completions using a Lamini model. [See the documentation](https://docs.lamini.ai/api/).",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    app,
+    modelName: {
+      propDefinition: [
+        app,
+        "modelName",
+      ],
+      description: "The model to use for completion.",
+    },
+    prompt: {
+      type: "string",
+      label: "Prompt",
+      description: "The prompt to send to the model.",
+    },
+    outputType: {
+      propDefinition: [
+        app,
+        "outputType",
+      ],
+    },
+    maxTokens: {
+      propDefinition: [
+        app,
+        "maxTokens",
+      ],
+    },
+    maxNewTokens: {
+      propDefinition: [
+        app,
+        "maxNewTokens",
+      ],
+    },
+  },
+  methods: {
+    generateCompletion(args = {}) {
+      return this.app.post({
+        path: "/completions",
+        ...args,
+      });
+    },
+  },
+  async run({ $ }) {
+    const {
+      generateCompletion,
+      modelName,
+      prompt,
+      outputType,
+      maxTokens,
+      maxNewTokens,
+    } = this;
+
+    const response = await generateCompletion({
+      $,
+      data: {
+        model_name: modelName,
+        prompt,
+        output_type: utils.parseJson(outputType),
+        max_tokens: maxTokens,
+        max_new_tokens: maxNewTokens,
+      },
+    });
+
+    $.export("$summary", `Successfully generated completion for prompt with model ${modelName}.`);
+    return response;
+  },
+};
diff --git a/components/lamini/actions/get-batch-completions/get-batch-completions.mjs b/components/lamini/actions/get-batch-completions/get-batch-completions.mjs
new file mode 100644
index 0000000000000..6d4bec55e3ff7
--- /dev/null
+++ b/components/lamini/actions/get-batch-completions/get-batch-completions.mjs
@@ -0,0 +1,88 @@
+import app from "../../lamini.app.mjs";
+import utils from "../../common/utils.mjs";
+
+export default {
+  key: "lamini-get-batch-completions",
+  name: "Get Batch Completions",
+  description: "Retrieve the results of a batch completion request from Lamini. [See the documentation](https://docs.lamini.ai/api/).",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    app,
+    modelName: {
+      propDefinition: [
+        app,
+        "modelName",
+      ],
+    },
+    prompt: {
+      type: "string[]",
+      label: "Prompts",
+      description: "The prompts to use for completion.",
+    },
+    outputType: {
+      propDefinition: [
+        app,
+        "outputType",
+      ],
+    },
+    maxTokens: {
+      propDefinition: [
+        app,
+        "maxTokens",
+      ],
+    },
+    maxNewTokens: {
+      propDefinition: [
+        app,
+        "maxNewTokens",
+      ],
+    },
+  },
+  methods: {
+    submitBatchCompletions(args = {}) {
+      return this.app.post({
+        path: "/batch_completions",
+        ...args,
+      });
+    },
+    getBatchCompletions({
+      id, ...args
+    } = {}) {
+      return this.app.makeRequest({
+        path: `/batch_completions/${id}/result`,
+        ...args,
+      });
+    },
+  },
+  async run({ $ }) {
+    const {
+      submitBatchCompletions,
+      getBatchCompletions,
+      modelName,
+      prompt,
+      outputType,
+      maxTokens,
+      maxNewTokens,
+    } = this;
+
+    const { id } = await submitBatchCompletions({
+      $,
+      data: {
+        model_name: modelName,
+        prompt,
+        output_type: utils.parseJson(outputType),
+        max_tokens: maxTokens,
+        max_new_tokens: maxNewTokens,
+      },
+    });
+
+    const response = await getBatchCompletions({
+      $,
+      id,
+    });
+
+    $.export("$summary", `Successfully submitted batch completion with ID \`${id}\`.`);
+    return response;
+  },
+};
diff --git a/components/lamini/actions/upload-dataset/upload-dataset.mjs b/components/lamini/actions/upload-dataset/upload-dataset.mjs
new file mode 100644
index 0000000000000..bd756d405a50c
--- /dev/null
+++ b/components/lamini/actions/upload-dataset/upload-dataset.mjs
@@ -0,0 +1,117 @@
+import fs from "fs";
+import { ConfigurationError } from "@pipedream/platform";
+import app from "../../lamini.app.mjs";
+import constants from "../../common/constants.mjs";
+
+export default {
+  key: "lamini-upload-dataset",
"lamini-upload-dataset", + name: "Upload Dataset", + description: "Upload a dataset to Lamini for training.", + version: "0.0.1", + type: "action", + props: { + app, + fileUrl: { + type: "string", + label: "Dataset File URL", + description: "URL of the file containing your training data. Supported formats include `.jsonl` and `.jsonlines`. Eg. `https://raw.githubusercontent.com/lamini-ai/lamini-examples/refs/heads/main/data/results/spot_check_results.jsonl`.", + }, + inputKey: { + type: "string", + label: "Input Key", + description: "Key of the JSON dictionary to use as the input. For CSV files, this should be a column header. Eg. `question`.", + }, + outputKey: { + type: "string", + label: "Output Key", + description: "Key of the JSON dictionary to use as the output. For CSV files, this should be a column header. Eg. `answer`.", + optional: true, + }, + isPublic: { + type: "boolean", + label: "Is Public", + description: "Whether this dataset should be publicly accessible.", + optional: true, + }, + }, + methods: { + uploadLocalData(args = {}) { + return this.app.post({ + versionPath: constants.VERSION_PATH.V1, + path: "/local-data", + ...args, + }); + }, + getUploadBasePath() { + return this.app.makeRequest({ + versionPath: constants.VERSION_PATH.V1, + path: "/get-upload-base-path", + }); + }, + }, + async run({ $ }) { + const { + app, + fileUrl, + inputKey, + outputKey, + isPublic, + } = this; + + const { + fileName, filePath, + } = await app.downloadFileToTmp({ + $, + url: fileUrl, + }); + + if (!fileName.endsWith(".jsonl") && !fileName.endsWith(".jsonlines")) { + throw new ConfigurationError(`Unsupported file format for \`${fileName}\`. Only **.jsonl** and **.jsonlines** files are supported.`); + } + + const { upload_base_path: uploadBasePath } = await this.getUploadBasePath(); + + let allData = []; + + const fileContent = fs.readFileSync(filePath, "utf8"); + const lines = fileContent.trim().split("\n"); + + for (const line of lines) { + try { + const row = JSON.parse(line); + if (!row[inputKey]) { + throw new ConfigurationError(`File ${fileName} is missing required key: ${inputKey}`); + } + allData.push({ + input: row[inputKey], + output: row[outputKey] || "", + }); + } catch (e) { + if (e.message.includes("missing required key")) { + throw e; + } + // Skip invalid JSON lines + } + } + + if (allData.length === 0) { + throw new ConfigurationError("No valid data found in the provided files."); + } + + const response = await this.uploadLocalData({ + $, + data: { + upload_base_path: uploadBasePath, + data: allData, + is_public: isPublic, + }, + }); + + $.export("$summary", `Successfully uploaded file as dataset with ID \`${response.dataset_id}\`.`); + + return { + ...response, + num_data_points: allData.length, + }; + }, +}; diff --git a/components/lamini/common/constants.mjs b/components/lamini/common/constants.mjs new file mode 100644 index 0000000000000..e9310ffa053db --- /dev/null +++ b/components/lamini/common/constants.mjs @@ -0,0 +1,14 @@ +const BASE_URL = "https://api.lamini.ai"; + +const VERSION_PATH = { + V1: "/v1", + V2: "/v2", + V3: "/v3", + V1ALPHA: "/v1alpha", + ALPHA: "/alpha", +}; + +export default { + BASE_URL, + VERSION_PATH, +}; diff --git a/components/lamini/common/utils.mjs b/components/lamini/common/utils.mjs new file mode 100644 index 0000000000000..912c17a941700 --- /dev/null +++ b/components/lamini/common/utils.mjs @@ -0,0 +1,34 @@ +const parseJson = (input, maxDepth = 100) => { + const seen = new WeakSet(); + const parse = (value) => { + if (maxDepth <= 0) { + 
+      return value;
+    }
+    if (typeof(value) === "string") {
+      try {
+        return parseJson(JSON.parse(value), maxDepth - 1);
+      } catch (e) {
+        return value;
+      }
+    } else if (typeof(value) === "object" && value !== null) {
+      if (seen.has(value)) {
+        return value;
+      }
+      seen.add(value);
+      return Object.entries(value)
+        .reduce((acc, [
+          key,
+          val,
+        ]) => Object.assign(acc, {
+          [key]: parse(val),
+        }), {});
+    }
+    return value;
+  };
+
+  return parse(input);
+};
+
+export default {
+  parseJson,
+};
diff --git a/components/lamini/lamini.app.mjs b/components/lamini/lamini.app.mjs
index 68531f340f4ac..98adac5f98858 100644
--- a/components/lamini/lamini.app.mjs
+++ b/components/lamini/lamini.app.mjs
@@ -1,11 +1,174 @@
+import fs from "fs";
+import {
+  axios,
+  ConfigurationError,
+} from "@pipedream/platform";
+import constants from "./common/constants.mjs";
+
 export default {
   type: "app",
   app: "lamini",
-  propDefinitions: {},
+  propDefinitions: {
+    modelName: {
+      type: "string",
+      label: "Model Name",
+      description: "The name of the model to use.",
+      async options({ includeFineTunedModels = true }) {
+        const models = await this.listDownloadedModels();
+        const hfModels = models.map(({ model_name: modelName }) => modelName);
+
+        if (!includeFineTunedModels) {
+          return hfModels;
+        }
+
+        const jobs = await this.listTrainedJobs();
+        const tunedModels = jobs.filter(({ status }) => status === "COMPLETED")
+          .map(({
+            custom_model_name: label,
+            model_name: value,
+          }) => ({
+            label,
+            value,
+          }));
+
+        return hfModels.concat(tunedModels);
+      },
+    },
+    jobId: {
+      type: "string",
+      label: "Job ID",
+      description: "The ID of the fine-tuning job to use.",
+      async options() {
+        const jobs = await this.listTrainedJobs();
+        return jobs
+          .filter(({ status }) => status === "COMPLETED")
+          .map(({
+            custom_model_name: label,
+            job_id: value,
+          }) => ({
+            value,
+            label,
+          }));
+      },
+    },
+    outputType: {
+      type: "object",
+      label: "Output Type",
+      description: "Output type specification for structured outputs. E.g. `{ answer: \"str\" }`.",
+    },
+    maxTokens: {
+      type: "integer",
+      label: "Max Tokens",
+      description: "The maximum number of tokens to generate.",
+      optional: true,
+    },
+    maxNewTokens: {
+      type: "integer",
+      label: "Max New Tokens",
+      description: "The maximum number of new tokens to generate.",
+      optional: true,
+    },
+  },
   methods: {
-    // this.$auth contains connected account data
-    authKeys() {
-      console.log(Object.keys(this.$auth));
+    getUrl(path, versionPath = constants.VERSION_PATH.V1) {
+      return `${constants.BASE_URL}${versionPath}${path}`;
+    },
+    getHeaders(headers) {
+      if (headers?.noHeaders) {
+        return;
+      }
+      return {
+        ...headers,
+        "Authorization": `Bearer ${this.$auth.api_key}`,
+      };
+    },
+    async makeRequest({
+      $ = this, path, url, versionPath, headers, ...args
+    } = {}) {
+      try {
+        return await axios($, {
+          ...args,
+          url: url || this.getUrl(path, versionPath),
+          headers: this.getHeaders(headers),
+        });
+
+      } catch (error) {
+        if (error.status === 424 && error.data === "") {
+          console.log("API is not ready yet.", error);
+          throw new ConfigurationError("The API is not ready yet. Please try again later.");
+        }
+        throw error;
+      }
+    },
+    post(args = {}) {
+      return this.makeRequest({
+        method: "post",
+        ...args,
+      });
+    },
+    completions(args = {}) {
+      return this.post({
+        versionPath: constants.VERSION_PATH.V2,
+        path: "/completions",
+        ...args,
+      });
+    },
+    listDownloadedModels(args = {}) {
+      return this.makeRequest({
+        versionPath: constants.VERSION_PATH.V1ALPHA,
+        path: "/downloaded_models",
+        ...args,
+      });
+    },
+    listTrainedJobs(args = {}) {
+      return this.makeRequest({
+        path: "/train/jobs",
+        ...args,
+      });
+    },
+    getJobStatus({
+      jobId, ...args
+    } = {}) {
+      return this.makeRequest({
+        path: `/train/jobs/${jobId}`,
+        ...args,
+      });
+    },
+    getUploadBasePath(args = {}) {
+      return this.makeRequest({
+        path: "/get-upload-base-path",
+        ...args,
+      });
+    },
+    getExistingDataset(args = {}) {
+      return this.post({
+        path: "/existing-data",
+        ...args,
+      });
+    },
+    async downloadFileToTmp({
+      $, url,
+    }) {
+      const fileName = url.split("/").pop();
+
+      const resp = await this.makeRequest({
+        $,
+        url,
+        responseType: "arraybuffer",
+        headers: {
+          noHeaders: true,
+        },
+      });
+      const rawcontent = resp.toString("base64");
+      const buffer = Buffer.from(rawcontent, "base64");
+      const filePath = `/tmp/${fileName}`;
+
+      fs.writeFileSync(filePath, buffer);
+
+      return {
+        fileName,
+        filePath,
+      };
     },
   },
 };
diff --git a/components/lamini/package.json b/components/lamini/package.json
index 4b9f5853b00ca..5f991c55abace 100644
--- a/components/lamini/package.json
+++ b/components/lamini/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/lamini",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "Pipedream Lamini Components",
   "main": "lamini.app.mjs",
   "keywords": [
@@ -11,5 +11,8 @@
   "author": "Pipedream (https://pipedream.com/)",
   "publishConfig": {
     "access": "public"
+  },
+  "dependencies": {
+    "@pipedream/platform": "^3.0.3"
   }
-}
\ No newline at end of file
+}
diff --git a/components/lamini/sources/get-job-status/get-job-status.mjs b/components/lamini/sources/get-job-status/get-job-status.mjs
new file mode 100644
index 0000000000000..72f3e481689ae
--- /dev/null
+++ b/components/lamini/sources/get-job-status/get-job-status.mjs
@@ -0,0 +1,65 @@
+import {
+  ConfigurationError,
+  DEFAULT_POLLING_SOURCE_TIMER_INTERVAL,
+} from "@pipedream/platform";
+import app from "../../lamini.app.mjs";
+
+export default {
+  key: "lamini-get-job-status",
+  name: "New Job Status",
+  description: "Emit new events with the status of a training job. [See the documentation](https://docs.lamini.ai/api/).",
+  version: "0.0.1",
+  type: "source",
+  props: {
+    app,
+    timer: {
+      type: "$.interface.timer",
+      label: "Polling Schedule",
+      description: "How often to poll the API.",
+      default: {
+        intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL,
+      },
+    },
+    jobId: {
+      propDefinition: [
+        app,
+        "jobId",
+      ],
+    },
+  },
+  methods: {
+    processEvent(response) {
+      const ts = Date.now();
+      this.$emit(response, {
+        id: `${this.jobId}-${ts}`,
+        summary: `New Status ${response.status}`,
+        ts,
+      });
+    },
+  },
+  async run({ $ }) {
+    const {
+      app,
+      processEvent,
+      jobId,
+    } = this;
+
+    let response;
+
+    try {
+      response = await app.getJobStatus({
+        $,
+        jobId,
+      });
+
+    } catch (error) {
+      if (error.status === 524 && error.data === "") {
+        throw new ConfigurationError(`Could not retrieve the status of job \`${jobId}\`. The job may have failed or may not be complete yet.`);
+      }
+      console.log("Failed to fetch job status", JSON.stringify(error.message));
+      throw new ConfigurationError("Failed to fetch job status");
+    }
+
+    processEvent(response);
+  },
+};
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index e3d5267dd7b43..37425bf31ef2d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -7136,7 +7136,11 @@ importers:
 
   components/lambdatest: {}
 
-  components/lamini: {}
+  components/lamini:
+    dependencies:
+      '@pipedream/platform':
+        specifier: ^3.0.3
+        version: 3.0.3
 
   components/landbot:
     dependencies:
@@ -11169,8 +11173,7 @@ importers:
       specifier: ^3.0.3
      version: 3.0.3
 
-  components/ringg_ai:
-    specifiers: {}
+  components/ringg_ai: {}
 
   components/ringover:
     dependencies:
@@ -35904,8 +35907,6 @@ snapshots:
      '@putout/operator-filesystem': 5.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3))
      '@putout/operator-json': 2.2.0
      putout: 36.13.1(eslint@8.57.1)(typescript@5.6.3)
-    transitivePeerDependencies:
-      - supports-color
 
   '@putout/operator-regexp@1.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3))':
     dependencies: