diff --git a/package.json b/package.json
index 361158609..00d1d3622 100644
--- a/package.json
+++ b/package.json
@@ -24,7 +24,7 @@
   "scripts": {
     "format": "prettier --write .",
     "format:check": "prettier --check .",
-    "typegen": "tsc ./src/transformers.js --allowJs --declaration --emitDeclarationOnly --declarationMap --outDir types",
+    "typegen": "tsc --build",
     "dev": "webpack serve --no-client-overlay",
     "build": "webpack && npm run typegen",
     "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --verbose",
diff --git a/src/base/image_processors_utils.js b/src/base/image_processors_utils.js
index 6788258f6..ce5bf2298 100644
--- a/src/base/image_processors_utils.js
+++ b/src/base/image_processors_utils.js
@@ -604,14 +604,20 @@ export class ImageProcessor extends Callable {
         this.do_thumbnail = config.do_thumbnail;
         this.size = config.size ?? config.image_size;
         this.do_resize = config.do_resize ?? (this.size !== undefined);
+        // @ts-expect-error TS2339
         this.size_divisibility = config.size_divisibility ?? config.size_divisor;
 
         this.do_center_crop = config.do_center_crop;
+        // @ts-expect-error TS2339
         this.crop_size = config.crop_size;
+        // @ts-expect-error TS2339
         this.do_convert_rgb = config.do_convert_rgb ?? true;
+        // @ts-expect-error TS2339
         this.do_crop_margin = config.do_crop_margin;
 
+        // @ts-expect-error TS2339
         this.pad_size = config.pad_size;
+        // @ts-expect-error TS2339
         this.do_pad = config.do_pad;
 
         if (this.do_pad && !this.pad_size && this.size && this.size.width !== undefined && this.size.height !== undefined) {
@@ -818,6 +824,7 @@ export class ImageProcessor extends Callable {
         // Support both formats for backwards compatibility
         else if (Number.isInteger(size)) {
             shortest_edge = size;
+            // @ts-expect-error TS2339
             longest_edge = this.config.max_size ?? shortest_edge;
 
         } else if (size !== undefined) {
@@ -886,6 +893,7 @@ export class ImageProcessor extends Callable {
         } else if (size.min_pixels !== undefined && size.max_pixels !== undefined) {
             // Custom resize logic for Qwen2-VL models
             const { min_pixels, max_pixels } = size;
+            // @ts-expect-error TS2339
             const factor = this.config.patch_size * this.config.merge_size;
             return smart_resize(srcHeight, srcWidth, factor, min_pixels, max_pixels);
         } else {
@@ -901,6 +909,7 @@ export class ImageProcessor extends Callable {
     async resize(image) {
         const [newWidth, newHeight] = this.get_resize_output_image_size(image, this.size);
         return await image.resize(newWidth, newHeight, {
+            // @ts-expect-error TS2322
             resample: this.resample,
         });
     }
@@ -951,6 +960,7 @@ export class ImageProcessor extends Callable {
 
         // Resize the image using thumbnail method.
         if (this.do_thumbnail) {
+            // @ts-expect-error TS2345
             image = await this.thumbnail(image, this.size, this.resample);
         }
 
@@ -975,6 +985,7 @@ export class ImageProcessor extends Callable {
         // NOTE: All pixel-level manipulation (i.e., modifying `pixelData`)
         // occurs with data in the hwc format (height, width, channels), 
         // to emulate the behavior of the original Python code (w/ numpy).
+        /** @type {Float32Array} */
         let pixelData = Float32Array.from(image.data);
         let imgDims = [image.height, image.width, image.channels];
 
diff --git a/src/base/processing_utils.js b/src/base/processing_utils.js
index 2e457e20d..ecc1c6155 100644
--- a/src/base/processing_utils.js
+++ b/src/base/processing_utils.js
@@ -28,6 +28,7 @@ import { getModelJSON } from '../utils/hub.js';
 /**
  * @typedef {Object} ProcessorProperties Additional processor-specific properties.
  * @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
+ * @typedef {import('../tokenizers.js').PreTrainedTokenizer} PreTrainedTokenizer
  */
 
 
@@ -61,7 +62,7 @@ export class Processor extends Callable {
     }
 
     /**
-     * @returns {import('../tokenizers.js').PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
+     * @returns {PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
      */
     get tokenizer() {
         return this.components.tokenizer;
@@ -74,6 +75,11 @@ export class Processor extends Callable {
         return this.components.feature_extractor;
     }
 
+    /**
+     * @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[0]} messages
+     * @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[1]} options
+     * @returns {ReturnType<PreTrainedTokenizer['apply_chat_template']>}
+     */
     apply_chat_template(messages, options = {}) {
         if (!this.tokenizer) {
             throw new Error('Unable to apply chat template without a tokenizer.');
@@ -84,6 +90,10 @@ export class Processor extends Callable {
         });
     }
 
+    /**
+     * @param {Parameters<PreTrainedTokenizer['batch_decode']>} args
+     * @returns {ReturnType<PreTrainedTokenizer['batch_decode']>}
+     */
     batch_decode(...args) {
         if (!this.tokenizer) {
             throw new Error('Unable to decode without a tokenizer.');
diff --git a/src/configs.js b/src/configs.js
index a40bb59d9..ce01139e6 100644
--- a/src/configs.js
+++ b/src/configs.js
@@ -70,15 +70,19 @@ function getNormalizedConfig(config) {
         case 'florence2':
         case 'llava_onevision':
         case 'idefics3':
+            // @ts-expect-error TS2339
             init_normalized_config = getNormalizedConfig(config.text_config);
             break;
         case 'moondream1':
+            // @ts-expect-error TS2339
             init_normalized_config = getNormalizedConfig(config.phi_config);
             break;
         case 'musicgen':
+            // @ts-expect-error TS2339
             init_normalized_config = getNormalizedConfig(config.decoder);
             break;
         case 'multi_modality':
+            // @ts-expect-error TS2339
             init_normalized_config = getNormalizedConfig(config.language_config);
             break;
 
@@ -191,6 +195,7 @@ function getNormalizedConfig(config) {
             break;
 
         case 'vision-encoder-decoder':
+            // @ts-expect-error TS2339
             const decoderConfig = getNormalizedConfig(config.decoder);
 
             const add_encoder_pkv = 'num_decoder_layers' in decoderConfig;
diff --git a/src/models.js b/src/models.js
index f8242b5f0..fe4d46365 100644
--- a/src/models.js
+++ b/src/models.js
@@ -269,8 +269,11 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
     } else if (session_options.externalData !== undefined) {
         externalDataPromises = session_options.externalData.map(async (ext) => {
             // if the external data is a string, fetch the file and replace the string with its content
+            // @ts-expect-error TS2339
             if (typeof ext.data === "string") {
+                // @ts-expect-error TS2339
                 const ext_buffer = await getModelFile(pretrained_model_name_or_path, ext.data, true, options);
+                // @ts-expect-error TS2698
                 return { ...ext, data: ext_buffer };
             }
             return ext;
@@ -1502,6 +1505,7 @@ export class PreTrainedModel extends Callable {
                 if (this.config.model_type === 'musicgen') {
                     // Custom logic (TODO: move to Musicgen class)
                     decoder_input_ids = Array.from({
+                        // @ts-expect-error TS2339
                         length: batch_size * this.config.decoder.num_codebooks
                     }, () => [decoder_start_token_id]);
 
@@ -1831,11 +1835,13 @@ export class PreTrainedModel extends Callable {
     async encode_image({ pixel_values }) {
         // image_inputs === { pixel_values }
         const features = (await sessionRun(this.sessions['vision_encoder'], { pixel_values })).image_features;
+        // @ts-expect-error TS2339
         if (!this.config.num_image_tokens) {
             console.warn(
                 'The number of image tokens was not set in the model configuration. ' +
                 `Setting it to the number of features detected by the vision encoder (${features.dims[1]}).`
             )
+            // @ts-expect-error TS2339
             this.config.num_image_tokens = features.dims[1];
         }
         return features;
@@ -3220,6 +3226,7 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
 
         if (generation_config.return_token_timestamps) {
             outputs["token_timestamps"] = this._extract_token_timestamps(
+                // @ts-expect-error TS2345
                 outputs,
                 generation_config.alignment_heads,
                 generation_config.num_frames,
@@ -3255,6 +3262,7 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
             );
         }
 
+        // @ts-expect-error TS2339
         let median_filter_width = this.config.median_filter_width;
         if (median_filter_width === undefined) {
             console.warn("Model config has no `median_filter_width`, using default value of 7.")
@@ -3265,6 +3273,7 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
         const batch = generate_outputs.cross_attentions;
         // Create a list with `decoder_layers` elements, each a tensor of shape
         // (batch size, attention_heads, output length, input length).
+        // @ts-expect-error TS2339
         const cross_attentions = Array.from({ length: this.config.decoder_layers },
             // Concatenate the cross attentions for each layer across sequence length dimension.
             (_, i) => cat(batch.map(x => x[i]), 2)
@@ -3385,6 +3394,7 @@ export class LlavaForConditionalGeneration extends LlavaPreTrainedModel {
         attention_mask,
     }) {
 
+        // @ts-expect-error TS2339
         const image_token_index = this.config.image_token_index;
 
         const idsList = input_ids.tolist();
@@ -6003,10 +6013,12 @@ export class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
 
         const { encoder_outputs, encoder_attention_mask } = await encoderForward(this, model_inputs);
 
+        // @ts-expect-error TS2339
         const r = encoder_outputs.dims[1] / this.config.reduction_factor;
         const maxlen = Math.floor(r * maxlenratio);
         const minlen = Math.floor(r * minlenratio);
 
+        // @ts-expect-error TS2339
         const num_mel_bins = this.config.num_mel_bins;
 
         let spectrogramParts = [];
@@ -6367,11 +6379,13 @@ export class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE:
      */
     _apply_and_filter_by_delay_pattern_mask(outputs) {
         const [bs_x_codebooks, seqLength] = outputs.dims;
+        // @ts-expect-error TS2339
         const num_codebooks = this.config.decoder.num_codebooks;
         const upperBound = (seqLength - num_codebooks);
 
         let newDataSize = 0;
         for (let i = 0; i < outputs.size; ++i) {
+            // @ts-expect-error TS2339
             if (outputs.data[i] === this.config.decoder.pad_token_id) {
                 continue;
             }
@@ -6401,7 +6415,9 @@ export class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE:
         let clonedInputIds = structuredClone(input_ids);
         for (let i = 0; i < clonedInputIds.length; ++i) {
             for (let j = 0; j < clonedInputIds[i].length; ++j) {
+                // @ts-expect-error TS2339
                 if ((i % this.config.decoder.num_codebooks) >= j) {
+                    // @ts-expect-error TS2339
                     clonedInputIds[i][j] = BigInt(this.config.decoder.pad_token_id);
                 }
             }
@@ -6558,6 +6574,9 @@ export class MultiModalityCausalLM extends MultiModalityPreTrainedModel {
         'past_key_values',
     ];
 
+    /**
+     * @param {ConstructorParameters<typeof MultiModalityPreTrainedModel>} args
+     */
     constructor(...args) {
         super(...args);
 
diff --git a/src/models/convnext/image_processing_convnext.js b/src/models/convnext/image_processing_convnext.js
index 525e736cd..7b867a60a 100644
--- a/src/models/convnext/image_processing_convnext.js
+++ b/src/models/convnext/image_processing_convnext.js
@@ -9,6 +9,7 @@ export class ConvNextImageProcessor extends ImageProcessor {
         /**
          * Percentage of the image to crop. Only has an effect if this.size < 384.
          */
+        // @ts-expect-error TS2339
         this.crop_pct = this.config.crop_pct ?? (224 / 256);
     }
 
diff --git a/src/models/efficientnet/image_processing_efficientnet.js b/src/models/efficientnet/image_processing_efficientnet.js
index 9fde87156..29a594e18 100644
--- a/src/models/efficientnet/image_processing_efficientnet.js
+++ b/src/models/efficientnet/image_processing_efficientnet.js
@@ -5,6 +5,7 @@ import {
 export class EfficientNetImageProcessor extends ImageProcessor {
     constructor(config) {
         super(config);
+        // @ts-expect-error TS2339
         this.include_top = this.config.include_top ?? true;
         if (this.include_top) {
             this.image_std = this.image_std.map(x => x * x);
diff --git a/src/models/florence2/processing_florence2.js b/src/models/florence2/processing_florence2.js
index ec644df25..5c1dfcc24 100644
--- a/src/models/florence2/processing_florence2.js
+++ b/src/models/florence2/processing_florence2.js
@@ -10,8 +10,11 @@ export class Florence2Processor extends Processor {
         super(config, components);
 
         const {
+            // @ts-expect-error TS2339
             tasks_answer_post_processing_type,
+            // @ts-expect-error TS2339
             task_prompts_without_inputs,
+            // @ts-expect-error TS2339
             task_prompts_with_input,
         } = this.image_processor.config;
 
diff --git a/src/models/janus/image_processing_janus.js b/src/models/janus/image_processing_janus.js
index 4dae64ff4..ff191855a 100644
--- a/src/models/janus/image_processing_janus.js
+++ b/src/models/janus/image_processing_janus.js
@@ -13,6 +13,7 @@ export class VLMImageProcessor extends ImageProcessor {
             },
             ...config,
         });
+        // @ts-expect-error TS2339
         this.constant_values = this.config.background_color.map(x => x * this.rescale_factor)
     }
 
diff --git a/src/models/mgp_str/processing_mgp_str.js b/src/models/mgp_str/processing_mgp_str.js
index eb4dbdf6e..a4da992cc 100644
--- a/src/models/mgp_str/processing_mgp_str.js
+++ b/src/models/mgp_str/processing_mgp_str.js
@@ -119,6 +119,8 @@ export class MgpstrProcessor extends Processor {
      * - bpe_preds: The list of BPE decoded sentences.
      * - wp_preds: The list of wp decoded sentences.
      */
+    // @ts-expect-error The type of this method is not compatible with the one
+    // in the base class. It might be a good idea to fix this.
     batch_decode([char_logits, bpe_logits, wp_logits]) {
         const [char_preds, char_scores] = this._decode_helper(char_logits, 'char');
         const [bpe_preds, bpe_scores] = this._decode_helper(bpe_logits, 'bpe');
diff --git a/src/models/paligemma/processing_paligemma.js b/src/models/paligemma/processing_paligemma.js
index 9fb580cd8..63f50b99d 100644
--- a/src/models/paligemma/processing_paligemma.js
+++ b/src/models/paligemma/processing_paligemma.js
@@ -41,6 +41,7 @@ export class PaliGemmaProcessor extends Processor {
         }
 
         const bos_token = this.tokenizer.bos_token;
+        // @ts-expect-error TS2339
         const image_seq_length = this.image_processor.config.image_seq_length;
         let input_strings;
         if (text.some((t) => t.includes(IMAGE_TOKEN))) {
diff --git a/src/models/qwen2_vl/processing_qwen2_vl.js b/src/models/qwen2_vl/processing_qwen2_vl.js
index d5f05535b..692987f86 100644
--- a/src/models/qwen2_vl/processing_qwen2_vl.js
+++ b/src/models/qwen2_vl/processing_qwen2_vl.js
@@ -28,6 +28,7 @@ export class Qwen2VLProcessor extends Processor {
         }
 
         if (image_grid_thw) {
+            // @ts-expect-error TS2551
             let merge_length = this.image_processor.config.merge_size ** 2;
             let index = 0;
 
diff --git a/src/pipelines.js b/src/pipelines.js
index a61cb1dde..ce8dbd9cc 100644
--- a/src/pipelines.js
+++ b/src/pipelines.js
@@ -294,6 +294,7 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
 
         // TODO: Use softmax tensor function
         const function_to_apply =
+            // @ts-expect-error TS2339
             this.model.config.problem_type === 'multi_label_classification'
                 ? batch => batch.sigmoid()
                 : batch => new Tensor(
@@ -302,6 +303,7 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
                     batch.dims,
                 ); // single_label_classification (default)
 
+        // @ts-expect-error TS2339
         const id2label = this.model.config.id2label;
 
         const toReturn = [];
@@ -404,6 +406,7 @@ export class TokenClassificationPipeline extends (/** @type {new (options: TextP
         const outputs = await this.model(model_inputs)
 
         const logits = outputs.logits;
+        // @ts-expect-error TS2339
         const id2label = this.model.config.id2label;
 
         const toReturn = [];
@@ -743,11 +746,14 @@ export class Text2TextGenerationPipeline extends (/** @type {new (options: TextP
 
 
         // Add global prefix, if present
+        // @ts-expect-error TS2339
         if (this.model.config.prefix) {
+            // @ts-expect-error TS2339
             texts = texts.map(x => this.model.config.prefix + x)
         }
 
         // Handle task specific params:
+        // @ts-expect-error TS2339
         const task_specific_params = this.model.config.task_specific_params
         if (task_specific_params && task_specific_params[this.task]) {
             // Add prefixes, if present
@@ -1486,6 +1492,7 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
         const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
         const preparedAudios = await prepareAudios(audio, sampling_rate);
 
+        // @ts-expect-error TS2339
         const id2label = this.model.config.id2label;
 
         const toReturn = [];
@@ -1794,6 +1801,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
             audio = [/** @type {AudioInput} */ (audio)];
         }
 
+        // @ts-expect-error TS2339
         const time_precision = this.processor.feature_extractor.config.chunk_length / this.model.config.max_source_positions;
         const hop_length = this.processor.feature_extractor.config.hop_length;
 
@@ -1859,7 +1867,9 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
 
                 // TODO: Right now we only get top beam
                 if (return_timestamps === 'word') {
+                    // @ts-expect-error TS2339
                     chunk.tokens = data.sequences.tolist()[0];
+                    // @ts-expect-error TS2339
                     chunk.token_timestamps = data.token_timestamps.tolist()[0].map(
                         (/** @type {number} */ x) => round(x, 2)
                     );
@@ -2025,6 +2035,7 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
         const { pixel_values } = await this.processor(preparedImages);
         const output = await this.model({ pixel_values });
 
+        // @ts-expect-error TS2339
         const id2label = this.model.config.id2label;
 
         /** @type {ImageClassificationOutput[]} */
@@ -2139,6 +2150,7 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
             }
         }
 
+        // @ts-expect-error TS2339
         const id2label = this.model.config.id2label;
 
         /** @type {ImageSegmentationPipelineOutput[]} */
@@ -2365,6 +2377,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
         const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSizes);
 
         // Add labels
+        // @ts-expect-error TS2339
         const id2label = this.model.config.id2label;
 
         // Format output
@@ -2584,6 +2597,7 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
         // Run model
         const output = await this.model.generate({
             inputs: pixel_values,
+            // @ts-expect-error TS2339
             max_length: this.model.config.decoder.max_position_embeddings,
             decoder_input_ids,
             ...generate_kwargs,
@@ -2699,6 +2713,7 @@ export class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPi
         // Generate waveform
         const { waveform } = await this.model(inputs);
 
+        // @ts-expect-error TS2339
         const sampling_rate = this.model.config.sampling_rate;
         return {
             audio: waveform.data,
@@ -3338,4 +3353,4 @@ async function loadItems(mapping, model, pretrainedOptions) {
     }
 
     return result;
-}
\ No newline at end of file
+}
diff --git a/src/utils/dtypes.js b/src/utils/dtypes.js
index 0d6e190e0..845eef5e0 100644
--- a/src/utils/dtypes.js
+++ b/src/utils/dtypes.js
@@ -1,3 +1,5 @@
+/// <reference types="@webgpu/types" />
+
 import { apis } from "../env.js";
 
 import { DEVICE_TYPES } from "./devices.js";
diff --git a/src/utils/hub.js b/src/utils/hub.js
index c5fcfacf1..17ee4c1b1 100755
--- a/src/utils/hub.js
+++ b/src/utils/hub.js
@@ -121,7 +121,7 @@ class FileResponse {
      */
     async arrayBuffer() {
         const data = await fs.promises.readFile(this.filePath);
-        return data.buffer;
+        return /** @type {ArrayBuffer} */ (data.buffer);
     }
 
     /**
diff --git a/jsconfig.json b/tsconfig.json
similarity index 56%
rename from jsconfig.json
rename to tsconfig.json
index 9af7d54be..fb6de7097 100644
--- a/jsconfig.json
+++ b/tsconfig.json
@@ -6,7 +6,14 @@
     "checkJs": true,
     "target": "esnext",
     "module": "nodenext",
-    "moduleResolution": "nodenext"
+    "moduleResolution": "nodenext",
+    "outDir": "types",
+    "strict": false,
+    "skipLibCheck": true,
+    "declaration": true,
+    "declarationMap": true,
+    "noEmit": false,
+    "emitDeclarationOnly": true
   },
   "typeAcquisition": {
     "include": ["jest"]