diff --git a/nbs/common.base_model.ipynb b/nbs/common.base_model.ipynb index b5c6585a1..8d1722ac8 100644 --- a/nbs/common.base_model.ipynb +++ b/nbs/common.base_model.ipynb @@ -154,6 +154,7 @@ " windows_batch_size: int,\n", " inference_windows_batch_size: Union[int, None],\n", " start_padding_enabled: bool,\n", + " training_data_availability_threshold: Union[float, List[float]] = 0.0,\n", " n_series: Union[int, None] = None,\n", " n_samples: Union[int, None] = 100,\n", " h_train: int = 1,\n", @@ -358,6 +359,28 @@ " else:\n", " self.inference_windows_batch_size = inference_windows_batch_size\n", "\n", + " # Filtering training windows by available sample fractions\n", + " if isinstance(training_data_availability_threshold, int):\n", + " raise ValueError(\"training_data_availability_threshold cannot be an integer - must be a float\")\n", + " elif isinstance(training_data_availability_threshold, float):\n", + " if training_data_availability_threshold < 0.0 or training_data_availability_threshold > 1.0:\n", + " raise ValueError(f\"training_data_availability_threshold must be between 0.0 and 1.0, got {training_data_availability_threshold}\")\n", + " self.min_insample_fraction = training_data_availability_threshold\n", + " self.min_outsample_fraction = training_data_availability_threshold\n", + " elif isinstance(training_data_availability_threshold, (list, tuple)) and len(training_data_availability_threshold) == 2:\n", + " for i, value in enumerate(training_data_availability_threshold):\n", + " if isinstance(value, int):\n", + " raise ValueError(f\"training_data_availability_threshold[{i}] cannot be an integer - must be a float\")\n", + " if not isinstance(value, float):\n", + " raise ValueError(f\"training_data_availability_threshold[{i}] must be a float\")\n", + " if value < 0.0 or value > 1.0:\n", + " raise ValueError(f\"training_data_availability_threshold[{i}] must be between 0.0 and 1.0, got {value}\")\n", + " \n", + " self.min_insample_fraction = 
training_data_availability_threshold[0]\n", + " self.min_outsample_fraction = training_data_availability_threshold[1]\n", + " else:\n", + " raise ValueError(\"training_data_availability_threshold must be a float or a list/tuple of two floats\")\n", + "\n", " # Optimization \n", " self.learning_rate = learning_rate\n", " self.max_steps = max_steps\n", @@ -674,16 +697,20 @@ " windows = windows.flatten(0, 1)\n", " windows = windows.unsqueeze(-1)\n", "\n", - " # Sample and Available conditions\n", - " available_idx = temporal_cols.get_loc('available_mask') \n", - " available_condition = windows[:, :self.input_size, available_idx]\n", - " available_condition = torch.sum(available_condition, axis=(1, -1)) # Sum over time & series dimension\n", - " final_condition = (available_condition > 0)\n", - " \n", + " # Calculate minimum required available points based on fractions\n", + " min_insample_points = max(1, int(self.input_size * self.min_insample_fraction * self.n_series))\n", + " min_outsample_points = max(1, int(self.h * self.min_outsample_fraction * self.n_series))\n", + "\n", + " # Sample based on available conditions\n", + " available_idx = temporal_cols.get_loc(\"available_mask\")\n", + " insample_condition = windows[:, : self.input_size, available_idx]\n", + " insample_condition = torch.sum(insample_condition, axis=(1, -1)) # Sum over time & series dimension\n", + " final_condition = insample_condition >= min_insample_points\n", + "\n", " if self.h > 0:\n", - " sample_condition = windows[:, self.input_size:, available_idx]\n", - " sample_condition = torch.sum(sample_condition, axis=(1, -1)) # Sum over time & series dimension\n", - " final_condition = (sample_condition > 0) & (available_condition > 0)\n", + " outsample_condition = windows[:, self.input_size :, available_idx]\n", + " outsample_condition = torch.sum(outsample_condition, axis=(1, -1)) # Sum over time & series dimension\n", + " final_condition = (outsample_condition >= min_outsample_points) & 
(insample_condition >= min_insample_points)\n", " \n", " windows = windows[final_condition]\n", "\n", diff --git a/nbs/core.ipynb b/nbs/core.ipynb index 36c0e75fc..8a15db7f6 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -3676,6 +3676,319 @@ " 'LSTM', 'LSTM1', 'LSTM1-median', 'LSTM2_ql0.5', 'TSMixer', 'TSMixer1',\n", " 'TSMixer1-median', 'TSMixer2_ql0.5']" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a67cf15a", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# test training_data_availability_threshold\n", + "AirPassengersPanel_train = AirPassengersPanel[AirPassengersPanel['ds'] < AirPassengersPanel['ds'].values[-12]].reset_index(drop=True)\n", + "\n", + "def count_training_windows(model, data):\n", + " \"\"\"Helper function to count training windows\"\"\"\n", + " \n", + " # Monkey patch the _create_windows method to capture window counts\n", + " original_create_windows = model._create_windows\n", + " window_counts = {}\n", + " \n", + " def patched_create_windows(batch, step):\n", + " windows, static, static_cols = original_create_windows(batch, step)\n", + " if step == 'train':\n", + " window_counts['total_windows'] = windows.shape[0]\n", + " return windows, static, static_cols\n", + " \n", + " model._create_windows = patched_create_windows\n", + " \n", + " nf = NeuralForecast(models=[model], freq=\"M\")\n", + " nf.fit(data)\n", + " \n", + " model._create_windows = original_create_windows\n", + " \n", + " return window_counts.get('total_windows', 0)\n", + "\n", + "# Theoretical window count for a univariate model, start_padding_enabled=False\n", + "m = AirPassengersPanel_train['unique_id'].nunique()\n", + "n = AirPassengersPanel_train['ds'].nunique()\n", + "h = 12\n", + "input_size = 24\n", + "step_size = 1\n", + "n_windows = m * (((n - input_size) / step_size + 1) - (n % h == 0))\n", + "\n", + "## Test NHITS\n", + "# Get max count from default behavior\n", + "nhits_model = NHITS(\n", + " h=h, \n", + " 
input_size=input_size,\n", + " max_steps=2)\n", + " \n", + "max_count_nhits = count_training_windows(nhits_model, AirPassengersPanel_train)\n", + "assert max_count_nhits == n_windows, f\"Expected {n_windows} windows, got {max_count_nhits}\"\n", + "\n", + "# Test with threshold\n", + "nhits_model = NHITS(h=h, \n", + " input_size=input_size, \n", + " training_data_availability_threshold=0.2, # Enforces at least 4 insample points and 2 outsample points\n", + " max_steps=2)\n", + " \n", + "count = count_training_windows(nhits_model, AirPassengersPanel_train) # Should now have max_count - 2 windows\n", + "assert count == max_count_nhits - 2, f\"Expected {max_count_nhits - 2} windows, got {count}\"\n", + "\n", + "# Theoretical window count for a univariate model, start_padding_enabled=True\n", + "n_windows = m * (((n - 1) / step_size + 1) - (n % h == 0))\n", + "\n", + "## Test NHITS\n", + "# Get max count from default behavior\n", + "nhits_model = NHITS(\n", + " h=12, \n", + " input_size=12,\n", + " training_data_availability_threshold=0.0, \n", + " start_padding_enabled=True,\n", + " max_steps=2)\n", + " \n", + "max_count_nhits = count_training_windows(nhits_model, AirPassengersPanel_train)\n", + "assert max_count_nhits == n_windows, f\"Expected {n_windows} windows, got {max_count_nhits}\"\n", + "\n", + "# Test with threshold\n", + "nhits_model = NHITS(h=h, \n", + " input_size=input_size, \n", + " training_data_availability_threshold=0.2, # Enforces at least 4 insample points and 2 outsample points\n", + " start_padding_enabled=True,\n", + " max_steps=2)\n", + " \n", + "count = count_training_windows(nhits_model, AirPassengersPanel_train) # Should now have max_count - (4 * 2) windows\n", + "assert count == max_count_nhits - 8, f\"Expected {max_count_nhits - 8} windows, got {count}\"\n", + "\n", + "## Test invalid parameters\n", + "invalid_cases = [\n", + " (1, \"integer (should be float)\"),\n", + " (-0.1, \"negative value\"),\n", + " (1.5, \"value > 1.0\"),\n", + " 
([1, 0.5], \"list with integer\"),\n", + " ([0.5, 1.5], \"list with value > 1.0\"),\n", + " ([0.5], \"list with wrong length\"),\n", + " (\"0.5\", \"string value\")\n", + " ]\n", + "\n", + "for invalid_value, description in invalid_cases:\n", + " try:\n", + " model = NHITS(h=12, input_size=12, \n", + " training_data_availability_threshold=invalid_value, \n", + " max_steps=2)\n", + " assert False, f\"{description} should have failed\"\n", + " except (ValueError, TypeError):\n", + " pass \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe79d19f", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# We are given m timeseries with n observations each, and we want to create a univariate forecast model with horizon h, input_size s, and step_size k. We will create windows of length h + s to train our model, and we will slide the window with step_size k.\n", + "# \n", + "# We consider two different cases:\n", + "# 1. start_padding_enabled = False: we pad the timeseries with h zeros at the beginning, so that we can create at least one window, because it could be that n < h.\n", + "# 2. start_padding_enabled = True: we pad the timeseries with s - 1 zeros at the front, and h zeros at the back, allowing the model to learn from windows with mostly zeros at the beginning.\n", + "#\n", + "# EXAMPLE. We have 2 (m) timeseries with 132 (n) observations each, and we want to create a univariate forecast model with horizon (h) 12, input_size (s) 24, and step_size (k) 1. Now, we can calculate the number of windows that we can create for each case.\n", + "# \n", + "# \n", + "# Case 1: start_padding_enabled = False\n", + "# We first pad the timeseries with h zeros at the beginning. 
Our window length is h + s, so we can create the following number of windows (see also: https://docs.pytorch.org/docs/stable/generated/torch.Tensor.unfold.html):\n", + "# \n", + "# n_windows = m * (((n + h) - (h + s)) / k + 1)\n", + "# n_windows = m * ((n + h - h - s) / k + 1)\n", + "# n_windows = m * ((n - s) / k + 1)\n", + "# n_windows = 2 * ((132 - 24) / 1 + 1)\n", + "# n_windows = 218\n", + "# \n", + "# Case 2: start_padding_enabled = True\n", + "# \n", + "# n_windows = m * (((n + h + s - 1) - (h + s)) / k + 1)\n", + "# n_windows = m * ((n + h + s - 1 - h - s) / k + 1)\n", + "# n_windows = m * ((n - 1) / k + 1)\n", + "# n_windows = 2 * ((132 - 1) / 1 + 1)\n", + "# n_windows = 264\n", + "# \n", + "# As of this moment, the shape of your data will be [n_windows, s + h, n_channels, 1]. n_channels is the number of channels in your data, which is 2 for univariate data (1 for the target variable, 1 for a helper variable that denotes whether the timeseries is available at a particular timestep). For each exogenous variable added, the number of channels will increase by 1.\n", + "# \n", + "# We are now interested in filtering the windows. Not all windows will contain valid data points because they might contain zeros that we created due to padding. Thus, we want to filter out windows that do not have enough data points to train the model. We will use the training_data_availability_threshold parameter to filter out windows that do not have enough valid data points.\n", + "#\n", + "# We have two filtering conditions for the windows:\n", + "# 1. An insample condition: this condition checks whether the number of available data points in the insample part of the window is greater than or equal to training_data_availability_threshold * s. If this condition is not met, we will filter out the window.\n", + "# 2. 
An outsample condition: this condition checks whether the number of available data points in the outsample part of the window is greater than or equal to training_data_availability_threshold * h. If this condition is not met, we will filter out the window.\n", + "#\n", + "# We calculate the minimum number of data points that we need in the insample and outsample parts of the window:\n", + "# 1. min_insample_points = max(1, int(min_insample_fraction * s))\n", + "# 2. min_outsample_points = max(1, int(min_outsample_fraction * h))\n", + "#\n", + "# EXAMPLE. We have 2 (m) timeseries with 132 (n) observations each, and we want to create a univariate forecast model with horizon (h) 12, input_size (s) 24, and step_size (k) 1. We set training_data_availability_threshold = 0.0.\n", + "#\n", + "# min_insample_points = max(1, int(0.0 * 24)) = 1\n", + "# min_outsample_points = max(1, int(0.0 * 12)) = 1\n", + "#\n", + "# Case 1: start_padding_enabled = False\n", + "# We will filter out windows that do not have at least 1 valid data point in the insample and outsample parts of the window. Our timeseries originally consisted of 132 observations and we padded it with h=12 zeros at the beginning, so we have 144 observations in total. This means that the last window (of each timeseries) will have 12 zeros at the end, which is the entire forecast horizon, so we will filter it out, because it does not have any valid data points in the outsample part of the window. Thus, we will have 218 - (2 * 1) = 216 valid windows. Note that in this case, the insample condition is not a limiting factor, because we have enough valid data points in the insample part of the window.\n", + "#\n", + "# Case 2: start_padding_enabled = True\n", + "# We will filter out windows that do not have at least 1 valid data point in the insample and outsample parts of the window. 
Our timeseries originally consisted of 132 observations and we padded it with h=12 zeros at the beginning and (s-1)=23 zeros at the start, so we have 167 observations in total. The last window (of each timeseries) will have 12 zeros at the end, which is the entire forecast horizon, so we will filter it out, because it does not have any valid data points in the outsample part of the window. Thus, we will have 264 - (2 * 1) = 262 valid windows. Again note that in this case, the insample condition is not a limiting factor, because we have at least 1 valid data point in the insample part of each window.\n", + "#\n", + "# EXAMPLE. We have 2 (m) timeseries with 132 (n) observations each, and we want to create a univariate forecast model with horizon (h) 12, input_size (s) 24, and step_size (k) 1. We set training_data_availability_threshold = 0.2.\n", + "#\n", + "# min_insample_points = max(1, int(0.2 * 24)) = 4\n", + "# min_outsample_points = max(1, int(0.2 * 12)) = 2\n", + "#\n", + "# Case 1: start_padding_enabled = False\n", + "# We will filter out windows that do not have at least 4 valid data points in the insample and 2 valid data points in the outsample parts of the window. Our timeseries originally consisted of 132 observations and we padded it with h=12 zeros at the beginning, so we have 144 observations in total. This means that the last window (of each timeseries) will have 12 zeros at the end, which is the entire forecast horizon, so we will filter it out, because it does not have any valid data points in the outsample part of the window. The penultimate window will have 11 zeros at the end, which is not enough to have 2 valid data points in the outsample part of the window, so we will filter it out as well. Thus, we will have 218 - (2 * 2) = 214 valid windows. 
Note that in this case, the insample condition is not a limiting factor, because we have enough valid data points in the insample part of the window.\n", + "#\n", + "# Case 2: start_padding_enabled = True\n", + "# We will filter out windows that do not have at least 4 valid data points in the insample and 2 valid data points in the outsample parts of the window. Our timeseries originally consisted of 132 observations and we padded it with h=12 zeros at the beginning and (s-1)=23 zeros at the start, so we have 167 observations in total. This means that the last window (of each timeseries) will have 12 zeros at the end, which is the entire forecast horizon, so we will filter it out, because it does not have any valid data points in the outsample part of the window. The penultimate window will have 11 zeros at the end, which is not enough to have 2 valid data points in the outsample part of the window, so we will filter it out as well. Thus, we will have 264 - (2 * 2) = 260 valid windows based on the outsample condition. The insample condition requires at least 4 valid data points in the insample part of the window. Because we padded each time series with (s - 1) zeros at the beginning, the first window has only 1 valid data point in the insample part of the window, so we will filter it out. The second window has 2 valid data points in the insample part of the window, so it's filtered out as well. The third window has 3 valid data points in the insample part of the window, so it's filtered out as well. The fourth window has 4 valid data points in the insample part of the window, so it's kept. Thus, we further reduce the number of valid windows to 260 - (2 * 3) = 254 valid windows based on the insample condition. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2194248b", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "\n", + "# Theoretical window count for a multivariate model, start_padding_enabled=False\n", + "m = AirPassengersPanel_train['unique_id'].nunique()\n", + "n = AirPassengersPanel_train['ds'].nunique()\n", + "h = 12\n", + "input_size = 24\n", + "step_size = 1\n", + "n_windows = (n - input_size) / step_size + 1 - (n % h == 0)\n", + "\n", + "## Test TSMixer\n", + "# Get max count from default behavior\n", + "tsmixer_model = TSMixer(\n", + " h=h, \n", + " input_size=input_size,\n", + " n_series=m, \n", + " training_data_availability_threshold=0.0,\n", + " max_steps=2)\n", + " \n", + "max_count_tsmixer = count_training_windows(tsmixer_model, AirPassengersPanel_train)\n", + "assert max_count_tsmixer == n_windows, f\"Expected {n_windows} windows, got {max_count_tsmixer}\"\n", + "\n", + "# Test with threshold\n", + "tsmixer_model = TSMixer(\n", + " h=h, \n", + " input_size=input_size,\n", + " n_series=m, \n", + " training_data_availability_threshold=0.2, # Requires 9 insample points and 4 outsample points\n", + " max_steps=2)\n", + " \n", + "count = count_training_windows(tsmixer_model, AirPassengersPanel_train)\n", + "assert count == max_count_tsmixer - 1, f\"Expected {max_count_tsmixer - 1} windows, got {count}\"\n", + "\n", + "# Theoretical window count for a multivariate model, start_padding_enabled=True\n", + "n_windows = (n - 1) / step_size + 1 - (n % h == 0)\n", + "\n", + "## Test TSMixer\n", + "# Get max count from default behavior\n", + "tsmixer_model = TSMixer(\n", + " h=h, \n", + " input_size=input_size,\n", + " n_series=m, \n", + " training_data_availability_threshold=0.0,\n", + " start_padding_enabled=True,\n", + " max_steps=2)\n", + " \n", + "max_count_tsmixer = count_training_windows(tsmixer_model, AirPassengersPanel_train)\n", + "assert max_count_tsmixer == n_windows, f\"Expected {n_windows} windows, got 
{max_count_tsmixer}\"\n", + "\n", + "# Test with threshold\n", + "tsmixer_model = TSMixer(\n", + " h=h, \n", + " input_size=input_size,\n", + " n_series=m, \n", + " training_data_availability_threshold=0.2, # Requires 9 insample points and 4 outsample points\n", + " start_padding_enabled=True,\n", + " max_steps=2)\n", + " \n", + "count = count_training_windows(tsmixer_model, AirPassengersPanel_train)\n", + "assert count == max_count_tsmixer - 5, f\"Expected {max_count_tsmixer - 5} windows, got {count}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "897e02e4", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# We are given m timeseries with n observations each, and we want to create a multivariate forecast model with horizon h, input_size s, and step_size k. We will create windows of length h + s to train our model, and we will slide the window with step_size k.\n", + "# \n", + "# We consider two different cases:\n", + "# 1. start_padding_enabled = False: we pad the timeseries with h zeros at the beginning, so that we can create at least one window, because it could be that n < h.\n", + "# 2. start_padding_enabled = True: we pad the timeseries with s - 1 zeros at the front, and h zeros at the back, allowing the model to learn from windows with mostly zeros at the beginning.\n", + "#\n", + "# EXAMPLE. We have 2 (m) timeseries with 132 (n) observations each, and we want to create a multivariate forecast model with horizon (h) 12, input_size (s) 24, and step_size (k) 1. Now, we can calculate the number of windows that we can create for each case.\n", + "# \n", + "# \n", + "# Case 1: start_padding_enabled = False\n", + "# We first pad the timeseries with h zeros at the beginning. 
Our window length is h + s, so we can create the following number of windows (see also: https://docs.pytorch.org/docs/stable/generated/torch.Tensor.unfold.html):\n", + "# \n", + "# n_windows = ((n + h) - (h + s)) / k + 1\n", + "# n_windows = (n + h - h - s) / k + 1\n", + "# n_windows = (n - s) / k + 1\n", + "# n_windows = (132 - 24) / 1 + 1\n", + "# n_windows = 109\n", + "# \n", + "# Case 2: start_padding_enabled = True\n", + "# \n", + "# n_windows = ((n + h + s - 1) - (h + s)) / k + 1\n", + "# n_windows = (n + h + s - 1 - h - s) / k + 1\n", + "# n_windows = (n - 1) / k + 1\n", + "# n_windows = (132 - 1) / 1 + 1\n", + "# n_windows = 132\n", + "# \n", + "# As of this moment, the shape of your data will be [n_windows, s + h, n_channels, n_series]. n_channels is the number of channels in your data, which is 2 for univariate data (1 for the target variable, 1 for a helper variable that denotes whether the timeseries is available at a particular timestep). For each exogenous variable added, the number of channels will increase by 1.\n", + "# \n", + "# We are now interested in filtering the windows. Not all windows will contain valid data points because they might contain zeros that we created due to padding. Thus, we want to filter out windows that do not have enough data points to train the model. We will use the training_data_availability_threshold parameter to filter out windows that do not have enough valid data points.\n", + "#\n", + "# We have two filtering conditions for the windows:\n", + "# 1. An insample condition: this condition checks whether the number of available data points in the insample part of the window is greater than or equal to training_data_availability_threshold * s. If this condition is not met, we will filter out the window.\n", + "# 2. An outsample condition: this condition checks whether the number of available data points in the outsample part of the window is greater than or equal to training_data_availability_threshold * h. 
If this condition is not met, we will filter out the window.\n", + "#\n", + "# We calculate the minimum number of data points that we need in the insample and outsample parts of the window:\n", + "# 1. min_insample_points = max(1, int(min_insample_fraction * s * n_series))\n", + "# 2. min_outsample_points = max(1, int(min_outsample_fraction * h * n_series))\n", + "#\n", + "# EXAMPLE. We have 2 (m) timeseries with 132 (n) observations each, and we want to create a multivariate forecast model with horizon (h) 12, input_size (s) 24, and step_size (k) 1. We set training_data_availability_threshold = 0.0.\n", + "#\n", + "# min_insample_points = max(1, int(0.0 * 24 * 2)) = 1\n", + "# min_outsample_points = max(1, int(0.0 * 12 * 2)) = 1\n", + "#\n", + "# Case 1: start_padding_enabled = False\n", + "# We will filter out windows that do not have at least 1 valid data point in the insample and outsample parts of the window. Our timeseries originally consisted of 132 observations and we padded it with h=12 zeros at the beginning, so we have 144 observations in total. This means that the last window (of each timeseries) will have 12 zeros at the end, which is the entire forecast horizon, so we will filter it out, because it does not have any valid data points in the outsample part of the window. Thus, we will have 109 - 1 = 108 valid windows. Note that in this case, the insample condition is not a limiting factor, because we have enough valid data points in the insample part of the window.\n", + "#\n", + "# Case 2: start_padding_enabled = True\n", + "# We will filter out windows that do not have at least 1 valid data point in the insample and outsample parts of the window. Our timeseries originally consisted of 132 observations and we padded it with h=12 zeros at the beginning and (s-1)=23 zeros at the start, so we have 167 observations in total. 
The last window (of each timeseries) will have 12 zeros at the end, which is the entire forecast horizon, so we will filter it out, because it does not have any valid data points in the outsample part of the window. Thus, we will have 132 - 1 = 131 valid windows. Again note that in this case, the insample condition is not a limiting factor, because we have at least 1 valid data point in the insample part of each window.\n", + "#\n", + "# EXAMPLE. We have 2 (m) timeseries with 132 (n) observations each, and we want to create a multivariate forecast model with horizon (h) 12, input_size (s) 24, and step_size (k) 1. We set training_data_availability_threshold = 0.2.\n", + "#\n", + "# min_insample_points = max(1, int(0.2 * 24 * 2)) = 9\n", + "# min_outsample_points = max(1, int(0.2 * 12 * 2)) = 4\n", + "#\n", + "# Case 1: start_padding_enabled = False\n", + "# We will filter out windows that do not have at least 9 valid data points in the insample and 4 valid data points in the outsample parts of the window. Our timeseries originally consisted of 132 observations and we padded it with h=12 zeros at the beginning, so we have 144 observations in total. This means that the last window (of each timeseries) will have 12 zeros at the end, which is the entire forecast horizon, so we will filter it out, because it does not have any valid data points in the outsample part of the window. The penultimate window will have 11 zeros at the end, which is not enough to have 4 valid data points in the outsample part of the window, so we will filter it out as well. Thus, we will have 109 - 2 = 107 valid windows. Note that in this case, the insample condition is not a limiting factor, because we have enough valid data points in the insample part of the window.\n", + "#\n", + "# Case 2: start_padding_enabled = True\n", + "# We will filter out windows that do not have at least 9 valid data points in the insample and 4 valid data points in the outsample parts of the window. 
Our timeseries originally consisted of 132 observations and we padded it with h=12 zeros at the beginning and (s-1)=23 zeros at the start, so we have 167 observations in total. This means that the last window (of each timeseries) will have 12 zeros at the end, which is the entire forecast horizon, so we will filter it out, because it does not have any valid data points in the outsample part of the window. The penultimate window will have 11 zeros at the end, which is not enough to have 2 valid data points in the outsample part of the window, so we will filter it out as well. Thus, we will have 132 - 2 = 130 valid windows based on the outsample condition. The insample condition requires at least 9 valid data points in the insample part of the window. Because we padded each time series with (s - 1) zeros at the beginning, the first window has only 2 valid data points (1 for each series) in the insample part of the window, so we will filter it out. The second window has 4 valid data points (2 for each series) in the insample part of the window, so it's filtered out as well. The third window has 6 valid data points (3 for each series) in the insample part of the window, so it's filtered out as well. The fourth window has 8 valid data points (4 for each series) in the insample part of the window, so it's filtered out as well. Thus, we further reduce the number of valid windows to 130 - 4 = 126 valid windows based on the insample condition. " + ] } ], "metadata": { diff --git a/nbs/models.autoformer.ipynb b/nbs/models.autoformer.ipynb index 11b76a171..66fe10de8 100644 --- a/nbs/models.autoformer.ipynb +++ b/nbs/models.autoformer.ipynb @@ -458,6 +458,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", " `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", @@ -508,6 +509,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -537,6 +539,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled = start_padding_enabled,\n", + " training_data_availability_threshold = training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.bitcn.ipynb b/nbs/models.bitcn.ipynb index 55ae082f4..dc6ec30fa 100644 --- a/nbs/models.bitcn.ipynb +++ b/nbs/models.bitcn.ipynb @@ -175,6 +175,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -219,6 +220,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -249,6 +251,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold = training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.deepar.ipynb b/nbs/models.deepar.ipynb index b2af4d6b3..00637cdd2 100644 --- a/nbs/models.deepar.ipynb +++ b/nbs/models.deepar.ipynb @@ -177,6 +177,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -227,6 +228,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = -1,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -262,6 +264,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold = training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb index a2855c8a8..118408f72 100644 --- a/nbs/models.deepnpts.ipynb +++ b/nbs/models.deepnpts.ipynb @@ -117,6 +117,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -163,6 +164,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'standard',\n", " random_seed: int = 1,\n", @@ -203,6 +205,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold = training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.dilated_rnn.ipynb b/nbs/models.dilated_rnn.ipynb index 4f42d5ed7..635eced80 100644 --- a/nbs/models.dilated_rnn.ipynb +++ b/nbs/models.dilated_rnn.ipynb @@ -400,6 +400,7 @@ " `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -445,6 +446,7 @@ " windows_batch_size = 128,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'robust',\n", " random_seed: int = 1,\n", @@ -476,6 +478,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.dlinear.ipynb b/nbs/models.dlinear.ipynb index 9a3c85165..a7574fecf 100644 --- a/nbs/models.dlinear.ipynb +++ b/nbs/models.dlinear.ipynb @@ -161,6 +161,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -203,6 +204,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -232,6 +234,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled = start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " drop_last_loader=drop_last_loader,\n", diff --git a/nbs/models.fedformer.ipynb b/nbs/models.fedformer.ipynb index 30c5337f8..79d45d507 100644 --- a/nbs/models.fedformer.ipynb +++ b/nbs/models.fedformer.ipynb @@ -460,6 +460,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -510,6 +511,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -538,6 +540,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.gru.ipynb b/nbs/models.gru.ipynb index 6c0d49a27..0679a623a 100644 --- a/nbs/models.gru.ipynb +++ b/nbs/models.gru.ipynb @@ -138,6 +138,7 @@ " `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -187,6 +188,7 @@ " windows_batch_size = 128,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str='robust',\n", " random_seed=1,\n", @@ -222,6 +224,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.informer.ipynb b/nbs/models.informer.ipynb index 9139d6b56..72ad256c5 100644 --- a/nbs/models.informer.ipynb +++ b/nbs/models.informer.ipynb @@ -305,6 +305,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -356,6 +357,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -385,6 +387,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size = inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " drop_last_loader=drop_last_loader,\n", diff --git a/nbs/models.itransformer.ipynb b/nbs/models.itransformer.ipynb index 9e1f394e3..1979d5993 100644 --- a/nbs/models.itransformer.ipynb +++ b/nbs/models.itransformer.ipynb @@ -129,6 +129,7 @@ " `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -180,6 +181,7 @@ " windows_batch_size = 32,\n", " inference_windows_batch_size = 32,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -211,6 +213,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.kan.ipynb b/nbs/models.kan.ipynb index 6c61bb4ac..cfcc87b8b 100644 --- a/nbs/models.kan.ipynb +++ b/nbs/models.kan.ipynb @@ -359,6 +359,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -409,6 +410,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = -1,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -438,6 +440,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " drop_last_loader=drop_last_loader,\n", diff --git a/nbs/models.lstm.ipynb b/nbs/models.lstm.ipynb index 30fdf5f04..ac5a4ec8a 100644 --- a/nbs/models.lstm.ipynb +++ b/nbs/models.lstm.ipynb @@ -126,6 +126,7 @@ " `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -174,6 +175,7 @@ " windows_batch_size = 128,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'robust',\n", " random_seed = 1,\n", @@ -209,6 +211,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.mlp.ipynb b/nbs/models.mlp.ipynb index bde51b6f1..d6821872d 100644 --- a/nbs/models.mlp.ipynb +++ b/nbs/models.mlp.ipynb @@ -111,6 +111,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -151,6 +152,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = -1,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -182,6 +184,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.mlpmultivariate.ipynb b/nbs/models.mlpmultivariate.ipynb index 2a5511890..1e35cfca5 100644 --- a/nbs/models.mlpmultivariate.ipynb +++ b/nbs/models.mlpmultivariate.ipynb @@ -110,6 +110,7 @@ " `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -151,6 +152,7 @@ " windows_batch_size = 32,\n", " inference_windows_batch_size = 32,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -183,6 +185,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.nbeats.ipynb b/nbs/models.nbeats.ipynb index 5f6c97967..a2fb8bc71 100644 --- a/nbs/models.nbeats.ipynb +++ b/nbs/models.nbeats.ipynb @@ -409,6 +409,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -457,6 +458,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = -1,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str ='identity',\n", " random_seed: int = 1,\n", @@ -490,6 +492,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " drop_last_loader=drop_last_loader,\n", diff --git a/nbs/models.nbeatsx.ipynb b/nbs/models.nbeatsx.ipynb index 7b187268e..d9ee38754 100644 --- a/nbs/models.nbeatsx.ipynb +++ b/nbs/models.nbeatsx.ipynb @@ -421,6 +421,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random seed initialization for replicability.
\n", @@ -473,6 +474,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = -1,\n", " start_padding_enabled: bool = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = \"identity\",\n", " random_seed: int = 1,\n", @@ -510,6 +512,7 @@ " windows_batch_size = windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size = step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.nhits.ipynb b/nbs/models.nhits.ipynb index 133de6953..6df3d4a28 100644 --- a/nbs/models.nhits.ipynb +++ b/nbs/models.nhits.ipynb @@ -298,6 +298,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -350,6 +351,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = -1,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -381,6 +383,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.nlinear.ipynb b/nbs/models.nlinear.ipynb index c619267df..551f13bba 100644 --- a/nbs/models.nlinear.ipynb +++ b/nbs/models.nlinear.ipynb @@ -101,6 +101,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -142,6 +143,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -171,6 +173,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled = start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.patchtst.ipynb b/nbs/models.patchtst.ipynb index dfa44b155..fc8b194e2 100644 --- a/nbs/models.patchtst.ipynb +++ b/nbs/models.patchtst.ipynb @@ -676,6 +676,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -734,6 +735,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -763,6 +765,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.rmok.ipynb b/nbs/models.rmok.ipynb index f76f5edba..aeeddeb10 100644 --- a/nbs/models.rmok.ipynb +++ b/nbs/models.rmok.ipynb @@ -363,6 +363,7 @@ " `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", + " `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -410,6 +411,7 @@ " windows_batch_size = 32,\n", " inference_windows_batch_size = 32,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -440,6 +442,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.rnn.ipynb b/nbs/models.rnn.ipynb index 820907ff4..f924c1017 100644 --- a/nbs/models.rnn.ipynb +++ b/nbs/models.rnn.ipynb @@ -130,6 +130,7 @@ " `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -180,6 +181,7 @@ " windows_batch_size = 128,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str='robust',\n", " random_seed=1,\n", @@ -215,6 +217,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.softs.ipynb b/nbs/models.softs.ipynb index f550de109..74895cfee 100644 --- a/nbs/models.softs.ipynb +++ b/nbs/models.softs.ipynb @@ -203,6 +203,7 @@ " `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -252,6 +253,7 @@ " windows_batch_size = 32,\n", " inference_windows_batch_size = 32,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -283,6 +285,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.stemgnn.ipynb b/nbs/models.stemgnn.ipynb index 698232009..cba0e67fe 100644 --- a/nbs/models.stemgnn.ipynb +++ b/nbs/models.stemgnn.ipynb @@ -206,6 +206,7 @@ " `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random_seed for pytorch initializer and numpy generators.
\n", @@ -249,6 +250,7 @@ " windows_batch_size = 32,\n", " inference_windows_batch_size = 32,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'robust',\n", " random_seed: int = 1,\n", @@ -281,6 +283,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.tcn.ipynb b/nbs/models.tcn.ipynb index be34b2ac9..deb88487e 100644 --- a/nbs/models.tcn.ipynb +++ b/nbs/models.tcn.ipynb @@ -127,6 +127,7 @@ " `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -172,6 +173,7 @@ " windows_batch_size = 128,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1, \n", " scaler_type: str ='robust',\n", " random_seed: int = 1,\n", @@ -202,6 +204,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.tft.ipynb b/nbs/models.tft.ipynb index a994c741a..7da67cf73 100644 --- a/nbs/models.tft.ipynb +++ b/nbs/models.tft.ipynb @@ -832,6 +832,7 @@ " `windows_batch_size`: int=None, windows sampled from rolled data, default uses all.
\n", " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int, random seed initialization for replicability.
\n", @@ -884,6 +885,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled=False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = \"robust\",\n", " random_seed: int = 1,\n", @@ -915,6 +917,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.tide.ipynb b/nbs/models.tide.ipynb index 93fde4d9b..619b36f02 100644 --- a/nbs/models.tide.ipynb +++ b/nbs/models.tide.ipynb @@ -169,6 +169,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -219,6 +220,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -251,6 +253,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.timellm.ipynb b/nbs/models.timellm.ipynb index 48946a2a2..6d2872095 100644 --- a/nbs/models.timellm.ipynb +++ b/nbs/models.timellm.ipynb @@ -286,6 +286,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n", " `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", @@ -344,6 +345,7 @@ " windows_batch_size: int = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled: bool = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " num_lr_decays: int = 0,\n", " early_stop_patience_steps: int = -1,\n", @@ -374,6 +376,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " drop_last_loader=drop_last_loader,\n", diff --git a/nbs/models.timemixer.ipynb b/nbs/models.timemixer.ipynb index bf22f4a11..85e76f838 100644 --- a/nbs/models.timemixer.ipynb +++ b/nbs/models.timemixer.ipynb @@ -361,6 +361,7 @@ " `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -416,6 +417,7 @@ " windows_batch_size = 32,\n", " inference_windows_batch_size = 32,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -446,6 +448,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.timesnet.ipynb b/nbs/models.timesnet.ipynb index 2bb9bb691..1235c3cea 100644 --- a/nbs/models.timesnet.ipynb +++ b/nbs/models.timesnet.ipynb @@ -233,6 +233,7 @@ " `windows_batch_size` : int (default=64), Number of windows to sample in each training batch.
\n", " `inference_windows_batch_size` : int (default=256), Number of windows to sample in each inference batch.
\n", " `start_padding_enabled` : bool (default=False), If True, the model will pad the time series with zeros at the beginning by input size.
\n", +    "    `training_data_availability_threshold` : Union[float, List[float]] (default=0.0), Minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size` : int (default=1), Step size between each window of temporal data.
\n", " `scaler_type` : str (default='standard'), Type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed` : int (default=1), Random_seed for pytorch initializer and numpy generators.
\n", @@ -281,6 +282,7 @@ " windows_batch_size = 64,\n", " inference_windows_batch_size = 256,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'standard',\n", " random_seed: int = 1,\n", @@ -310,6 +312,7 @@ " valid_batch_size=valid_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled = start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " drop_last_loader=drop_last_loader,\n", diff --git a/nbs/models.timexer.ipynb b/nbs/models.timexer.ipynb index 3a594091a..69235fcab 100644 --- a/nbs/models.timexer.ipynb +++ b/nbs/models.timexer.ipynb @@ -260,6 +260,7 @@ " `windows_batch_size`: int=32, number of windows in each batch.
\n", " `inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -313,6 +314,7 @@ " windows_batch_size = 32,\n", " inference_windows_batch_size = 32,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -344,6 +346,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.tsmixer.ipynb b/nbs/models.tsmixer.ipynb index a1a4837f9..db18a7d42 100644 --- a/nbs/models.tsmixer.ipynb +++ b/nbs/models.tsmixer.ipynb @@ -204,6 +204,7 @@ " `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -251,6 +252,7 @@ " windows_batch_size = 32,\n", " inference_windows_batch_size = 32,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -283,6 +285,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.tsmixerx.ipynb b/nbs/models.tsmixerx.ipynb index 2b8e51bc6..82f8bad51 100644 --- a/nbs/models.tsmixerx.ipynb +++ b/nbs/models.tsmixerx.ipynb @@ -278,6 +278,7 @@ " `windows_batch_size`: int=32, number of windows to sample in each training batch.
\n", " `inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -325,6 +326,7 @@ " windows_batch_size = 32,\n", " inference_windows_batch_size = 32,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -357,6 +359,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " random_seed=random_seed,\n", diff --git a/nbs/models.vanillatransformer.ipynb b/nbs/models.vanillatransformer.ipynb index 0d350bda9..8e9822b34 100644 --- a/nbs/models.vanillatransformer.ipynb +++ b/nbs/models.vanillatransformer.ipynb @@ -140,6 +140,7 @@ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
\n", " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
\n", " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n", +    "    `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
\n", " `step_size`: int=1, step size between each window of temporal data.
\n", " `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", @@ -189,6 +190,7 @@ " windows_batch_size = 1024,\n", " inference_windows_batch_size: int = 1024,\n", " start_padding_enabled = False,\n", + " training_data_availability_threshold = 0.0,\n", " step_size: int = 1,\n", " scaler_type: str = 'identity',\n", " random_seed: int = 1,\n", @@ -218,6 +220,7 @@ " windows_batch_size=windows_batch_size,\n", " inference_windows_batch_size=inference_windows_batch_size,\n", " start_padding_enabled=start_padding_enabled,\n", + " training_data_availability_threshold=training_data_availability_threshold,\n", " step_size=step_size,\n", " scaler_type=scaler_type,\n", " drop_last_loader=drop_last_loader,\n", diff --git a/neuralforecast/common/_base_model.py b/neuralforecast/common/_base_model.py index 4bba3e154..c75809a80 100644 --- a/neuralforecast/common/_base_model.py +++ b/neuralforecast/common/_base_model.py @@ -99,6 +99,7 @@ def __init__( windows_batch_size: int, inference_windows_batch_size: Union[int, None], start_padding_enabled: bool, + training_data_availability_threshold: Union[float, List[float]] = 0.0, n_series: Union[int, None] = None, n_samples: Union[int, None] = 100, h_train: int = 1, @@ -336,6 +337,46 @@ def __init__( else: self.inference_windows_batch_size = inference_windows_batch_size + # Filtering training windows by available sample fractions + if isinstance(training_data_availability_threshold, int): + raise ValueError( + "training_data_availability_threshold cannot be an integer - must be a float" + ) + elif isinstance(training_data_availability_threshold, float): + if ( + training_data_availability_threshold < 0.0 + or training_data_availability_threshold > 1.0 + ): + raise ValueError( + f"training_data_availability_threshold must be between 0.0 and 1.0, got {training_data_availability_threshold}" + ) + self.min_insample_fraction = training_data_availability_threshold + self.min_outsample_fraction = training_data_availability_threshold + elif ( + isinstance(training_data_availability_threshold, (list, 
tuple)) + and len(training_data_availability_threshold) == 2 + ): + for i, value in enumerate(training_data_availability_threshold): + if isinstance(value, int): + raise ValueError( + f"training_data_availability_threshold[{i}] cannot be an integer - must be a float" + ) + if not isinstance(value, float): + raise ValueError( + f"training_data_availability_threshold[{i}] must be a float" + ) + if value < 0.0 or value > 1.0: + raise ValueError( + f"training_data_availability_threshold[{i}] must be between 0.0 and 1.0, got {value}" + ) + + self.min_insample_fraction = training_data_availability_threshold[0] + self.min_outsample_fraction = training_data_availability_threshold[1] + else: + raise ValueError( + "training_data_availability_threshold must be a float or a list/tuple of two floats" + ) + # Optimization self.learning_rate = learning_rate self.max_steps = max_steps @@ -678,20 +719,30 @@ def _create_windows(self, batch, step): windows = windows.flatten(0, 1) windows = windows.unsqueeze(-1) - # Sample and Available conditions + # Calculate minimum required available points based on fractions + min_insample_points = max( + 1, int(self.input_size * self.min_insample_fraction * self.n_series) + ) + min_outsample_points = max( + 1, int(self.h * self.min_outsample_fraction * self.n_series) + ) + + # Sample based on available conditions available_idx = temporal_cols.get_loc("available_mask") - available_condition = windows[:, : self.input_size, available_idx] - available_condition = torch.sum( - available_condition, axis=(1, -1) + insample_condition = windows[:, : self.input_size, available_idx] + insample_condition = torch.sum( + insample_condition, axis=(1, -1) ) # Sum over time & series dimension - final_condition = available_condition > 0 + final_condition = insample_condition >= min_insample_points if self.h > 0: - sample_condition = windows[:, self.input_size :, available_idx] - sample_condition = torch.sum( - sample_condition, axis=(1, -1) + outsample_condition 
= windows[:, self.input_size :, available_idx] + outsample_condition = torch.sum( + outsample_condition, axis=(1, -1) ) # Sum over time & series dimension - final_condition = (sample_condition > 0) & (available_condition > 0) + final_condition = (outsample_condition >= min_outsample_points) & ( + insample_condition >= min_insample_points + ) windows = windows[final_condition] diff --git a/neuralforecast/models/autoformer.py b/neuralforecast/models/autoformer.py index bd94a614b..0b99b6ee3 100644 --- a/neuralforecast/models/autoformer.py +++ b/neuralforecast/models/autoformer.py @@ -438,6 +438,7 @@ class Autoformer(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+        `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
@@ -492,6 +493,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -523,6 +525,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/bitcn.py b/neuralforecast/models/bitcn.py index 580c21479..b39e17fc3 100644 --- a/neuralforecast/models/bitcn.py +++ b/neuralforecast/models/bitcn.py @@ -110,6 +110,7 @@ class BiTCN(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+        `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. A single float applies to both the insample and outsample sections of each window; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point (the previous default behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -158,6 +159,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -189,6 +191,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/deepar.py b/neuralforecast/models/deepar.py index 186cc2ff4..dd2861f6a 100644 --- a/neuralforecast/models/deepar.py +++ b/neuralforecast/models/deepar.py @@ -80,6 +80,7 @@ class DeepAR(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -134,6 +135,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = -1, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -171,6 +173,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/deepnpts.py b/neuralforecast/models/deepnpts.py index e0e32fce1..780f5f318 100644 --- a/neuralforecast/models/deepnpts.py +++ b/neuralforecast/models/deepnpts.py @@ -43,6 +43,7 @@ class DeepNPTS(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -93,6 +94,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = 1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "standard", random_seed: int = 1, @@ -139,6 +141,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/dilated_rnn.py b/neuralforecast/models/dilated_rnn.py index 57428c678..00378c594 100644 --- a/neuralforecast/models/dilated_rnn.py +++ b/neuralforecast/models/dilated_rnn.py @@ -315,6 +315,7 @@ class DilatedRNN(BaseModel): `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -364,6 +365,7 @@ def __init__( windows_batch_size=128, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "robust", random_seed: int = 1, @@ -396,6 +398,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/dlinear.py b/neuralforecast/models/dlinear.py index 8cbab511f..8678035c9 100644 --- a/neuralforecast/models/dlinear.py +++ b/neuralforecast/models/dlinear.py @@ -71,6 +71,7 @@ class DLinear(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -117,6 +118,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -148,6 +150,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, drop_last_loader=drop_last_loader, diff --git a/neuralforecast/models/fedformer.py b/neuralforecast/models/fedformer.py index d4fb518e1..711cb25ea 100644 --- a/neuralforecast/models/fedformer.py +++ b/neuralforecast/models/fedformer.py @@ -435,6 +435,7 @@ class FEDformer(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -489,6 +490,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -519,6 +521,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/gru.py b/neuralforecast/models/gru.py index 3129ab522..7ba94bf04 100644 --- a/neuralforecast/models/gru.py +++ b/neuralforecast/models/gru.py @@ -53,6 +53,7 @@ class GRU(BaseModel): `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -106,6 +107,7 @@ def __init__( windows_batch_size=128, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "robust", random_seed=1, @@ -142,6 +144,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/informer.py b/neuralforecast/models/informer.py index 00c26869f..9c136141a 100644 --- a/neuralforecast/models/informer.py +++ b/neuralforecast/models/informer.py @@ -222,6 +222,7 @@ class Informer(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -275,6 +276,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -306,6 +308,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, drop_last_loader=drop_last_loader, diff --git a/neuralforecast/models/itransformer.py b/neuralforecast/models/itransformer.py index 53d1035d1..aef703f75 100644 --- a/neuralforecast/models/itransformer.py +++ b/neuralforecast/models/itransformer.py @@ -52,6 +52,7 @@ class iTransformer(BaseModel): `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -104,6 +105,7 @@ def __init__( windows_batch_size=32, inference_windows_batch_size=32, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -137,6 +139,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/kan.py b/neuralforecast/models/kan.py index e9e0cfcdb..3934776b9 100644 --- a/neuralforecast/models/kan.py +++ b/neuralforecast/models/kan.py @@ -278,6 +278,7 @@ class KAN(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -331,6 +332,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=-1, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -362,6 +364,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, drop_last_loader=drop_last_loader, diff --git a/neuralforecast/models/lstm.py b/neuralforecast/models/lstm.py index 5c59f97ce..c826cf364 100644 --- a/neuralforecast/models/lstm.py +++ b/neuralforecast/models/lstm.py @@ -52,6 +52,7 @@ class LSTM(BaseModel): `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -104,6 +105,7 @@ def __init__( windows_batch_size=128, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "robust", random_seed=1, @@ -140,6 +142,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/mlp.py b/neuralforecast/models/mlp.py index 81dc04c78..7c13ba0f7 100644 --- a/neuralforecast/models/mlp.py +++ b/neuralforecast/models/mlp.py @@ -43,6 +43,7 @@ class MLP(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -87,6 +88,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=-1, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -120,6 +122,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/mlpmultivariate.py b/neuralforecast/models/mlpmultivariate.py index f3af74899..40d3904d8 100644 --- a/neuralforecast/models/mlpmultivariate.py +++ b/neuralforecast/models/mlpmultivariate.py @@ -42,6 +42,7 @@ class MLPMultivariate(BaseModel): `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -87,6 +88,7 @@ def __init__( windows_batch_size=32, inference_windows_batch_size=32, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -121,6 +123,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/nbeats.py b/neuralforecast/models/nbeats.py index 02469345b..149f33a3d 100644 --- a/neuralforecast/models/nbeats.py +++ b/neuralforecast/models/nbeats.py @@ -359,6 +359,7 @@ class NBEATS(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -411,6 +412,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = -1, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -446,6 +448,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, drop_last_loader=drop_last_loader, diff --git a/neuralforecast/models/nbeatsx.py b/neuralforecast/models/nbeatsx.py index 8d3543ecd..4400fa329 100644 --- a/neuralforecast/models/nbeatsx.py +++ b/neuralforecast/models/nbeatsx.py @@ -313,6 +313,7 @@ class NBEATSx(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random seed initialization for replicability.
@@ -367,6 +368,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = -1, start_padding_enabled: bool = False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -405,6 +407,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/nhits.py b/neuralforecast/models/nhits.py index 457673d33..ec8cf089f 100644 --- a/neuralforecast/models/nhits.py +++ b/neuralforecast/models/nhits.py @@ -220,6 +220,7 @@ class NHITS(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -276,6 +277,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = -1, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -309,6 +311,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/nlinear.py b/neuralforecast/models/nlinear.py index 89cbdce3b..799eaff0c 100644 --- a/neuralforecast/models/nlinear.py +++ b/neuralforecast/models/nlinear.py @@ -35,6 +35,7 @@ class NLinear(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -80,6 +81,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -111,6 +113,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/patchtst.py b/neuralforecast/models/patchtst.py index c0977cbbc..3a29d2b3a 100644 --- a/neuralforecast/models/patchtst.py +++ b/neuralforecast/models/patchtst.py @@ -850,6 +850,7 @@ class PatchTST(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -912,6 +913,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size: int = 1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -943,6 +945,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/rmok.py b/neuralforecast/models/rmok.py index ecf105f09..c48007cef 100644 --- a/neuralforecast/models/rmok.py +++ b/neuralforecast/models/rmok.py @@ -285,6 +285,7 @@ class RMoK(BaseModel): `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required to keep a training window. A single float applies to both the insample and outsample periods; a list of two floats specifies [insample_fraction, outsample_fraction]. The default 0.0 keeps any window with at least one valid data point.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -335,6 +336,7 @@ def __init__( windows_batch_size=32, inference_windows_batch_size=32, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -367,6 +369,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/rnn.py b/neuralforecast/models/rnn.py index e9b4804c0..5d08328cb 100644 --- a/neuralforecast/models/rnn.py +++ b/neuralforecast/models/rnn.py @@ -53,6 +53,7 @@ class RNN(BaseModel): `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -107,6 +108,7 @@ def __init__( windows_batch_size=128, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "robust", random_seed=1, @@ -143,6 +145,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/softs.py b/neuralforecast/models/softs.py index e9364758b..a2e479473 100644 --- a/neuralforecast/models/softs.py +++ b/neuralforecast/models/softs.py @@ -109,6 +109,7 @@ class SOFTS(BaseModel): `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -159,6 +160,7 @@ def __init__( windows_batch_size=32, inference_windows_batch_size=32, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -192,6 +194,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/stemgnn.py b/neuralforecast/models/stemgnn.py index b0cb4c9ec..8eef8e3f3 100644 --- a/neuralforecast/models/stemgnn.py +++ b/neuralforecast/models/stemgnn.py @@ -168,6 +168,7 @@ class StemGNN(BaseModel): `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -215,6 +216,7 @@ def __init__( windows_batch_size=32, inference_windows_batch_size=32, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "robust", random_seed: int = 1, @@ -249,6 +251,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/tcn.py b/neuralforecast/models/tcn.py index 5e3226c63..af20da197 100644 --- a/neuralforecast/models/tcn.py +++ b/neuralforecast/models/tcn.py @@ -47,6 +47,7 @@ class TCN(BaseModel): `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -96,6 +97,7 @@ def __init__( windows_batch_size=128, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "robust", random_seed: int = 1, @@ -127,6 +129,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/tft.py b/neuralforecast/models/tft.py index 67af0883f..5d5b62b81 100644 --- a/neuralforecast/models/tft.py +++ b/neuralforecast/models/tft.py @@ -546,6 +546,7 @@ class TFT(BaseModel): `windows_batch_size`: int=None, windows sampled from rolled data, default uses all.
`inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random seed initialization for replicability.
@@ -600,6 +601,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = 1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "robust", random_seed: int = 1, @@ -631,6 +633,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/tide.py b/neuralforecast/models/tide.py index 617857c93..28f6daee3 100644 --- a/neuralforecast/models/tide.py +++ b/neuralforecast/models/tide.py @@ -80,6 +80,7 @@ class TiDE(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -134,6 +135,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size=1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -167,6 +169,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/timellm.py b/neuralforecast/models/timellm.py index 2694f0885..6b392f632 100644 --- a/neuralforecast/models/timellm.py +++ b/neuralforecast/models/timellm.py @@ -206,6 +206,7 @@ class TimeLLM(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
@@ -267,6 +268,7 @@ def __init__( windows_batch_size: int = 1024, inference_windows_batch_size: int = 1024, start_padding_enabled: bool = False, + training_data_availability_threshold=0.0, step_size: int = 1, num_lr_decays: int = 0, early_stop_patience_steps: int = -1, @@ -299,6 +301,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, drop_last_loader=drop_last_loader, diff --git a/neuralforecast/models/timemixer.py b/neuralforecast/models/timemixer.py index 6215a0dfe..76164e9be 100644 --- a/neuralforecast/models/timemixer.py +++ b/neuralforecast/models/timemixer.py @@ -283,6 +283,7 @@ class TimeMixer(BaseModel): `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -341,6 +342,7 @@ def __init__( windows_batch_size=32, inference_windows_batch_size=32, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -373,6 +375,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/timesnet.py b/neuralforecast/models/timesnet.py index c6ecb2af8..0fdf8926e 100644 --- a/neuralforecast/models/timesnet.py +++ b/neuralforecast/models/timesnet.py @@ -149,6 +149,7 @@ class TimesNet(BaseModel): `windows_batch_size` : int (default=64), Number of windows to sample in each training batch.
`inference_windows_batch_size` : int (default=256), Number of windows to sample in each inference batch.
`start_padding_enabled` : bool (default=False), If True, the model will pad the time series with zeros at the beginning by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size` : int (default=1), Step size between each window of temporal data.
`scaler_type` : str (default='standard'), Type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed` : int (default=1), Random_seed for pytorch initializer and numpy generators.
@@ -201,6 +202,7 @@ def __init__( windows_batch_size=64, inference_windows_batch_size=256, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "standard", random_seed: int = 1, @@ -232,6 +234,7 @@ def __init__( valid_batch_size=valid_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, drop_last_loader=drop_last_loader, diff --git a/neuralforecast/models/timexer.py b/neuralforecast/models/timexer.py index 9aa73fe93..671b8d060 100644 --- a/neuralforecast/models/timexer.py +++ b/neuralforecast/models/timexer.py @@ -163,6 +163,7 @@ class TimeXer(BaseModel): `windows_batch_size`: int=32, number of windows in each batch.
`inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -219,6 +220,7 @@ def __init__( windows_batch_size=32, inference_windows_batch_size=32, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -252,6 +254,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/tsmixer.py b/neuralforecast/models/tsmixer.py index 7565c18cf..dad47ee36 100644 --- a/neuralforecast/models/tsmixer.py +++ b/neuralforecast/models/tsmixer.py @@ -123,6 +123,7 @@ class TSMixer(BaseModel): `windows_batch_size`: int=32, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -174,6 +175,7 @@ def __init__( windows_batch_size=32, inference_windows_batch_size=32, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -208,6 +210,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/tsmixerx.py b/neuralforecast/models/tsmixerx.py index 12f3de87d..1577513c9 100644 --- a/neuralforecast/models/tsmixerx.py +++ b/neuralforecast/models/tsmixerx.py @@ -189,6 +189,7 @@ class TSMixerx(BaseModel): `windows_batch_size`: int=32, number of windows to sample in each training batch.
`inference_windows_batch_size`: int=32, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -240,6 +241,7 @@ def __init__( windows_batch_size=32, inference_windows_batch_size=32, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -274,6 +276,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, random_seed=random_seed, diff --git a/neuralforecast/models/vanillatransformer.py b/neuralforecast/models/vanillatransformer.py index 04d67550c..c90c15579 100644 --- a/neuralforecast/models/vanillatransformer.py +++ b/neuralforecast/models/vanillatransformer.py @@ -65,6 +65,7 @@ class VanillaTransformer(BaseModel): `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
`inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
+ `training_data_availability_threshold`: Union[float, List[float]]=0.0, minimum fraction of valid data points required for training windows. Single float applies to both insample and outsample; list of two floats specifies [insample_fraction, outsample_fraction]. Default 0.0 allows windows with only 1 valid data point (current behavior).
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -118,6 +119,7 @@ def __init__( windows_batch_size=1024, inference_windows_batch_size: int = 1024, start_padding_enabled=False, + training_data_availability_threshold=0.0, step_size: int = 1, scaler_type: str = "identity", random_seed: int = 1, @@ -149,6 +151,7 @@ def __init__( windows_batch_size=windows_batch_size, inference_windows_batch_size=inference_windows_batch_size, start_padding_enabled=start_padding_enabled, + training_data_availability_threshold=training_data_availability_threshold, step_size=step_size, scaler_type=scaler_type, drop_last_loader=drop_last_loader,