|
365 | 365 | "source": [ |
366 | 366 | "# Define model\n", |
367 | 367 | "\n", |
368 | | - "# IMPORTANT: Gemma-2 has a \"final_logits_softcap\" property, we set it to 0.0\n", |
| 368 | + "# IMPORTANT: Gemma-2 has a \"final_logits_softcap\" property. Set it to 0.0\n", |
369 | 369 | "# for better transfer results.\n", |
370 | 370 | "model_config = ml_collections.FrozenConfigDict({\n", |
371 | 371 | " \"llm\": {\"vocab_size\": 257_152, \"variant\": LLM_VARIANT, \"final_logits_softcap\": 0.0},\n", |
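For context, Gemma-2's final-logit soft-capping squashes the last-layer logits smoothly into a bounded range via `cap * tanh(logits / cap)`, and the model ships with a nonzero cap by default. A minimal sketch of the operation, assuming the common convention that a cap of 0.0 means "disabled" (the function name `softcap_logits` is illustrative, not from the notebook):

```python
import jax.numpy as jnp

def softcap_logits(logits, cap: float):
  # Gemma-2-style soft-capping: bounds logits smoothly to (-cap, cap).
  # ASSUMPTION: cap == 0.0 is treated as "capping disabled", which is
  # what setting final_logits_softcap to 0.0 in the config relies on.
  if cap <= 0.0:
    return logits
  return cap * jnp.tanh(logits / cap)
```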
|
434 | 434 | "\n", |
435 | 435 | "# Loading all params simultaneously - albeit much faster and more succinct -\n", |
436 | 436 | "# requires more RAM than the T4 colab runtimes have by default.\n", |
437 | | - "# Instead we do it param by param.\n", |
| 437 | + "# Instead, do it param by param.\n", |
438 | 438 | "params, treedef = jax.tree.flatten(params)\n", |
439 | 439 | "sharding_leaves = jax.tree.leaves(params_sharding)\n", |
440 | 440 | "trainable_leaves = jax.tree.leaves(trainable_mask)\n", |
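The flattened leaves can then be transferred one at a time. A minimal sketch of what such a loop could look like, assuming the `params`, `sharding_leaves`, `trainable_leaves`, and `treedef` values from the cell above and the notebook's existing `jax`/`jnp` imports (the float32 cast for trainable leaves is an assumption, not confirmed by this hunk):

```python
for i, (param, sharding, trainable) in enumerate(
    zip(params, sharding_leaves, trainable_leaves)):
  # Move one leaf at a time onto its target sharding, so peak host RAM
  # stays near the size of the largest single parameter, not the tree.
  param = jax.device_put(param, sharding)
  if trainable:
    # ASSUMPTION: keep trainable leaves in float32 for stable updates.
    param = param.astype(jnp.float32)
  params[i] = param

# Rebuild the original pytree structure from the transferred leaves.
params = jax.tree.unflatten(treedef, params)
```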
|
708 | 708 | " targets = jax.nn.one_hot(txts[:, 1:], text_logits.shape[-1])\n", |
709 | 709 | "\n", |
710 | 710 | "  # Compute the loss per example, i.e. the mean of per-token pplx.\n", |
711 | | - " # Since each example has a different number of tokens we normalize it.\n", |
| 711 | + " # Since each example has a different number of tokens, normalize it.\n", |
712 | 712 | " token_pplx = jnp.sum(logp * targets, axis=-1) # sum across vocab_size.\n", |
713 | 713 | " example_loss = -jnp.sum(token_pplx * mask_loss, axis=-1) # sum across seq_len.\n", |
714 | 714 | " example_loss /= jnp.clip(jnp.sum(mask_loss, -1), 1) # weight by num of tokens.\n", |
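Packaged as a self-contained function with illustrative shape comments, the normalization above reads as follows (the name `masked_example_loss` and the shape letters are made up for this sketch; the arithmetic mirrors the cell):

```python
import jax.numpy as jnp

def masked_example_loss(logp, targets, mask_loss):
  # logp:      [B, S, V] log-probabilities over the vocabulary.
  # targets:   [B, S, V] one-hot next-token targets.
  # mask_loss: [B, S]    1.0 where a token counts toward the loss.
  token_pplx = jnp.sum(logp * targets, axis=-1)             # [B, S]
  example_loss = -jnp.sum(token_pplx * mask_loss, axis=-1)  # [B]
  # Divide by the token count; the clip guards against empty examples.
  return example_loss / jnp.clip(jnp.sum(mask_loss, -1), 1)
```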
|