diff --git a/notebooks/instructlab-knowledge/instructlab-knowledge.ipynb b/notebooks/instructlab-knowledge/instructlab-knowledge.ipynb index 7ebde02..61cabfb 100644 --- a/notebooks/instructlab-knowledge/instructlab-knowledge.ipynb +++ b/notebooks/instructlab-knowledge/instructlab-knowledge.ipynb @@ -260,7 +260,9 @@ "source": [ "### View the Chunks\n", "\n", - "To view the chunks, run through the following cell. As you can see the document is broken into small pieces with metadata about the chunk based on the document's format" + "To view a random sample of the generated chunks, run the following cell. Re-run the cell to see another random sample. If there are chunking issues, try different conversion and/or chunking configurations.\n", + "\n", + "As you can see, the document is broken into small pieces with metadata about the chunk based on the document's format." ] }, { @@ -270,8 +272,12 @@ "metadata": {}, "outputs": [], "source": [ - "#print(all_chunks)\n", - "print(chunks[0])" + "import random\n", + "import json\n", + "\n", + "sample = random.sample(all_chunks, min(len(all_chunks), 5))\n", + "for chunk in sample:\n", + " print(f\"{chunk['chunk']}\\n\\n====\\n\")" ] }, { @@ -681,7 +687,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -695,7 +701,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.9" } }, "nbformat": 4,