|
25 | 25 | " or sign in to your existing Atlas account.\n",
|
26 | 26 | "\n",
|
27 | 27 | "2. [Follow the instructions](https://www.mongodb.com/docs/atlas/tutorial/deploy-free-tier-cluster/)\n",
|
28 |
| - " (select Atlas UI as the procedure) to deploy your first cluster.\n", |
| 28 | + " (select Atlas UI as the procedure) to deploy your first cluster, which distributes your data across multiple servers for improved performance and redundancy.\n", |
29 | 29 | "\n",
|
30 |
| - "3. Create the database: `movies`.\n", |
| 30 | + "3. Create the database `knowledge_base` and the collection `research_papers`.\n", |
31 | 31 | "\n",
|
32 | 32 | "\n"
|
33 | 33 | ]
|
|
81 | 81 | "# Load Dataset\n",
|
82 | 82 | "from datasets import load_dataset\n",
|
83 | 83 | "import pandas as pd\n",
|
| 84 | + "import os\n", |
84 | 85 | "\n",
|
85 | 86 | "# Make sure you have a Hugging Face token (HF_TOKEN) in your development environment before running the code below\n",
|
86 | 87 | "# How to get a token: https://huggingface.co/docs/hub/en/security-tokens\n",
|
87 | 88 | "# Dataset Location: https://huggingface.co/datasets/MongoDB/subset_arxiv_papers_with_embeddings\n",
|
| 89 | + "os.environ[\"HF_TOKEN\"] = \"place_hugging_face_access_token here\" # Do not use this in production environment, use a .env file instead\n", |
| 90 | + "\n", |
88 | 91 | "dataset = load_dataset(\"MongoDB/subset_arxiv_papers_with_embeddings\")\n",
|
89 | 92 | "\n",
|
90 | 93 | "# Convert the dataset to a pandas dataframe\n",
|
|
662 | 665 | "mongo_client = get_mongo_client(mongo_uri)\n",
|
663 | 666 | "\n",
|
664 | 667 | "# Ingest data into MongoDB\n",
|
665 |
| - "db = mongo_client['movies']\n", |
666 |
| - "collection = db['movie_collection_2']" |
| 668 | + "db = mongo_client['knowledge_base']\n", |
| 669 | + "collection = db['research_papers']" |
667 | 670 | ]
|
668 | 671 | },
|
669 | 672 | {
|
|
0 commit comments