Skip to content

Commit 2581b47

Browse files
jasonpriem and claude
committed
#49.5 separate-vector-index: update ES targets to works-v33
sync_works.ipynb: ELASTIC_INDEX works-v32 → works-v33; sync_vector_index.ipynb: comment references works-v32 → works-v33. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8814f8c commit 2581b47

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

notebooks/elastic/sync_vector_index.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
"Sync Vector Index (works-vectors-v1)\n",
2121
"\n",
2222
"Bulk loads embeddings + 14 flat filter fields into a dedicated lightweight\n",
23-
"ES index optimized for kNN vector search (12 shards vs 72 on works-v32).\n",
23+
"ES index optimized for kNN vector search (12 shards vs 72 on works-v33).\n",
2424
"\n",
25-
"Two-phase semantic search: kNN here returns IDs → mget full docs from works-v32.\n",
25+
"Two-phase semantic search: kNN here returns IDs → mget full docs from works-v33.\n",
2626
"\n",
2727
"Run modes:\n",
2828
"- is_full_sync=true: Load all ~413M embeddings (initial load, ~6-8 hours)\n",

notebooks/elastic/sync_works.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
}
4040
},
4141
"outputs": [],
42-
"source": "import uuid\nfrom datetime import datetime\nfrom pyspark.sql import functions as F\nfrom pyspark.sql.types import *\nfrom dataclasses import dataclass\n\nfrom elasticsearch import Elasticsearch, helpers\nimport logging\nimport json\n\nlogging.basicConfig(level=logging.WARNING, format='[%(asctime)s]: %(message)s')\nlog = logging.getLogger(__name__)\n\nELASTIC_INDEX = \"works-v32\"\nELASTIC_URL = dbutils.secrets.get(scope=\"elastic\", key=\"elastic_url\")\nMAX_LENGTH = 32000 # Slightly below the 32766 limit\n\nIS_FULL_SYNC = dbutils.widgets.get(\"is_full_sync\").lower() == \"true\" # default is incremental\n\nprint(f\"IS_FULL_SYNC: {IS_FULL_SYNC}\")"
42+
"source": "import uuid\nfrom datetime import datetime\nfrom pyspark.sql import functions as F\nfrom pyspark.sql.types import *\nfrom dataclasses import dataclass\n\nfrom elasticsearch import Elasticsearch, helpers\nimport logging\nimport json\n\nlogging.basicConfig(level=logging.WARNING, format='[%(asctime)s]: %(message)s')\nlog = logging.getLogger(__name__)\n\nELASTIC_INDEX = \"works-v33\"\nELASTIC_URL = dbutils.secrets.get(scope=\"elastic\", key=\"elastic_url\")\nMAX_LENGTH = 32000 # Slightly below the 32766 limit\n\nIS_FULL_SYNC = dbutils.widgets.get(\"is_full_sync\").lower() == \"true\" # default is incremental\n\nprint(f\"IS_FULL_SYNC: {IS_FULL_SYNC}\")"
4343
},
4444
{
4545
"cell_type": "code",

0 commit comments

Comments (0)