|
12 | 12 | import os |
13 | 13 | from abc import ABC, abstractmethod |
14 | 14 |
|
15 | | -from opensearchpy import OpenSearchException |
| 15 | +import opensearchpy.exceptions |
16 | 16 |
|
| 17 | +from osbenchmark import exceptions |
17 | 18 | from osbenchmark.utils import console |
18 | 19 | from osbenchmark.workload_generator.config import CustomWorkload |
19 | 20 |
|
@@ -41,8 +42,10 @@ def extract_indices(self, workload_path): |
41 | 42 | try: |
42 | 43 | for index in self.custom_workload.indices: |
43 | 44 | extracted_indices += self.extract(workload_path, index.name) |
44 | | - except OpenSearchException: |
45 | | - self.logger("Failed at extracting index [%s]", index) |
| 45 | + except opensearchpy.exceptions.NotFoundError: |
| 46 | + raise exceptions.SystemSetupError(f"Index [{index.name}] does not exist.") |
| 47 | + except opensearchpy.OpenSearchException: |
| 48 | + self.logger.error("Failed at extracting index [%s]", index) |
46 | 49 | failed_indices += index |
47 | 50 |
|
48 | 51 | return extracted_indices, failed_indices |
@@ -138,6 +141,9 @@ def extract_documents(self, index, documents_limit=None): |
138 | 141 |
|
139 | 142 |
|
140 | 143 | class SequentialCorpusExtractor(CorpusExtractor): |
| 144 | + DEFAULT_TEST_MODE_DOC_COUNT = 1000 |
| 145 | + DEFAULT_TEST_MODE_SUFFIX = "-1k" |
| 146 | + |
141 | 147 | def __init__(self, custom_workload, client): |
142 | 148 | self.custom_workload: CustomWorkload = custom_workload |
143 | 149 | self.client = client |
@@ -173,15 +179,30 @@ def extract_documents(self, index, documents_limit=None): |
173 | 179 |
|
174 | 180 | documents_to_extract = total_documents if not documents_limit else min(total_documents, documents_limit) |
175 | 181 |
|
| 182 | + if documents_limit: |
| 183 | + # The test-mode corpus (documents-1k.json) contains fewer than 1K documents only |
| 184 | + # when documents_limit is below 1K or the source index has fewer than 1K documents |
| 185 | + if documents_limit < self.DEFAULT_TEST_MODE_DOC_COUNT: |
| 186 | + test_mode_warning_msg = "Due to --number-of-docs set by user, " + \ |
| 187 | + f"test-mode docs will be less than the default {self.DEFAULT_TEST_MODE_DOC_COUNT} documents." |
| 188 | + console.warn(test_mode_warning_msg) |
| 189 | + |
| 190 | + # Notify users when they request more documents than the index contains |
| 191 | + if documents_limit > total_documents: |
| 192 | + documents_to_extract_warning_msg = f"User requested extraction of {documents_limit} documents " + \ |
| 193 | + f"but there are only {total_documents} documents in {index}. " + \ |
| 194 | + f"Will only extract {total_documents} documents from {index}." |
| 195 | + console.warn(documents_to_extract_warning_msg) |
| 196 | + |
176 | 197 | if documents_to_extract > 0: |
177 | 198 | logger.info("[%d] total docs in index [%s]. Extracting [%s] docs.", total_documents, index, documents_to_extract) |
178 | 199 | docs_path = self._get_doc_outpath(self.custom_workload.workload_path, index) |
179 | 200 | # Create test mode corpora |
180 | 201 | self.dump_documents( |
181 | 202 | self.client, |
182 | 203 | index, |
183 | | - self._get_doc_outpath(self.custom_workload.workload_path, index, "-1k"), |
184 | | - min(documents_to_extract, 1000), |
| 204 | + self._get_doc_outpath(self.custom_workload.workload_path, index, self.DEFAULT_TEST_MODE_SUFFIX), |
| 205 | + min(documents_to_extract, self.DEFAULT_TEST_MODE_DOC_COUNT), |
185 | 206 | " for test mode") |
186 | 207 | # Create full corpora |
187 | 208 | self.dump_documents(self.client, index, docs_path, documents_to_extract) |
|
0 commit comments