From 09296eeb89a900790aa8f1efd1f2761a75124ec6 Mon Sep 17 00:00:00 2001
From: blagoev
Date: Tue, 7 Oct 2025 10:50:09 +0300
Subject: [PATCH] add sharded support

---
 pytpcc/coordinator.py           |  9 ++--
 pytpcc/drivers/mongodbdriver.py | 74 +++++++++++++++++++++------------
 pytpcc/runtime/executor.py      | 10 ++---
 pytpcc/tpcc.py                  | 23 +++++-----
 pytpcc/util/nurand.py           | 17 ++++----
 pytpcc/util/results.py          | 13 ++++--
 pytpcc/worker.py                | 22 +++++-----
 shardColl.js                    | 53 +++++++++++++++++++++++
 shardColl.sh                    | 43 +++++++++++++++++++
 9 files changed, 195 insertions(+), 69 deletions(-)
 create mode 100644 shardColl.js
 create mode 100644 shardColl.sh

diff --git a/pytpcc/coordinator.py b/pytpcc/coordinator.py
index 412b2c2..1668170 100755
--- a/pytpcc/coordinator.py
+++ b/pytpcc/coordinator.py
@@ -132,6 +132,8 @@ def startExecution(scaleParameters, args, config,channels):
     aparser.add_argument('--clientprocs', default=1, type=int, metavar='N',
                          help='Number of processes on each client node.')
+    aparser.add_argument('--samewh', default=85, type=float, metavar='PP',
+                         help='Percent paying same warehouse')
     aparser.add_argument('--stop-on-error', action='store_true',
                          help='Stop the transaction execution when the driver throws an exception.')
     aparser.add_argument('--no-load', action='store_true',
@@ -160,7 +162,7 @@ def startExecution(scaleParameters, args, config,channels):
     ## Load Configuration file
     if args['config']:
         logging.debug("Loading configuration file '%s'" % args['config'])
-        cparser = ConfigParser()
+        cparser = SafeConfigParser()
         cparser.read(os.path.realpath(args['config'].name))
         config = dict(cparser.items(args['system']))
     else:
@@ -171,6 +173,7 @@ def startExecution(scaleParameters, args, config,channels):
         config['load'] = False
         config['execute'] = False
         if config['reset']: logging.info("Reseting database")
+    config['warehouses'] = args['warehouses']
     driver.loadConfig(config)
 
     logging.info("Initializing TPC-C benchmark using %s" % driver)
@@ -208,8 +211,8 @@ def startExecution(scaleParameters, args, config,channels):
     if not args['no_execute']:
         results = startExecution(scaleParameters, args, config,channels)
         assert results
-        logging.info(results.show(load_time, driver, len(channels)))
-        print results.show(load_time, driver, len(channels))
+        logging.info(results.show(load_time, driver, len(channels), args['samewh']))
+        print(results.show(load_time, driver, len(channels), args['samewh']))
     ## IF
 ## MAIN
diff --git a/pytpcc/drivers/mongodbdriver.py b/pytpcc/drivers/mongodbdriver.py
index df8fe53..9691823 100644
--- a/pytpcc/drivers/mongodbdriver.py
+++ b/pytpcc/drivers/mongodbdriver.py
@@ -38,14 +38,9 @@ from pprint import pformat
 from time import sleep
 import pymongo
-from pymongo.client_session import TransactionOptions
-
-# Import TransactionOptions from pymongo.client_session or
-# pymongo.synchronous.client_session depending on the version of pymongo
-from pymongo.client_session import TransactionOptions
 import constants
-from .abstractdriver import AbstractDriver
+from abstractdriver import AbstractDriver
 
 TABLE_COLUMNS = {
     constants.TABLENAME_ITEM: [
@@ -206,7 +201,8 @@ class MongodbDriver(AbstractDriver):
         "secondary_reads": ("If true, we will allow secondary reads", True),
         "retry_writes": ("If true, we will enable retryable writes", True),
         "causal_consistency": ("If true, we will perform causal reads ", True),
-        "shards": ("If >1 then sharded", "1")
+        "no_global_items": ("If true, we will use only one 'unsharded' items collection", False),
+        "shards": ("If >0 then sharded", "0")
     }
 
     DENORMALIZED_TABLES = [
         constants.TABLENAME_ORDERS,
@@ -237,7 +233,9 @@ def __init__(self, ddl):
         self.output = open('results.json','a')
         self.result_doc = {}
         self.warehouses = 0
-        self.shards = 1
+        self.no_global_items = False
+        self.shards = 0
+        self.sshost = None
 
         ## Create member mapping to collections
         for name in constants.ALL_TABLES:
@@ -270,6 +268,7 @@ def loadConfig(self, config):
         self.warehouses = config['warehouses']
         self.find_and_modify = config['findandmodify'] == 'True'
         self.causal_consistency = config['causal_consistency'] == 'True'
+        self.no_global_items = config['no_global_items'] == 'True'
         self.retry_writes = config['retry_writes'] == 'True'
         self.secondary_reads = config['secondary_reads'] == 'True'
         self.agg = config['agg'] == 'True'
@@ -305,12 +304,19 @@ def loadConfig(self, config):
             real_uri = uri[0:pindex]+userpassword+uri[pindex:]
             display_uri = uri[0:pindex]+usersecret+uri[pindex:]
 
+        # for extra URL to mongos
+        if userpassword == "" and ':' in uri[pindex:] and '@' in uri[pindex:]:
+            at = uri.index('@',pindex)
+            userpassword = uri[(pindex):(at+1)]
         self.client = pymongo.MongoClient(real_uri,
                                           retryWrites=self.retry_writes,
                                           readPreference=self.read_preference,
                                           readConcernLevel=self.read_concern)
 
         self.result_doc['before']=self.get_server_status()
+        ssURI="mongodb://"+userpassword+self.result_doc['before']['host']+"/test?ssl=true&authSource=admin"
+        logging.debug("%s %s %s", userpassword, self.result_doc['before']['host'], ssURI)
+        self.sshost = pymongo.MongoClient(ssURI)
 
         # set default writeConcern on the database
         self.database = self.client.get_database(name=str(config['name']), write_concern=self.write_concern)
@@ -402,10 +408,11 @@ def loadTuples(self, tableName, tuples):
         else:
             if tableName == constants.TABLENAME_ITEM:
                 tuples3 = []
-                if self.shards > 1:
-                    ww = range(1,self.warehouses+1)
+                if self.shards > 0:
+                    ww = range(1,self.warehouses+1, int(self.warehouses/self.shards))
                 else:
                     ww = [0]
+                # print self.shards, self.warehouses, ww
                 for t in tuples:
                     for w in ww:
                         t2 = list(t)
@@ -415,18 +422,23 @@ def loadTuples(self, tableName, tuples):
             for t in tuples:
                 tuple_dicts.append(dict([(columns[i], t[i]) for i in num_columns]))
             ## FOR
-            self.database[tableName].insert_many(tuple_dicts)
+
+            self.database[tableName].insert_many(tuple_dicts, ordered=False)
         ## IF
 
         return
 
     def loadFinishDistrict(self, w_id, d_id):
+        logging.debug("LoadFinishDistrict")
         if self.denormalize:
             logging.debug("Pushing %d denormalized ORDERS records for WAREHOUSE %d DISTRICT %d into MongoDB", len(self.w_orders), w_id, d_id)
-            self.database[constants.TABLENAME_ORDERS].insert_many(self.w_orders.values())
+            self.database[constants.TABLENAME_ORDERS].insert_many(self.w_orders.values(), ordered=False)
             self.w_orders.clear()
         ## IF
 
+    def loadFinish(self):
+        logging.debug("load finish: ")
+
     def executeStart(self):
         """Optional callback before the execution for each client starts"""
         return None
@@ -614,8 +626,10 @@ def _doNewOrderTxn(self, s, params):
         d_next_o_id = d["D_NEXT_O_ID"]
 
         # fetch matching items and see if they are all valid
-        if self.shards > 1: i_w_id = w_id
+        if self.shards > 0: i_w_id = w_id-(w_id-1)%(self.warehouses/self.shards) # get_i_w(w_id)
         else: i_w_id = 0
+        if self.no_global_items:
+            i_w_id = 1
         items = list(self.item.find({"I_ID": {"$in": i_ids}, "I_W_ID": i_w_id, "$comment": comment},
                                     {"_id":0, "I_ID": 1, "I_PRICE": 1, "I_NAME": 1, "I_DATA": 1},
                                     session=s))
@@ -628,8 +642,7 @@ def _doNewOrderTxn(self, s, params):
             #print constants.INVALID_ITEM_MESSAGE + ", Aborting transaction (ok for 1%)"
             return None
         ## IF
-        xxi_ids = tuple(map(lambda o: o['I_ID'], items))
-        items = sorted(items, key=lambda x: xxi_ids.index(x['I_ID']))
+        items = sorted(items, key=lambda x: i_ids.index(x['I_ID']))
 
         # getWarehouseTaxRate
         w = self.warehouse.find_one({"W_ID": w_id, "$comment": comment}, {"_id":0, "W_TAX": 1}, session=s)
@@ -684,8 +697,7 @@ def _doNewOrderTxn(self, s, params):
                                          session=s))
         ## IF
         assert len(all_stocks) == ol_cnt, "all_stocks len %d != ol_cnt %d" % (len(all_stocks), ol_cnt)
-        xxxi_ids = tuple(map(lambda o: (o['S_I_ID'], o['S_W_ID']), all_stocks))
-        all_stocks = sorted(all_stocks, key=lambda x: xxxi_ids.index((x['S_I_ID'], x["S_W_ID"])))
+        all_stocks = sorted(all_stocks, key=lambda x: item_w_list.index((x['S_I_ID'], x["S_W_ID"])))
 
         ## ----------------
         ## Insert Order Line, Stock Item Information
@@ -784,7 +796,7 @@ def _doNewOrderTxn(self, s, params):
 
         if self.batch_writes:
             if not self.denormalize:
-                self.order_line.insert_many(order_line_writes, session=s)
+                self.order_line.insert_many(order_line_writes, ordered=False, session=s)
             self.stock.bulk_write(stock_writes, session=s)
         ## IF
@@ -936,7 +948,7 @@ def _doPaymentTxn(self, s, params):
                                          session=s)
         ## IF
 
-        search_fields = {"C_W_ID": w_id, "C_D_ID": d_id, "$comment": comment}
+        search_fields = {"C_W_ID": c_w_id, "C_D_ID": c_d_id, "$comment": comment}
         return_fields = {"C_BALANCE": 0, "C_YTD_PAYMENT": 0, "C_PAYMENT_CNT": 0}
 
         if c_id != None:
@@ -1084,9 +1096,9 @@ def _doStockLevelTxn(self, s, params):
             ol_ids.add(ol["OL_I_ID"])
         ## FOR
 
-        result = self.stock.count_documents({"S_W_ID": w_id,
+        result = self.stock.find({"S_W_ID": w_id,
                                   "S_I_ID": {"$in": list(ol_ids)},
-                                  "S_QUANTITY": {"$lt": threshold}, "$comment": comment})
+                                  "S_QUANTITY": {"$lt": threshold}, "$comment": comment}).count()
 
         return int(result)
 
@@ -1115,7 +1127,7 @@ def run_transaction(self, txn_callback, session, name, params):
 
     # Should we retry txns within the same session or start a new one?
     def run_transaction_with_retries(self, txn_callback, name, params):
         txn_retry_counter = 0
-        to = TransactionOptions(
+        to = pymongo.client_session.TransactionOptions(
             read_concern=None,
             #read_concern=pymongo.read_concern.ReadConcern("snapshot"),
             write_concern=self.write_concern,
@@ -1137,8 +1149,12 @@ def run_transaction_with_retries(self, txn_callback, name, params):
             sleep(txn_retry_counter * .1)
             logging.debug("txn retry number for %s: %d", name, txn_retry_counter)
         ## WHILE
-    def get_server_status(self):
-        ss=self.client.admin.command('serverStatus')
+
+    def get_server_status(self, otherClient=None):
+        if otherClient and self.sshost:
+            ss=self.sshost.admin.command('serverStatus')
+        else:
+            ss=self.client.admin.command('serverStatus')
         if "$configServerState" in ss:
             del ss["$configServerState"]
         if "$gleStats" in ss:
@@ -1157,8 +1173,12 @@ def get_server_status(self):
 
     def save_result(self, result_doc):
         self.result_doc.update(result_doc)
-        self.result_doc['after']=self.get_server_status()
-        # saving test results and server statuses ('before' and 'after') into MongoDB as a single document
-        self.client.test.results.insert_one(self.result_doc)
+        self.result_doc['after']=self.get_server_status(self.sshost)
+        # save cache size, instance type, version
+        self.result_doc['version']=self.result_doc['after']['version'][0:3]
+#            {$trunc:{$divide:["$before.wiredTiger.cache.maximum bytes configured",1024*1024*1024]}},72]}}, {$set:{cacheGB:NumberLong(72)
+        #self.result_doc['cacheGB']=int(self.result_doc['after']['wiredTiger']['cache']['maximum bytes configured']/1073741824)
+        #self.result_doc['instance']={18:"M50",36:"M60",72:"M80"}.get(self.result_doc['cacheGB'], 'unknown')
+        self.client.test.results.save(self.result_doc)
 
 ## CLASS
diff --git a/pytpcc/runtime/executor.py b/pytpcc/runtime/executor.py
index c065e93..be5a5a9 100644
--- a/pytpcc/runtime/executor.py
+++ b/pytpcc/runtime/executor.py
@@ -44,10 +44,11 @@ class Executor:
 
-    def __init__(self, driver, scaleParameters, stop_on_error = False):
+    def __init__(self, driver, scaleParameters, stop_on_error = False, sameWH = 85):
         self.driver = driver
         self.scaleParameters = scaleParameters
         self.stop_on_error = stop_on_error
+        self.same_wh = sameWH
     ## DEF
 
     def execute(self, duration):
@@ -76,8 +77,7 @@ def execute(self, duration):
                     batch_result.abortTransaction(batch_txn_id)
                     if self.stop_on_error: raise
                     continue
-
-                # This will happen on all failing 1% of the transactions
+
                 if val is None:
                     global_result.abortTransaction(global_txn_id, retries)
                     batch_result.abortTransaction(batch_txn_id, retries)
@@ -86,7 +86,7 @@ def execute(self, duration):
                 batch_result.stopTransaction(batch_txn_id, retries)
                 global_result.stopTransaction(global_txn_id, retries)
 
-                if time.time() - start_batch > 900: # every 15 minutes
+                if time.time() - start_batch > 1800: # every 30 minutes
                     batch_result.stopBenchmark()
                     logging.info(batch_result.show())
                     batch_result = results.Results()
@@ -221,7 +221,7 @@ def generatePaymentParams(self):
         h_date = datetime.now()
 
         ## 85%: paying through own warehouse (or there is only 1 warehouse)
-        if self.scaleParameters.warehouses == 1 or x <= 85:
+        if self.scaleParameters.warehouses == 1 or x <= self.same_wh:
             c_w_id = w_id
             c_d_id = d_id
         ## 15%: paying through another warehouse:
diff --git a/pytpcc/tpcc.py b/pytpcc/tpcc.py
index 98a8885..3a243e2 100755
--- a/pytpcc/tpcc.py
+++ b/pytpcc/tpcc.py
@@ -36,7 +36,8 @@
 import time
 import multiprocessing
 import subprocess
-from configparser import ConfigParser
+import random
+from ConfigParser import SafeConfigParser
 from pprint import pprint, pformat
 
 from util import results, scaleparameters
@@ -99,7 +100,10 @@ def getDrivers():
 ## DEF
 
 ## ==============================================
-## startLoading
+## startLoading.
+# This intentionally uses a multiprocessing pool and intentionally starts new processes for each batch,
+# because for long-running, many-hour loads, the connection between the child process and the parent
+# process is lost and the parent blocks indefinitely waiting for the result.
 ## ==============================================
 def startLoading(driverClass, scaleParameters, args, config):
     """
@@ -199,10 +203,7 @@ def startExecution(driverClass, scaleParameters, args, config):
     logging.debug("Creating client pool with %d processes", args['clients'])
     pool = multiprocessing.Pool(args['clients'])
     debug = logging.getLogger().isEnabledFor(logging.DEBUG)
-    try:
-        del args['config']
-    except KeyError:
-        print()
+
     worker_results = []
     for _ in range(args['clients']):
         r = pool.apply_async(executorFunc, (driverClass, scaleParameters, args, config, debug,))
@@ -236,7 +237,7 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
     config['reset'] = False
     driver.loadConfig(config)
 
-    e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'])
+    e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'], sameWH=args['samewh'])
     driver.executeStart()
     results = e.execute(args['duration'])
     driver.executeFinish()
@@ -257,6 +258,8 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
                          help='Instruct the driver to reset the contents of the database')
     aparser.add_argument('--scalefactor', default=1, type=float, metavar='SF',
                          help='Benchmark scale factor')
+    aparser.add_argument('--samewh', default=85, type=float, metavar='PP',
+                         help='Percent paying same warehouse')
     aparser.add_argument('--warehouses', default=4, type=int, metavar='W',
                          help='Number of Warehouses')
     aparser.add_argument('--duration', default=60, type=int, metavar='D',
@@ -295,7 +298,7 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
     ## Load Configuration file
     if args['config']:
         logging.debug("Loading configuration file '%s'", args['config'])
-        cparser = ConfigParser()
+        cparser = SafeConfigParser()
         cparser.read(os.path.realpath(args['config'].name))
         config = dict(cparser.items(args['system']))
     else:
@@ -342,7 +345,7 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
     if not args['no_execute']:
         noftifyDsiOfPhaseStart("TPC-C_workload")
         if args['clients'] == 1:
-            e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'])
+            e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'], sameWH=args['samewh'])
             driver.executeStart()
             results = e.execute(args['duration'])
             driver.executeFinish()
@@ -351,7 +354,7 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
         assert results, "No results from execution for %d client!" % args['clients']
         logging.info("Final Results")
         logging.info("Threads: %d", args['clients'])
-        logging.info(results.show(load_time, driver, args['clients']))
+        logging.info(results.show(load_time, driver, args['clients'], args['samewh']))
         noftifyDsiOfPhaseEnd("TPC-C_workload")
     ## IF
diff --git a/pytpcc/util/nurand.py b/pytpcc/util/nurand.py
index 09fa55c..361cec7 100644
--- a/pytpcc/util/nurand.py
+++ b/pytpcc/util/nurand.py
@@ -29,14 +29,13 @@
 # OTHER DEALINGS IN THE SOFTWARE.
 # -----------------------------------------------------------------------
 
-import random
+import rand
 
 def makeForLoad():
     """Create random NURand constants, appropriate for loading the database."""
-    cLast = random.randint(0, 255)
-    cId = random.randint(0, 1023)
-    orderLineItemId = random.randint(0, 8191)
-    return NURandC(cLast, cId, orderLineItemId)
+    cLast = rand.number(0, 255)
+    cId = rand.number(0, 1023)
+    orderLineItemId = rand.number(0, 8191)
+    return NURandC(cLast, cId, orderLineItemId)
 
 def validCRun(cRun, cLoad):
@@ -46,13 +45,13 @@ def validCRun(cRun, cLoad):
 def makeForRun(loadC):
     """Create random NURand constants for running TPC-C.
        TPC-C 2.1.6.1. (page 20) specifies the valid range for these constants."""
-    cRun = random.randint(0, 255)
+    cRun = rand.number(0, 255)
     while validCRun(cRun, loadC.cLast) == False:
-        cRun = random.randint(0, 255)
+        cRun = rand.number(0, 255)
     assert validCRun(cRun, loadC.cLast)
 
-    cId = random.randint(0, 1023)
-    orderLineItemId = random.randint(0, 8191)
+    cId = rand.number(0, 1023)
+    orderLineItemId = rand.number(0, 8191)
     return NURandC(cRun, cId, orderLineItemId)
 
 class NURandC:
diff --git a/pytpcc/util/results.py b/pytpcc/util/results.py
index ff24f9f..248491f 100644
--- a/pytpcc/util/results.py
+++ b/pytpcc/util/results.py
@@ -26,6 +26,7 @@
 
 import logging
 import time
+import os
 from collections import Counter
 
 class Results:
@@ -142,7 +143,7 @@ def append(self, r):
     def __str__(self):
         return self.show()
 
-    def show(self, load_time=None, driver=None, threads=1):
+    def show(self, load_time=None, driver=None, threads=1, samewh=85):
         if not self.start:
             return "Benchmark not started"
         if not self.stop:
@@ -223,15 +224,19 @@ def show(self, load_time=None, driver=None, threads=1):
             result_doc['batch_writes'] = driver.batch_writes
             result_doc['find_and_modify'] = driver.find_and_modify
             result_doc['read_preference'] = driver.read_preference
-            result_doc['write_concern'] = driver.write_concern.document['w']
+            result_doc['write_concern'] = str(driver.write_concern.document['w'])
             result_doc['causal'] = driver.causal_consistency
+            result_doc['no_global_items'] = driver.no_global_items
             result_doc['all_in_one_txn'] = driver.all_in_one_txn
             result_doc['retry_writes'] = driver.retry_writes
             result_doc['read_concern'] = driver.read_concern
+            result_doc['shards'] = driver.shards
             result_doc['total_retries'] = total_retries
+            result_doc['samewh'] = samewh
             result_doc['total'] = total_cnt
             result_doc['aborts'] = total_aborts
-            ret += "\n%s TpmC for %s %s thr %s txn %d WH: %d %d total %d durSec, batch %s %d retries %s%% %s fnM %s p50 %s p75 %s p90 %s p95 %s p99 %s max %s WC %s causal %s 10in1 %s retry %s %d %d" % (
+            result_doc['instance'] = os.getenv('INSTANCE')
+            ret += "\n%s TpmC for %s %s thr %s txn %d WH: %d %d total %d durSec, batch %s %d retries %s%% %s fnM %s p50 %s p75 %s p90 %s p95 %s p99 %s max %s WC %s causal %s 10in1 %s retry %s %d %d correct %d noGlobalItems %s" % (
                 time.strftime("%Y-%m-%d %H:%M:%S"),
                 ("normal", "denorm")[driver.denormalize],
                 threads,
@@ -246,7 +251,7 @@ def show(self, load_time=None, driver=None, threads=1):
                 u"%6.2f" % (1000*lat[int(samples/100.0*99)]),
                 u"%6.2f" % (1000.0*lat[-1]),
                 str(driver.write_concern), ('false', 'true')[driver.causal_consistency],
-                ('false', 'true')[driver.all_in_one_txn], ('false', 'true')[driver.retry_writes],total_cnt,total_aborts)
+                ('false', 'true')[driver.all_in_one_txn], ('false', 'true')[driver.retry_writes],total_cnt,total_aborts, samewh, ('false', 'true')[driver.no_global_items])
 
             driver.save_result(result_doc)
             print(result_doc)
 
         # PostgreSQL driver returns a shorter version of the summary without extra configuration data
diff --git a/pytpcc/worker.py b/pytpcc/worker.py
index d689307..6a3e55e 100755
--- a/pytpcc/worker.py
+++ b/pytpcc/worker.py
@@ -75,7 +75,7 @@ def loaderFunc(driverClass, scaleParameters, args, config, w_ids, debug):
         driver.loadFinish()
     except KeyboardInterrupt:
         return -1
-    except (Exception, AssertionError), ex:
+    except (Exception, AssertionError) as ex:
         logging.warn("Failed to load data: %s" % (ex))
         #if debug:
         traceback.print_exc(file=sys.stdout)
@@ -96,7 +96,7 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
     config['reset'] = False
     driver.loadConfig(config)
 
-    e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'])
+    e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'], sameWH=args['samewh'])
     driver.executeStart()
     results = e.execute(args['duration'])
     driver.executeFinish()
@@ -116,14 +116,14 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
             w_ids=command.data[3]
 
             ## Create a handle to the target client driver at the client side
-            driverClass = createDriverClass(args['system'])
-            assert driverClass != None, "Failed to find '%s' class" % args['system']
-            driver = driverClass(args['ddl'])
-            assert driver != None, "Failed to create '%s' driver" % args['system']
+            driverClass = createDriverClass(args['system'])
+            assert driverClass != None, "Failed to find '%s' class" % args['system']
+            driver = driverClass(args['ddl'])
+            assert driver != None, "Failed to create '%s' driver" % args['system']
 
-            loaderFunc(driverClass,scaleParameters,args,config,w_ids,True)
+            loaderFunc(driverClass,scaleParameters,args,config,w_ids,True)
             m=message.Message(header=message.LOAD_COMPLETED)
-            channel.send(pickle.dumps(m,-1))
+            channel.send(pickle.dumps(m,-1))
         elif command.header==message.CMD_EXECUTE:
             scaleParameters=command.data[0]
             args=command.data[1]
@@ -136,9 +136,9 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
             driver = driverClass(args['ddl'])
             assert driver != None, "Failed to create '%s' driver" % args['system']
 
-            results=executorFunc(driverClass,scaleParameters,args,config,True)
-            m=message.Message(header=message.EXECUTE_COMPLETED,data=results)
-            channel.send(pickle.dumps(m,-1))
+            results=executorFunc(driverClass,scaleParameters,args,config,True)
+            m=message.Message(header=message.EXECUTE_COMPLETED,data=results)
+            channel.send(pickle.dumps(m,-1))
         elif command.header==message.CMD_STOP:
             pass
diff --git a/shardColl.js b/shardColl.js
new file mode 100644
index 0000000..1206f03
--- /dev/null
+++ b/shardColl.js
@@ -0,0 +1,53 @@
+// Run this before loading the data
+sh.setBalancerState(false);
+db.getSiblingDB("_DBNAME_").dropDatabase();
+sleep(10000);
+sh.enableSharding("_DBNAME_");
+sh.shardCollection("_DBNAME_.ITEM",{"I_W_ID":1, "I_ID":1},true);
+db.getSiblingDB("_DBNAME_").WAREHOUSE.createIndex({"W_ID":1, "W_TAX":1}, {unique:true});
+sh.shardCollection("_DBNAME_.WAREHOUSE",{"W_ID":1});
+db.getSiblingDB("_DBNAME_").DISTRICT.createIndex({"D_W_ID":1, "D_ID":1,"D_NEXT_O_ID" : 1,"D_TAX":1}, {unique:true});
+sh.shardCollection("_DBNAME_.DISTRICT",{"D_W_ID":1,"D_ID":1});
+sh.shardCollection("_DBNAME_.CUSTOMER", {"C_W_ID":1, "C_D_ID":1, "C_ID":1}, true);
+sh.shardCollection("_DBNAME_.HISTORY", {"H_W_ID":1});
+sh.shardCollection("_DBNAME_.STOCK", {"S_W_ID":1, "S_I_ID":1},true);
+db.getSiblingDB("_DBNAME_").NEW_ORDER.createIndex({"NO_W_ID":1, "NO_D_ID":1, "NO_O_ID":1}, {unique:true});
+sh.shardCollection("_DBNAME_.NEW_ORDER", {"NO_W_ID":1, "NO_D_ID":1});
+db.getSiblingDB("_DBNAME_").ORDERS.createIndex({"O_W_ID":1, "O_D_ID":1, "O_ID":1, "O_C_ID":1}, {unique: true} );
+sh.shardCollection("_DBNAME_.ORDERS", {"O_W_ID":1, "O_D_ID":1, "O_ID":1});
+// this is for 6 WH on 3 shards
+// make sure that number of WH is a multiple of number of shards
+// nWH/nshards=whole number (2 here)
+var numShards = _SHARDS_;
+if (!(numShards > 0)) numShards = db.getSiblingDB("config").shards.count();
+var shards=db.getSiblingDB("config").shards.distinct("_id");
+var numWH = _NUMWAREHOUSES_; /* must be a multiple of numShards */
+var whPerShard= numWH/numShards;
+print(whPerShard, numShards, numWH);
+// do splits
+for (i=1+whPerShard; i
 shardTemp.js
+$MONGO $MONGOURI shardTemp.js
+echo "Ran shardColl script with $1 $2 $3 - ready to load"
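
Notes on the item routing (not part of the patch): with shards > 0, loadTuples() writes one copy of every ITEM row per shard, stamping the copies with I_W_ID 1, 1+W/S, 1+2*W/S, ... for W warehouses on S shards, and _doNewOrderTxn() maps each warehouse to the copy owned by the first warehouse of its W/S-sized group, which the {I_W_ID: 1, I_ID: 1} shard key and the presplits in shardColl.js are meant to co-locate with that warehouse's own chunks. A minimal Python sketch of that mapping, assuming warehouses is a multiple of shards as shardColl.js requires (helper names are illustrative, and // mirrors the Python 2 integer division the driver relies on):

def item_copy_warehouses(warehouses, shards):
    """I_W_ID values stamped on the duplicated ITEM rows, as in loadTuples()."""
    if shards > 0:
        return list(range(1, warehouses + 1, warehouses // shards))
    return [0]  # unsharded: a single copy under the dummy warehouse id 0

def item_copy_for(w_id, warehouses, shards):
    """The I_W_ID a New-Order for warehouse w_id reads, as in _doNewOrderTxn()."""
    if shards > 0:
        return w_id - (w_id - 1) % (warehouses // shards)
    return 0

# 6 warehouses on 3 shards (the example in shardColl.js): copies live at
# I_W_ID 1, 3 and 5, and each warehouse resolves to its own group's copy.
assert item_copy_warehouses(6, 3) == [1, 3, 5]
assert [item_copy_for(w, 6, 3) for w in range(1, 7)] == [1, 1, 3, 3, 5, 5]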
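The --samewh knob added above generalizes the fixed 85% in generatePaymentParams(): TPC-C pays through the customer's home warehouse x% of the time, and on a sharded cluster every remote-warehouse Payment is a potential cross-shard transaction. A sketch of the selection logic under that assumption (standard-library random stands in for the benchmark's own rand helpers):

import random

def payment_customer_warehouse(w_id, warehouses, same_wh=85):
    """Pick the paying customer's warehouse; same_wh is the --samewh percentage."""
    x = random.uniform(1, 100)
    if warehouses == 1 or x <= same_wh:
        return w_id  # pay through own warehouse
    # pay through another, randomly chosen warehouse
    return random.choice([w for w in range(1, warehouses + 1) if w != w_id])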