9 changes: 6 additions & 3 deletions pytpcc/coordinator.py
@@ -132,6 +132,8 @@ def startExecution(scaleParameters, args, config,channels):
     aparser.add_argument('--clientprocs', default=1, type=int, metavar='N',
                          help='Number of processes on each client node.')
 
+    aparser.add_argument('--samewh', default=85, type=float, metavar='PP',
+                         help='Percent paying same warehouse')
     aparser.add_argument('--stop-on-error', action='store_true',
                          help='Stop the transaction execution when the driver throws an exception.')
     aparser.add_argument('--no-load', action='store_true',
@@ -160,7 +162,7 @@ def startExecution(scaleParameters, args, config,channels):
     ## Load Configuration file
     if args['config']:
         logging.debug("Loading configuration file '%s'" % args['config'])
-        cparser = ConfigParser()
+        cparser = SafeConfigParser()
         cparser.read(os.path.realpath(args['config'].name))
         config = dict(cparser.items(args['system']))
     else:
@@ -171,6 +173,7 @@ def startExecution(scaleParameters, args, config,channels):
        config['load'] = False
        config['execute'] = False
        if config['reset']: logging.info("Reseting database")
+    config['warehouses'] = args['warehouses']
     driver.loadConfig(config)
     logging.info("Initializing TPC-C benchmark using %s" % driver)
 
@@ -208,8 +211,8 @@ def startExecution(scaleParameters, args, config,channels):
     if not args['no_execute']:
         results = startExecution(scaleParameters, args, config,channels)
         assert results
-        logging.info(results.show(load_time, driver, len(channels)))
-        print results.show(load_time, driver, len(channels))
+        logging.info(results.show(load_time, driver, len(channels), args['samewh']))
+        print(results.show(load_time, driver, len(channels), args['samewh']))
     ## IF
 
 ## MAIN
74 changes: 47 additions & 27 deletions pytpcc/drivers/mongodbdriver.py
@@ -38,14 +38,9 @@
 from pprint import pformat
 from time import sleep
 import pymongo
-from pymongo.client_session import TransactionOptions
-
-# Import TransactionOptions from pymongo.client_session or
-# pymongo.synchronous.client_session depending on the version of pymongo
-from pymongo.client_session import TransactionOptions
 
 import constants
-from .abstractdriver import AbstractDriver
+from abstractdriver import AbstractDriver
 
 TABLE_COLUMNS = {
     constants.TABLENAME_ITEM: [
@@ -206,7 +201,8 @@ class MongodbDriver(AbstractDriver):
         "secondary_reads": ("If true, we will allow secondary reads", True),
         "retry_writes": ("If true, we will enable retryable writes", True),
         "causal_consistency": ("If true, we will perform causal reads ", True),
-        "shards": ("If >1 then sharded", "1")
+        "no_global_items": ("If true, we will use only one 'unsharded' items collection", False),
+        "shards": ("If >0 then sharded", "0")
     }
     DENORMALIZED_TABLES = [
         constants.TABLENAME_ORDERS,
@@ -237,7 +233,9 @@ def __init__(self, ddl):
         self.output = open('results.json','a')
         self.result_doc = {}
         self.warehouses = 0
-        self.shards = 1
+        self.no_global_items = False
+        self.shards = 0
+        self.sshost = None
 
         ## Create member mapping to collections
         for name in constants.ALL_TABLES:
@@ -270,6 +268,7 @@ def loadConfig(self, config):
         self.warehouses = config['warehouses']
         self.find_and_modify = config['findandmodify'] == 'True'
         self.causal_consistency = config['causal_consistency'] == 'True'
+        self.no_global_items = config['no_global_items'] == 'True'
         self.retry_writes = config['retry_writes'] == 'True'
         self.secondary_reads = config['secondary_reads'] == 'True'
         self.agg = config['agg'] == 'True'
@@ -305,12 +304,19 @@ def loadConfig(self, config):
             real_uri = uri[0:pindex]+userpassword+uri[pindex:]
             display_uri = uri[0:pindex]+usersecret+uri[pindex:]
 
+        # for extra URL to mongos
+        if userpassword == "" and ':' in uri[pindex:] and '@' in uri[pindex:]:
+            at = uri.index('@',pindex)
+            userpassword = uri[(pindex):(at+1)]
         self.client = pymongo.MongoClient(real_uri,
                                           retryWrites=self.retry_writes,
                                           readPreference=self.read_preference,
                                           readConcernLevel=self.read_concern)
 
         self.result_doc['before']=self.get_server_status()
+        ssURI="mongodb://"+userpassword+self.result_doc['before']['host']+"/test?ssl=true&authSource=admin"
+        logging.debug("%s %s %s", userpassword, self.result_doc['before']['host'], ssURI)
+        self.sshost = pymongo.MongoClient(ssURI)
 
         # set default writeConcern on the database
         self.database = self.client.get_database(name=str(config['name']), write_concern=self.write_concern)
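Note on the hunk above: a worked example of the credential splitting and the serverStatus-host URI it builds, with hypothetical values (bob, secret, and the host string are made up; pindex is assumed to sit just past the mongodb:// scheme, as the real_uri construction implies):

```python
# Hypothetical values; mirrors the userpassword/ssURI logic above.
uri = "mongodb://bob:secret@cluster0.example.net/"
pindex = uri.index("://") + 3                # just past the scheme
userpassword = ""
if userpassword == "" and ':' in uri[pindex:] and '@' in uri[pindex:]:
    at = uri.index('@', pindex)
    userpassword = uri[pindex:at + 1]        # "bob:secret@" (keeps the '@')
host = "shard-00-00.example.net:27017"       # stand-in for serverStatus()['host']
ssURI = "mongodb://" + userpassword + host + "/test?ssl=true&authSource=admin"
print(ssURI)
# mongodb://bob:secret@shard-00-00.example.net:27017/test?ssl=true&authSource=admin
```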
@@ -402,10 +408,11 @@ def loadTuples(self, tableName, tuples):
         else:
             if tableName == constants.TABLENAME_ITEM:
                 tuples3 = []
-                if self.shards > 1:
-                    ww = range(1,self.warehouses+1)
+                if self.shards > 0:
+                    ww = range(1,self.warehouses+1, int(self.warehouses/self.shards))
                 else:
                     ww = [0]
+                # print self.shards, self.warehouses, ww
                 for t in tuples:
                     for w in ww:
                         t2 = list(t)
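The new range step spreads one copy of the ITEM rows onto the first warehouse of each shard's chunk; a worked example with hypothetical sizes:

```python
# 8 warehouses across 4 shards: items are loaded once per shard, keyed to
# the first warehouse in each shard's chunk of warehouses.
warehouses, shards = 8, 4
ww = range(1, warehouses + 1, int(warehouses / shards))
print(list(ww))  # [1, 3, 5, 7]
```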
@@ -415,18 +422,23 @@ def loadTuples(self, tableName, tuples):
             for t in tuples:
                 tuple_dicts.append(dict([(columns[i], t[i]) for i in num_columns]))
             ## FOR
-            self.database[tableName].insert_many(tuple_dicts)
+
+            self.database[tableName].insert_many(tuple_dicts, ordered=False)
         ## IF
 
         return
 
     def loadFinishDistrict(self, w_id, d_id):
+        logging.debug("LoadFinishDistrict")
         if self.denormalize:
             logging.debug("Pushing %d denormalized ORDERS records for WAREHOUSE %d DISTRICT %d into MongoDB", len(self.w_orders), w_id, d_id)
-            self.database[constants.TABLENAME_ORDERS].insert_many(self.w_orders.values())
+            self.database[constants.TABLENAME_ORDERS].insert_many(self.w_orders.values(), ordered=False)
             self.w_orders.clear()
         ## IF
 
+    def loadFinish(self):
+        logging.debug("load finish: ")
+
     def executeStart(self):
         """Optional callback before the execution for each client starts"""
         return None
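The switch to ordered=False lets the server continue past an individual insert failure instead of aborting the rest of the batch, which also makes re-runs of the loader more forgiving; a minimal sketch, assuming a local mongod and a throwaway test.demo collection:

```python
import pymongo
from pymongo.errors import BulkWriteError

client = pymongo.MongoClient()  # assumes a local mongod is running
coll = client.test.demo
coll.drop()
try:
    # With ordered=False the duplicate _id fails alone; the rest still insert.
    coll.insert_many([{"_id": 1}, {"_id": 1}, {"_id": 2}], ordered=False)
except BulkWriteError as bwe:
    print(bwe.details["nInserted"])  # 2
```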
@@ -614,8 +626,10 @@ def _doNewOrderTxn(self, s, params):
         d_next_o_id = d["D_NEXT_O_ID"]
 
         # fetch matching items and see if they are all valid
-        if self.shards > 1: i_w_id = w_id
+        if self.shards > 0: i_w_id = w_id-(w_id-1)%(self.warehouses/self.shards) # get_i_w(w_id)
         else: i_w_id = 0
+        if self.no_global_items:
+            i_w_id = 1
         items = list(self.item.find({"I_ID": {"$in": i_ids}, "I_W_ID": i_w_id, "$comment": comment},
                                     {"_id":0, "I_ID": 1, "I_PRICE": 1, "I_NAME": 1, "I_DATA": 1},
                                     session=s))
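The i_w_id expression maps every warehouse to the item copy loaded for its shard's chunk (the commented get_i_w name suggests it was once a helper); a worked example with the same hypothetical sizes as above (// matches the Python 2 integer division the diff relies on):

```python
# 8 warehouses across 4 shards: warehouses 1-2 read items from copy 1,
# 3-4 from copy 3, and so on, matching the loader's ww = [1, 3, 5, 7].
warehouses, shards = 8, 4
def get_i_w(w_id):
    return w_id - (w_id - 1) % (warehouses // shards)
print([get_i_w(w) for w in range(1, warehouses + 1)])  # [1, 1, 3, 3, 5, 5, 7, 7]
```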
@@ -628,8 +642,7 @@ def _doNewOrderTxn(self, s, params):
             #print constants.INVALID_ITEM_MESSAGE + ", Aborting transaction (ok for 1%)"
             return None
         ## IF
-        xxi_ids = tuple(map(lambda o: o['I_ID'], items))
-        items = sorted(items, key=lambda x: xxi_ids.index(x['I_ID']))
+        items = sorted(items, key=lambda x: i_ids.index(x['I_ID']))
 
         # getWarehouseTaxRate
         w = self.warehouse.find_one({"W_ID": w_id, "$comment": comment}, {"_id":0, "W_TAX": 1}, session=s)
@@ -684,8 +697,7 @@ def _doNewOrderTxn(self, s, params):
                                           session=s))
         ## IF
         assert len(all_stocks) == ol_cnt, "all_stocks len %d != ol_cnt %d" % (len(all_stocks), ol_cnt)
-        xxxi_ids = tuple(map(lambda o: (o['S_I_ID'], o['S_W_ID']), all_stocks))
-        all_stocks = sorted(all_stocks, key=lambda x: xxxi_ids.index((x['S_I_ID'], x["S_W_ID"])))
+        all_stocks = sorted(all_stocks, key=lambda x: item_w_list.index((x['S_I_ID'], x["S_W_ID"])))
 
         ## ----------------
         ## Insert Order Line, Stock Item Information
@@ -784,7 +796,7 @@ def _doNewOrderTxn(self, s, params):
 
         if self.batch_writes:
             if not self.denormalize:
-                self.order_line.insert_many(order_line_writes, session=s)
+                self.order_line.insert_many(order_line_writes, ordered=False, session=s)
             self.stock.bulk_write(stock_writes, session=s)
         ## IF
 
@@ -936,7 +948,7 @@ def _doPaymentTxn(self, s, params):
                                      session=s)
         ## IF
 
-        search_fields = {"C_W_ID": w_id, "C_D_ID": d_id, "$comment": comment}
+        search_fields = {"C_W_ID": c_w_id, "C_D_ID": c_d_id, "$comment": comment}
         return_fields = {"C_BALANCE": 0, "C_YTD_PAYMENT": 0, "C_PAYMENT_CNT": 0}
 
         if c_id != None:
@@ -1084,9 +1096,9 @@ def _doStockLevelTxn(self, s, params):
             ol_ids.add(ol["OL_I_ID"])
         ## FOR
 
-        result = self.stock.count_documents({"S_W_ID": w_id,
+        result = self.stock.find({"S_W_ID": w_id,
                                   "S_I_ID": {"$in": list(ol_ids)},
-                                  "S_QUANTITY": {"$lt": threshold}, "$comment": comment})
+                                  "S_QUANTITY": {"$lt": threshold}, "$comment": comment}).count()
 
         return int(result)
 
@@ -1115,7 +1127,7 @@ def run_transaction(self, txn_callback, session, name, params):
     # Should we retry txns within the same session or start a new one?
     def run_transaction_with_retries(self, txn_callback, name, params):
         txn_retry_counter = 0
-        to = TransactionOptions(
+        to = pymongo.client_session.TransactionOptions(
             read_concern=None,
             #read_concern=pymongo.read_concern.ReadConcern("snapshot"),
             write_concern=self.write_concern,
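For context, TransactionOptions built this way are normally handed to start_session; the session wiring sits outside this hunk, so the sketch below is an assumption about the surrounding code, not the PR's own:

```python
import pymongo

to = pymongo.client_session.TransactionOptions(
    read_concern=None,
    write_concern=pymongo.write_concern.WriteConcern(w="majority"),
    read_preference=pymongo.read_preferences.Primary())

client = pymongo.MongoClient()  # transactions require a replica set or mongos
with client.start_session(default_transaction_options=to) as s:
    with s.start_transaction():  # the options above apply to this transaction
        client.test.demo.insert_one({"x": 1}, session=s)
```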
@@ -1137,8 +1149,12 @@ def run_transaction_with_retries(self, txn_callback, name, params):
                 sleep(txn_retry_counter * .1)
                 logging.debug("txn retry number for %s: %d", name, txn_retry_counter)
         ## WHILE
-    def get_server_status(self):
-        ss=self.client.admin.command('serverStatus')
+
+    def get_server_status(self, otherClient=None):
+        if otherClient and self.sshost:
+            ss=self.sshost.admin.command('serverStatus')
+        else:
+            ss=self.client.admin.command('serverStatus')
         if "$configServerState" in ss:
             del ss["$configServerState"]
         if "$gleStats" in ss:
@@ -1157,8 +1173,12 @@ def get_server_status(self):
 
     def save_result(self, result_doc):
         self.result_doc.update(result_doc)
-        self.result_doc['after']=self.get_server_status()
-        # saving test results and server statuses ('before' and 'after') into MongoDB as a single document
-        self.client.test.results.insert_one(self.result_doc)
+        self.result_doc['after']=self.get_server_status(self.sshost)
+        # save cache size, instance type, version
+        self.result_doc['version']=self.result_doc['after']['version'][0:3]
+        # {$trunc:{$divide:["$before.wiredTiger.cache.maximum bytes configured",1024*1024*1024]}},72]}}, {$set:{cacheGB:NumberLong(72)
+        #self.result_doc['cacheGB']=int(self.result_doc['after']['wiredTiger']['cache']['maximum bytes configured']/1073741824)
+        #self.result_doc['instance']={18:"M50",36:"M60",72:"M80"}.get(self.result_doc['cacheGB'], 'unknown')
+        self.client.test.results.save(self.result_doc)
 
 ## CLASS
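The commented-out lines hint at deriving an Atlas-style instance tier from the WiredTiger cache size; roughly the following, given a serverStatus document ss (the 18/36/72 GB to M50/M60/M80 mapping is taken from the comment itself):

```python
def infer_instance(ss):
    # 'maximum bytes configured' -> whole GB, then map cache size to a tier.
    cache_gb = int(ss['wiredTiger']['cache']['maximum bytes configured'] / 1073741824)
    return {18: "M50", 36: "M60", 72: "M80"}.get(cache_gb, 'unknown')
```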
10 changes: 5 additions & 5 deletions pytpcc/runtime/executor.py
@@ -44,10 +44,11 @@
 
 class Executor:
 
-    def __init__(self, driver, scaleParameters, stop_on_error = False):
+    def __init__(self, driver, scaleParameters, stop_on_error = False, sameWH = 85):
         self.driver = driver
         self.scaleParameters = scaleParameters
         self.stop_on_error = stop_on_error
+        self.same_wh = sameWH
     ## DEF
 
     def execute(self, duration):
@@ -76,8 +77,7 @@ def execute(self, duration):
                     batch_result.abortTransaction(batch_txn_id)
                     if self.stop_on_error: raise
                     continue
-
-                # This will happen on all failing 1% of the transactions
+
                 if val is None:
                     global_result.abortTransaction(global_txn_id, retries)
                     batch_result.abortTransaction(batch_txn_id, retries)
@@ -86,7 +86,7 @@ def execute(self, duration):
                 batch_result.stopTransaction(batch_txn_id, retries)
                 global_result.stopTransaction(global_txn_id, retries)
 
-                if time.time() - start_batch > 900: # every 15 minutes
+                if time.time() - start_batch > 1800: # every 30 minutes
                     batch_result.stopBenchmark()
                     logging.info(batch_result.show())
                     batch_result = results.Results()
@@ -221,7 +221,7 @@ def generatePaymentParams(self):
         h_date = datetime.now()
 
         ## 85%: paying through own warehouse (or there is only 1 warehouse)
-        if self.scaleParameters.warehouses == 1 or x <= 85:
+        if self.scaleParameters.warehouses == 1 or x <= self.same_wh:
             c_w_id = w_id
             c_d_id = d_id
         ## 15%: paying through another warehouse:
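With --samewh left at its default of 85 this preserves the spec's 85/15 Payment split; a minimal sketch of the selection rule, assuming x is drawn uniformly from 1..100 as in the standard generator:

```python
import random

def pick_customer_warehouse(w_id, warehouses, same_wh=85):
    # same_wh percent of payments go through the home warehouse; the rest
    # pick a different warehouse, per the TPC-C remote-payment rule.
    x = random.randint(1, 100)
    if warehouses == 1 or x <= same_wh:
        return w_id
    return random.choice([w for w in range(1, warehouses + 1) if w != w_id])
```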
23 changes: 13 additions & 10 deletions pytpcc/tpcc.py
@@ -36,7 +36,8 @@
 import time
 import multiprocessing
 import subprocess
-from configparser import ConfigParser
+import random
+from ConfigParser import SafeConfigParser
 from pprint import pprint, pformat
 
 from util import results, scaleparameters
@@ -99,7 +100,10 @@ def getDrivers():
 ## DEF
 
 ## ==============================================
-## startLoading
+## startLoading.
+# This intentionally uses a multiprocessing pool and intentionally starts new processes for each batch,
+# because on long-running, many-hour loads the connection between the child process and the parent process is lost
+# and the parent blocks indefinitely waiting for the result.
 ## ==============================================
 def startLoading(driverClass, scaleParameters, args, config):
     """
@@ -199,10 +203,7 @@ def startExecution(driverClass, scaleParameters, args, config):
     logging.debug("Creating client pool with %d processes", args['clients'])
     pool = multiprocessing.Pool(args['clients'])
     debug = logging.getLogger().isEnabledFor(logging.DEBUG)
-    try:
-        del args['config']
-    except KeyError:
-        print()
+
     worker_results = []
     for _ in range(args['clients']):
         r = pool.apply_async(executorFunc, (driverClass, scaleParameters, args, config, debug,))
@@ -236,7 +237,7 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
     config['reset'] = False
     driver.loadConfig(config)
 
-    e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'])
+    e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'], sameWH=args['samewh'])
     driver.executeStart()
     results = e.execute(args['duration'])
     driver.executeFinish()
@@ -257,6 +258,8 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
                          help='Instruct the driver to reset the contents of the database')
     aparser.add_argument('--scalefactor', default=1, type=float, metavar='SF',
                          help='Benchmark scale factor')
+    aparser.add_argument('--samewh', default=85, type=float, metavar='PP',
+                         help='Percent paying same warehouse')
     aparser.add_argument('--warehouses', default=4, type=int, metavar='W',
                          help='Number of Warehouses')
     aparser.add_argument('--duration', default=60, type=int, metavar='D',
@@ -295,7 +298,7 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
     ## Load Configuration file
     if args['config']:
         logging.debug("Loading configuration file '%s'", args['config'])
-        cparser = ConfigParser()
+        cparser = SafeConfigParser()
        cparser.read(os.path.realpath(args['config'].name))
        config = dict(cparser.items(args['system']))
     else:
@@ -342,7 +345,7 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
     if not args['no_execute']:
         noftifyDsiOfPhaseStart("TPC-C_workload")
         if args['clients'] == 1:
-            e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'])
+            e = executor.Executor(driver, scaleParameters, stop_on_error=args['stop_on_error'], sameWH=args['samewh'])
             driver.executeStart()
             results = e.execute(args['duration'])
             driver.executeFinish()
@@ -351,7 +354,7 @@ def executorFunc(driverClass, scaleParameters, args, config, debug):
         assert results, "No results from execution for %d client!" % args['clients']
         logging.info("Final Results")
         logging.info("Threads: %d", args['clients'])
-        logging.info(results.show(load_time, driver, args['clients']))
+        logging.info(results.show(load_time, driver, args['clients'], args['samewh']))
         noftifyDsiOfPhaseEnd("TPC-C_workload")
     ## IF
 