|
| 1 | +#!/usr/bin/env python3 |
| 2 | +import hashlib |
| 3 | + |
| 4 | +import bson |
| 5 | + |
| 6 | +import pymongo |
| 7 | +from pymongo import ASCENDING, MongoClient |
| 8 | +from pymongo.collection import Collection |
| 9 | + |
| 10 | +SRC_URI = "mongodb://adm:pass@rs00:30000" |
| 11 | +TGT_URI = "mongodb://adm:pass@rs10:30100" |
| 12 | + |
| 13 | +SRC_SH_URI = "mongodb://adm:pass@src-mongos:27017" |
| 14 | +TGT_SH_URI = "mongodb://adm:pass@tgt-mongos:29017" |
| 15 | + |
| 16 | +src = pymongo.MongoClient(SRC_SH_URI) |
| 17 | +tgt = pymongo.MongoClient(TGT_SH_URI) |
| 18 | + |
| 19 | + |
| 20 | +def list_databases(client: MongoClient): |
| 21 | + """List all databases in the given MongoClient.""" |
| 22 | + for name in client.list_database_names(): |
| 23 | + if name not in ("admin", "config", "local", "percona_mongolink"): |
| 24 | + yield name |
| 25 | + |
| 26 | + |
| 27 | +def list_collections(client: MongoClient, db: str): |
| 28 | + """List all namespaces in the given database.""" |
| 29 | + for name in client[db].list_collection_names(): |
| 30 | + if not name.startswith("system."): |
| 31 | + yield name |
| 32 | + |
| 33 | + |
| 34 | +def list_all_namespaces(client: MongoClient): |
| 35 | + """Return all namespaces in the target MongoDB.""" |
| 36 | + for db in list_databases(client): |
| 37 | + for coll in list_collections(client, db): |
| 38 | + yield f"{db}.{coll}" |
| 39 | + |
| 40 | + |
| 41 | +def _coll_content(coll: Collection, sort=None): |
| 42 | + """Get the content and hash of the given collection.""" |
| 43 | + if not sort: |
| 44 | + sort = [("_id", ASCENDING)] |
| 45 | + |
| 46 | + count, md5 = 0, hashlib.md5() |
| 47 | + for data in coll.find_raw_batches(sort=sort): |
| 48 | + md5.update(data) |
| 49 | + count += len(bson.decode_all(data)) |
| 50 | + return count, md5.hexdigest() |
| 51 | + |
| 52 | + |
| 53 | +def compare_namespace(source: MongoClient, target: MongoClient, db: str, coll: str, sort=None): |
| 54 | + """Compare the given namespace between source and target MongoDB.""" |
| 55 | + ns = f"{db}.{coll}" |
| 56 | + |
| 57 | + source_options = source[db][coll].options() |
| 58 | + target_options = target[db][coll].options() |
| 59 | + assert source_options == target_options, f"{ns}: {source_options=} != {target_options=}" |
| 60 | + |
| 61 | + if "viewOn" not in source_options: |
| 62 | + source_indexes = source[db][coll].index_information() |
| 63 | + target_indexes = target[db][coll].index_information() |
| 64 | + assert source_indexes == target_indexes, f"{ns}: {source_indexes=} != {target_indexes=}" |
| 65 | + |
| 66 | + source_count, source_hash = _coll_content(source[db][coll], sort) |
| 67 | + target_count, target_hash = _coll_content(target[db][coll], sort) |
| 68 | + assert source_count == target_count, f"{ns}: {source_count=} != {target_count=}" |
| 69 | + assert source_hash == target_hash, f"{ns}: {source_hash=} != {target_hash=}" |
| 70 | + |
| 71 | + |
| 72 | +# Main =========================================================================== |
| 73 | + |
| 74 | +sort = None |
| 75 | + |
| 76 | +source_dbs = set(list_databases(src)) |
| 77 | +target_dbs = set(list_databases(tgt)) |
| 78 | +assert source_dbs == target_dbs, f"{source_dbs} != {target_dbs}" |
| 79 | + |
| 80 | +for db in source_dbs: |
| 81 | + print(f"Comparing {db}...") |
| 82 | + source_colls = set(list_collections(src, db)) |
| 83 | + target_colls = set(list_collections(tgt, db)) |
| 84 | + print(f" {db}: {source_colls} vs {target_colls}") |
| 85 | + assert source_colls == target_colls, f"{db} :: {source_colls} != {target_colls}" |
| 86 | + |
| 87 | + for coll in source_colls: |
| 88 | + print(f" Comparing {db}.{coll}...") |
| 89 | + compare_namespace(src, tgt, db, coll, sort) |
0 commit comments