@@ -10640,8 +10640,10 @@ class CInitGroups
1064010640 // see if identical
1064110641 const char *oldKind = oldClusterGroup->queryProp("@kind");
1064210642 const char *oldDir = oldClusterGroup->queryProp("@dir");
10643+ bool oldHasProtected = oldClusterGroup->hasProp("@protected");
1064310644 const char *newKind = newClusterGroup->queryProp("@kind");
1064410645 const char *newDir = newClusterGroup->queryProp("@dir");
10646+ bool newHasProtected = newClusterGroup->hasProp("@protected");
1064510647 if (oldKind)
1064610648 {
1064710649 if (newKind)
@@ -10666,7 +10668,15 @@ class CInitGroups
1066610668 }
1066710669 else if (NULL!=newDir)
1066810670 return false;
10669-
10671+ if (oldHasProtected != newHasProtected)
10672+ return false;
10673+ else if (oldHasProtected && newHasProtected)
10674+ {
10675+ bool oldProtected = oldClusterGroup->getPropBool("@protected");
10676+ bool newProtected = newClusterGroup->getPropBool("@protected");
10677+ if (oldProtected != newProtected)
10678+ return false;
10679+ }
1067010680 unsigned oldGroupCount = oldClusterGroup->getCount("Node");
1067110681 unsigned newGroupCount = newClusterGroup->getCount("Node");
1067210682 if (oldGroupCount != newGroupCount)
@@ -10912,6 +10922,7 @@ class CInitGroups
1091210922
1091310923 bool constructGroup(const IPropertyTree &cluster, const char *altName, IPropertyTree *oldEnvCluster, GroupType groupType, bool force, StringBuffer &messages)
1091410924 {
10925+ dbgassertex(!isContainerized());
1091510926 /* a 'realCluster' is a cluster whose name matches its nodeGroup
1091610927 * if the nodeGroup differs it implies it's sharing the nodeGroup with other thor instance(s).
1091710928 */
@@ -10955,6 +10966,10 @@ class CInitGroups
1095510966 IPropertyTree *existingClusterGroup = queryExistingGroup(gname);
1095610967 bool matchOldEnv = false;
1095710968 Owned<IPropertyTree> newClusterGroup = createClusterGroupFromEnvCluster(groupType, cluster, defDir, realCluster, true);
10969+ // All BM groups are protected by default, except dropzones.
10970+ // Meaning, they will not automatically be updated if the Environment cluster definition changes.
10971+ if (newClusterGroup && (grp_dropzone != groupType))
10972+ newClusterGroup->setPropBool("@protected", true);
1095810973 bool matchExisting = !force && clusterGroupCompare(newClusterGroup, existingClusterGroup);
1095910974 if (oldEnvCluster)
1096010975 {
@@ -10993,6 +11008,10 @@ class CInitGroups
1099311008 VStringBuffer msg("New cluster layout for cluster %s", gname.str());
1099411009 UWARNLOG("%s", msg.str());
1099511010 messages.append(msg).newline();
11011+ // All BM groups are protected by default, except dropzones.
11012+ // Meaning, they will not automatically be updated if the Environment cluster definition changes.
11013+ if (grp_dropzone != groupType)
11014+ newClusterGroup->setPropBool("@protected", true);
1099611015 addClusterGroup(gname.str(), newClusterGroup.getClear(), realCluster);
1099711016 return true;
1099811017 }
@@ -11019,6 +11038,7 @@ class CInitGroups
1101911038 if (ins>1)
1102011039 gname.append('_').append(ins);
1102111040 Owned<IPropertyTree> clusterGroup = createClusterGroup(grp_hthor, { na }, nullptr, &cluster, true, false);
11041+ clusterGroup->setPropBool("@protected", true);
1102211042 addClusterGroup(gname.str(), clusterGroup.getClear(), true);
1102311043 }
1102411044 }
@@ -11094,6 +11114,7 @@ class CInitGroups
1109411114 }
1109511115 bool resetClusterGroup(const char *clusterName, const char *type, bool spares, StringBuffer &messages)
1109611116 {
11117+ dbgassertex(!isContainerized());
1109711118 Owned<IRemoteConnection> conn = querySDS().connect("/Environment", myProcessSession(), RTM_LOCK_READ, SDS_CONNECT_TIMEOUT);
1109811119 if (!conn)
1109911120 return false;
@@ -11209,20 +11230,61 @@ class CInitGroups
1120911230 }
1121011231 return true;
1121111232 }
11212- void clearLZGroups ()
11233+ void clearUnprotectedGroups ()
1121311234 {
1121411235 if (!writeLock)
11215- throw makeStringException(0, "CInitGroups::clearLZGroups called in read-only mode");
11216- IPropertyTree *root = groupsconnlock.conn->queryRoot();
11217- std::vector<IPropertyTree *> toDelete;
11218- Owned<IPropertyTreeIterator> groups = root->getElements("Group[@kind='dropzone']");
11236+ throw makeStringException(0, "CInitGroups::clearUnprotectedGroups called in read-only mode");
11237+
11238+ Owned<IPropertyTree> globalConfig = getGlobalConfig();
11239+ IPropertyTree * storage = globalConfig->queryPropTree("storage");
11240+ if (!storage)
11241+ return;
11242+ std::vector<IPropertyTree *> toRemove;
11243+ IPropertyTree *groupsRoot = groupsconnlock.conn->queryRoot();
11244+ Owned<IPropertyTreeIterator> groups = groupsRoot->getElements("Group");
11245+ bool firstBareMetalProtectedRun = true;
11246+ if (isContainerized())
11247+ firstBareMetalProtectedRun = false;
11248+ else
11249+ {
11250+ // check if any protected groups. Unless this is the 1st BM run since this feature was added,
11251+ // there will be >0 (one for each cluster group in the environment from previous runs).
11252+ ForEach(*groups)
11253+ {
11254+ IPropertyTree &group = groups->query();
11255+ if (group.hasProp("@protected"))
11256+ {
11257+ firstBareMetalProtectedRun = false;
11258+ break;
11259+ }
11260+ }
11261+ }
1121911262 ForEach(*groups)
11220- toDelete.push_back(&groups->query());
11221- for (auto &group: toDelete)
11222- root->removeTree(group);
11263+ {
11264+ IPropertyTree &group = groups->query();
11265+ bool doDelete = false;
11266+ if (firstBareMetalProtectedRun)
11267+ {
11268+ doDelete = strsame("dropzone", group.queryProp("@kind"));
11269+ if (!doDelete)
11270+ {
11271+ // Protect all non dropzone BM groups created on 1st run
11272+ // This preserves legacy semantics, where Dali groups will not automatically be
11273+ // overwritten by a change in the topology in the environment.
11274+ group.setPropBool("@protected", true);
11275+ }
11276+ }
11277+ else
11278+ doDelete = !group.getPropBool("@protected");
11279+ if (doDelete)
11280+ toRemove.push_back(&group);
11281+ }
11282+ for (auto &group: toRemove)
11283+ groupsRoot->removeTree(group);
1122311284 }
1122411285 void constructGroups(bool force, StringBuffer &messages, IPropertyTree *oldEnvironment)
1122511286 {
11287+ dbgassertex(!isContainerized());
1122611288 Owned<IRemoteConnection> conn = querySDS().connect("/Environment/Software", myProcessSession(), RTM_LOCK_READ, SDS_CONNECT_TIMEOUT);
1122711289 if (!conn)
1122811290 return;
@@ -11301,11 +11363,13 @@ class CInitGroups
1130111363 return createClusterGroup(grp_unknown, hosts, path, nullptr, false, false);
1130211364 }
1130311365
11304- void ensureConsistentStorageGroup(bool force, const char * name, IPropertyTree * newClusterGroup, StringBuffer & messages)
11366+ void ensureConsistentStorageGroup(const char * name, IPropertyTree * newClusterGroup, StringBuffer & messages)
1130511367 {
1130611368 IPropertyTree *existingClusterGroup = queryExistingGroup(name);
1130711369 bool matchExisting = clusterGroupCompare(newClusterGroup, existingClusterGroup);
11308- if (!existingClusterGroup || !matchExisting)
11370+ bool oldProtected = existingClusterGroup ? existingClusterGroup->getPropBool("@protected") : false;
11371+ bool newProtected = newClusterGroup->getPropBool("@protected");
11372+ if (!existingClusterGroup || !matchExisting || (oldProtected != newProtected))
1130911373 {
1131011374 if (!existingClusterGroup)
1131111375 {
@@ -11314,7 +11378,7 @@ class CInitGroups
1131411378 messages.append(msg).newline();
1131511379 addClusterGroup(name, LINK(newClusterGroup), false);
1131611380 }
11317- else if (force)
11381+ else if (!oldProtected || !newProtected) // i.e. allow overwrite if either old wasn't protected, or if was, but new isn't
1131811382 {
1131911383 VStringBuffer msg("Forcing new group layout for storageplane %s", name);
1132011384 UWARNLOG("%s", msg.str());
@@ -11330,17 +11394,7 @@ class CInitGroups
1133011394 }
1133111395 }
1133211396
11333- void ensureStorageGroup(bool force, const char * name, unsigned numDevices, const char * path, StringBuffer & messages)
11334- {
11335- //Lower case the group name - see CNamedGroupStore::dolookup which lower cases before resolving.
11336- StringBuffer gname;
11337- gname.append(name).toLowerCase();
11338-
11339- Owned<IPropertyTree> newClusterGroup = createStorageGroup(gname, numDevices, path);
11340- ensureConsistentStorageGroup(force, gname, newClusterGroup, messages);
11341- }
11342-
11343- void constructStorageGroups(bool force, StringBuffer &messages)
11397+ void constructStorageGroups(StringBuffer &messages)
1134411398 {
1134511399 Owned<IPropertyTree> globalConfig = getGlobalConfig();
1134611400 IPropertyTree * storage = globalConfig->queryPropTree("storage");
@@ -11394,7 +11448,29 @@ class CInitGroups
1139411448 unsigned numDevices = plane.getPropInt("@numDevices", 1);
1139511449 newClusterGroup.setown(createStorageGroup(gname, numDevices, prefix));
1139611450 }
11397- ensureConsistentStorageGroup(force, gname, newClusterGroup, messages);
11451+ // Storage planes are the single source of truth for storage layout.
11452+ // Dali groups are created to reflect storage plane definitions.
11453+ // If a plane definition changes, the corresponding Dali group is overwritten
11454+ // (with a warning issued).
11455+ //
11456+ // Setting @protectGroup=true on an individual plane prevents a changed plane
11457+ // definition from overwriting the existing Dali group layout.
11458+ //
11459+ // Note: For bare-metal Environment-based groups, the existing group is protected
11460+ // by default. In containerized environments, the plane definition always takes
11461+ // precedence unless it is explicitly protected.
11462+ //
11463+ // The @protectGroup option should only be used if existing logical files reference
11464+ // an old plane layout and there is a need to prevent them from pointing to a new group
11465+ // layout (which could make physical file parts inaccessible). However, such plane
11466+ // topology changes should be avoided. Instead, a new plane should be defined, with the
11467+ // existing plane definition being left untouched, so that existing files that reference
11468+ // it are unaffected.
11469+ if (plane.getPropBool("@protectGroup"))
11470+ newClusterGroup->setPropBool("@protected", true);
11471+ else
11472+ newClusterGroup->removeProp("@protected");
11473+ ensureConsistentStorageGroup(gname, newClusterGroup, messages);
1139811474 }
1139911475 }
1140011476 }
@@ -11411,31 +11487,29 @@ class CInitGroups
1141111487 }
1141211488};
1141311489
11414- void initClusterGroups(bool force, StringBuffer &response, IPropertyTree *oldEnvironment, unsigned timems)
11490+ void initClusterAndStoragePlaneGroups( StringBuffer &response, bool force , IPropertyTree *oldEnvironment, unsigned timems)
1141511491{
1141611492 CInitGroups init(timems, true);
11417- init.clearLZGroups(); // clear existing LZ groups, current ones will be recreated
11418- init.constructGroups(force, response, oldEnvironment);
11419- }
11420-
11421- void initClusterAndStoragePlaneGroups(bool force, IPropertyTree *oldEnvironment, unsigned timems)
11422- {
11423- CInitGroups init(timems, true);
11424- init.clearLZGroups(); // clear existing LZ groups, current ones will be recreated
11425-
11426- StringBuffer response;
11427- init.constructGroups(force, response, oldEnvironment);
11428- if (response.length())
11429- MLOG("DFS group initialization : %s", response.str()); // should this be a syslog?
11430-
11431- response.clear();
11432- init.constructStorageGroups(false, response);
11433- if (response.length())
11434- MLOG("StoragePlane group initialization : %s", response.str()); // should this be a syslog?
11493+ // clearUnprotectedGroups clears all unprotected groups - BM hthor, thor, roxie Environment groups are
11494+ // protected to maintain existing semantics, and to protect logical files which reference them.
11495+ init.clearUnprotectedGroups();
11496+ if (!isContainerized())
11497+ {
11498+ // Create groups based on the Environment
11499+ // Detects mismatches between existing Dali groups and new Environment definitions
11500+ // and avoids replacing them unless forced. This is to avoid situations where existing
11501+ // logical files reference existing groups, changing their definition may render the
11502+ // logical file parts inaccessible.
11503+ // NB: these groups derived from Environment, are tagged with @kind thor, roxie, hthor, dropzone etc.
11504+ init.constructGroups(force, response, oldEnvironment);
11505+ }
11506+ // Create storage plane groups based on the global config storage/planes definitions
11507+ init.constructStorageGroups(response);
1143511508}
1143611509
1143711510bool resetClusterGroup(const char *clusterName, const char *type, bool spares, StringBuffer &response, unsigned timems)
1143811511{
11512+ dbgassertex(!isContainerized());
1143911513 CInitGroups init(timems, true);
1144011514 return init.resetClusterGroup(clusterName, type, spares, response);
1144111515}
0 commit comments