Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
d65d4df
Add master cluster autoscale support
dwoz May 27, 2025
991d1b4
Fix critical bugs in cluster join-reply handler
dwoz Jan 1, 2026
15a8dec
Add signature verification and security hardening to cluster join pro…
dwoz Jan 1, 2026
29e8dbd
Use Salt's clean_join() for path traversal protection
dwoz Jan 1, 2026
1da7bd2
Add comprehensive integration tests for cluster autoscale
dwoz Jan 1, 2026
6c9d37d
Add comprehensive unit tests for cluster autoscale join handlers
dwoz Jan 1, 2026
0b9d3ff
Fix unit test failures in cluster autoscale tests
dwoz Jan 1, 2026
124a1f6
Add signature verification to JOIN-NOTIFY handler
dwoz Jan 1, 2026
7c2baba
Enable token validation in discover-reply handler
dwoz Jan 2, 2026
29d626a
Fix config key typo: clsuter_pub_signature → cluster_pub_signature
dwoz Jan 2, 2026
49f7cf2
Replace SHA-1 with SHA-256 for cluster_pub digest
dwoz Jan 2, 2026
dcfee4b
Add cluster_pub_signature_required config option (secure by default)
dwoz Jan 2, 2026
a97b6e5
Add tests for cluster_pub_signature_required (secure by default)
dwoz Jan 2, 2026
bd2286a
Fix master startup crash when master_pub not yet in cache
dwoz Jan 2, 2026
eeb61e5
Fix AttributeError: remove broken __get_keys() call in MasterKeys.__i…
dwoz Jan 2, 2026
6624395
Fix test: check peer key instead of cluster.pem for join rejection
dwoz Jan 2, 2026
4d00259
Fix test_autoscale_rejects_join_with_wrong_cluster_pub_signature
dwoz Jan 2, 2026
b0782fb
Fix linting errors in autoscale code
dwoz Jan 2, 2026
5c51dfe
Replace broad exception handlers with specific exception types
dwoz Jan 2, 2026
632d1ab
Add changelog entry for cluster autoscale feature
dwoz Jan 3, 2026
ff1bcec
Fix isort and black formatting in test files
dwoz Jan 3, 2026
1532320
Fix black formatting in source files
dwoz Jan 3, 2026
b87c558
Fix black formatting in test_autoscale_functional.py
dwoz Jan 3, 2026
d3ec03f
Fix black formatting in earlier autoscale commits
dwoz Jan 3, 2026
424df4c
Fix PublicKey class to accept key_bytes instead of path
dwoz Jan 3, 2026
b7299e6
Fix integration tests to read log files instead of calling non-existe…
dwoz Jan 4, 2026
838cf14
Run black formatter on test_autoscale_security.py
dwoz Jan 4, 2026
2b1b6dc
Fix pylint encoding warnings in _get_log_contents()
dwoz Jan 4, 2026
34e9e68
Fix TypeError in cluster autoscale: use BaseKey.from_file() instead o…
dwoz Jan 4, 2026
ab63897
Fix TypeError: use PublicKey.from_file() instead of BaseKey.from_file()
dwoz Jan 5, 2026
84923c4
Fix handle_pool_publish() signature: remove extra parameter
dwoz Jan 6, 2026
22814fa
Add exception handling for InvalidKeyError in cluster discover/join h…
dwoz Jan 6, 2026
ea760c9
Run black formatter on server.py
dwoz Jan 9, 2026
a68c409
Fix event routing and KeyError issues in cluster autoscale
dwoz Jan 11, 2026
882d5ca
Add automatic peer discovery mechanism (partial implementation)
dwoz Jan 14, 2026
ac23230
Fix pre-commit
dwoz Apr 2, 2026
bdf4a23
Fix Master Cluster autoscale protocol and transport initialization
dwoz Apr 3, 2026
e830aac
Fix Master Cluster autoscale transport initialization and key retrieval
dwoz Apr 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/68576.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add cluster autoscale support with comprehensive security hardening including signature verification, token validation, path traversal protection, and secure-by-default configuration.
914 changes: 897 additions & 17 deletions salt/channel/server.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion salt/cli/daemons.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def verify_environment(self):
if (
self.config["cluster_id"]
and self.config["cluster_pki_dir"]
and self.config["cluster_pki_dir"] != self.config["pki_dir"]
# and self.config["cluster_pki_dir"] != self.config["pki_dir"]
):
v_dirs.extend(
[
Expand Down
2 changes: 1 addition & 1 deletion salt/client/ssh/ssh_py_shim.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class OptionsContainer:
# The below line is where OPTIONS can be redefined with internal options
# (rather than cli arguments) when the shim is bundled by
# client.ssh.Single._cmd_str()
#%%OPTS
# %%OPTS


def get_system_encoding():
Expand Down
8 changes: 7 additions & 1 deletion salt/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,10 @@ def _gather_buffer_space():
"cluster_pki_dir": str,
# The port required to be open for a master cluster to properly function
"cluster_pool_port": int,
# SHA-256 hash of the cluster public key for verification during autoscale join
"cluster_pub_signature": str,
# Require cluster_pub_signature to be configured for autoscale joins (recommended)
"cluster_pub_signature_required": bool,
# Use a module function to determine the unique identifier. If this is
# set and 'id' is not set, it will allow invocation of a module function
# to determine the value of 'id'. For simple invocations without function
Expand Down Expand Up @@ -1723,6 +1727,8 @@ def _gather_buffer_space():
"cluster_peers": [],
"cluster_pki_dir": None,
"cluster_pool_port": 4520,
"cluster_pub_signature": None,
"cluster_pub_signature_required": True,
"features": {},
"publish_signing_algorithm": "PKCS1v15-SHA1",
"keys.cache_driver": "localfs_key",
Expand Down Expand Up @@ -4186,7 +4192,7 @@ def apply_master_config(overrides=None, defaults=None):
if "cluster_id" not in opts:
opts["cluster_id"] = None
if opts["cluster_id"] is not None:
if not opts.get("cluster_peers", None):
if not opts.get("cluster_peers", None) and not opts.get("cluster_secret", None):
log.warning("Cluster id defined without defining cluster peers")
opts["cluster_peers"] = []
if not opts.get("cluster_pki_dir", None):
Expand Down
149 changes: 148 additions & 1 deletion salt/crypt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import base64
import binascii
import copy
import getpass
import hashlib
import hmac
import logging
Expand Down Expand Up @@ -145,6 +146,57 @@ def dropfile(cachedir, user=None, master_id=""):
os.rename(dfn_next, dfn)


def _write_private(keydir, keyname, key, passphrase=None):
base = os.path.join(keydir, keyname)
priv = f"{base}.pem"
# Do not try writing anything, if directory has no permissions.
if not os.access(keydir, os.W_OK):
raise OSError(
'Write access denied to "{}" for user "{}".'.format(
os.path.abspath(keydir), getpass.getuser()
)
)
if pathlib.Path(priv).exists():
# XXX
# raise RuntimeError()
log.error("Key should not exist")
with salt.utils.files.set_umask(0o277):
with salt.utils.files.fopen(priv, "wb+") as f:
if passphrase:
enc = serialization.BestAvailableEncryption(passphrase.encode())
_format = serialization.PrivateFormat.TraditionalOpenSSL
if fips_enabled():
_format = serialization.PrivateFormat.PKCS8
else:
enc = serialization.NoEncryption()
_format = serialization.PrivateFormat.TraditionalOpenSSL
pem = key.private_bytes(
encoding=serialization.Encoding.PEM,
format=_format,
encryption_algorithm=enc,
)
f.write(pem)


def _write_public(keydir, keyname, key):
base = os.path.join(keydir, keyname)
pub = f"{base}.pub"
# Do not try writing anything, if directory has no permissions.
if not os.access(keydir, os.W_OK):
raise OSError(
'Write access denied to "{}" for user "{}".'.format(
os.path.abspath(keydir), getpass.getuser()
)
)
pubkey = key.public_key()
with salt.utils.files.fopen(pub, "wb+") as f:
pem = pubkey.public_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PublicFormat.SubjectPublicKeyInfo,
)
f.write(pem)


def gen_keys(keysize, passphrase=None, e=65537):
"""
Generate a RSA public keypair for use with salt
Expand Down Expand Up @@ -183,6 +235,47 @@ def gen_keys(keysize, passphrase=None, e=65537):
)


def write_keys(keydir, keyname, keysize, user=None, passphrase=None, e=65537):
"""
Generate and write a RSA public keypair for use with salt

:param str keydir: The directory to write the keypair to
:param str keyname: The type of salt server for whom this key should be written. (i.e. 'master' or 'minion')
:param int keysize: The number of bits in the key
:param str user: The user on the system who should own this keypair
:param str passphrase: The passphrase which should be used to encrypt the private key

:rtype: str
:return: Path on the filesystem to the RSA private key
"""
base = os.path.join(keydir, keyname)
priv = f"{base}.pem"
pub = f"{base}.pub"

gen = rsa.generate_private_key(e, keysize)

if os.path.isfile(priv):
# Between first checking and the generation another process has made
# a key! Use the winner's key
return priv

_write_private(keydir, keyname, gen, passphrase)
_write_public(keydir, keyname, gen)
os.chmod(priv, 0o400)
if user:
try:
import pwd

uid = pwd.getpwnam(user).pw_uid
os.chown(priv, uid, -1)
os.chown(pub, uid, -1)
except (KeyError, ImportError, OSError):
# The specified user was not found, allow the backup systems to
# report the error
pass
return priv


class BaseKey:

@classmethod
Expand Down Expand Up @@ -278,14 +371,29 @@ def decrypt(self, data, algorithm=OAEP_SHA1):
except cryptography.exceptions.UnsupportedAlgorithm:
raise UnsupportedAlgorithm(f"Unsupported algorithm: {algorithm}")

def write_private(self, keydir, name, passphrase=None):
_write_private(keydir, name, self.key, passphrase)

def write_public(self, keydir, name):
_write_public(keydir, name, self.key)

def public_key(self):
"""
proxy to PrivateKey.public_key()
"""
return self.key.public_key()


class PrivateKeyString(PrivateKey):
def __init__(self, data, password=None): # pylint: disable=super-init-not-called
self.key = serialization.load_pem_private_key(
data.encode(),
password=password,
)


class PublicKey(BaseKey):

def __init__(self, key_bytes):
log.debug("Loading public key")
try:
Expand All @@ -298,7 +406,10 @@ def __init__(self, key_bytes):
def encrypt(self, data, algorithm=OAEP_SHA1):
_padding = self.parse_padding_for_encryption(algorithm)
_hash = self.parse_hash(algorithm)
bdata = salt.utils.stringutils.to_bytes(data)
if type(data) == "bytes":
bdata = data
else:
bdata = salt.utils.stringutils.to_bytes(data)
try:
return self.key.encrypt(
bdata,
Expand Down Expand Up @@ -334,6 +445,14 @@ def decrypt(self, data):
return verifier.verify(data)


class PublicKeyString(PublicKey):
def __init__(self, data): # pylint: disable=super-init-not-called
try:
self.key = serialization.load_pem_public_key(data.encode())
except ValueError as exc:
raise InvalidKeyError("Invalid key")


@salt.utils.decorators.memoize
def get_rsa_key(path, passphrase):
"""
Expand Down Expand Up @@ -399,6 +518,27 @@ def __init__(self, opts, autocreate=True):
# master.pem/pub can be removed
self.master_id = self.opts["id"].removesuffix("_master")

self.cluster_pub_path = None
self.cluster_rsa_path = None
self.cluster_key = None
# XXX
if self.opts["cluster_id"]:
self.cluster_pub_path = os.path.join(
self.opts["cluster_pki_dir"], "cluster.pub"
)
self.cluster_rsa_path = os.path.join(
self.opts["cluster_pki_dir"], "cluster.pem"
)
if self.opts["cluster_pki_dir"] != self.opts["pki_dir"]:
self.cluster_shared_path = os.path.join(
self.opts["cluster_pki_dir"],
"peers",
f"{self.opts['id']}.pub",
)
# Note: cluster_key setup moved to _setup_keys() (line 614-620)
# to ensure it happens after master keys are initialized
self.pub_signature = None

# set names for the signing key-pairs
self.pubkey_signature = None
self.master_pubkey_signature = (
Expand Down Expand Up @@ -597,6 +737,13 @@ def check_master_shared_pub(self):
if not master_pub:
master_pub = self.cache.fetch("master_keys", "master.pub")

# If master_pub is still None, the keys haven't been set up yet
# This can happen if check_master_shared_pub() is called before _setup_keys()
# Just return early and let the later call (after key setup) handle it
if not master_pub:
log.debug("Master public key not yet available, skipping shared key check")
return

if shared_pub:
if shared_pub != master_pub:
message = (
Expand Down
21 changes: 14 additions & 7 deletions salt/ext/saslprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,14 @@ def saslprep(data):
if isinstance(data, str):
raise TypeError(
"The stringprep module is not available. Usernames and "
"passwords must be ASCII strings.")
"passwords must be ASCII strings."
)
return data

else:
HAVE_STRINGPREP = True
import unicodedata

# RFC4013 section 2.3 prohibited output.
_PROHIBITED = (
# A strict reading of RFC 4013 requires table c12 here, but
Expand All @@ -44,7 +47,8 @@ def saslprep(data):
stringprep.in_table_c6,
stringprep.in_table_c7,
stringprep.in_table_c8,
stringprep.in_table_c9)
stringprep.in_table_c9,
)

def saslprep(data, prohibit_unassigned_code_points=True):
"""An implementation of RFC4013 SASLprep.
Expand Down Expand Up @@ -77,12 +81,16 @@ def saslprep(data, prohibit_unassigned_code_points=True):
in_table_c12 = stringprep.in_table_c12
in_table_b1 = stringprep.in_table_b1
data = "".join(
["\u0020" if in_table_c12(elt) else elt
for elt in data if not in_table_b1(elt)])
[
"\u0020" if in_table_c12(elt) else elt
for elt in data
if not in_table_b1(elt)
]
)

# RFC3454 section 2, step 2 - Normalize
# RFC4013 section 2.2 normalization
data = unicodedata.ucd_3_2_0.normalize('NFKC', data)
data = unicodedata.ucd_3_2_0.normalize("NFKC", data)

in_table_d1 = stringprep.in_table_d1
if in_table_d1(data[0]):
Expand All @@ -103,7 +111,6 @@ def saslprep(data, prohibit_unassigned_code_points=True):
# RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
for char in data:
if any(in_table(char) for in_table in prohibited):
raise ValueError(
"SASLprep: failed prohibited character check")
raise ValueError("SASLprep: failed prohibited character check")

return data
36 changes: 24 additions & 12 deletions salt/master.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,30 @@ def start(self):
log.info("Creating master process manager")
# Since there are children having their own ProcessManager we should wait for kill more time.
self.process_manager = salt.utils.process.ProcessManager(wait_for_kill=5)

event = multiprocessing.Event()

log.info("Creating master event publisher process")
ipc_publisher = salt.channel.server.MasterPubServerChannel.factory(
self.opts,
_discover_event=event,
)
ipc_publisher.pre_fork(self.process_manager)
if not ipc_publisher.transport.started.wait(30):
raise salt.exceptions.SaltMasterError(
"IPC publish server did not start within 30 seconds. Something went wrong."
)

if self.opts.get("cluster_id", None):
if (
self.opts.get("cluster_peers", [])
and not ipc_publisher.cluster_key()
):
ipc_publisher.discover_peers()
event.wait(timeout=30)

ipc_publisher.send_aes_key_event()

pub_channels = []
log.info("Creating master publisher process")
for _, opts in iter_transport_opts(self.opts):
Expand All @@ -814,15 +838,6 @@ def start(self):
)
pub_channels.append(chan)

log.info("Creating master event publisher process")
ipc_publisher = salt.channel.server.MasterPubServerChannel.factory(
self.opts
)
ipc_publisher.pre_fork(self.process_manager)
if not ipc_publisher.transport.started.wait(30):
raise salt.exceptions.SaltMasterError(
"IPC publish server did not start within 30 seconds. Something went wrong."
)
self.process_manager.add_process(
EventMonitor,
args=[self.opts, ipc_publisher],
Expand Down Expand Up @@ -936,9 +951,6 @@ def start(self):
# No custom signal handling was added, install our own
signal.signal(signal.SIGTERM, self._handle_signals)

if self.opts.get("cluster_id", None):
# Notify the rest of the cluster we're starting.
ipc_publisher.send_aes_key_event()
asyncio.run(self.process_manager.run())

def _handle_signals(self, signum, sigframe):
Expand Down
Loading
Loading