Skip to content

Commit ae83bb0

Browse files
authored
fix krb5_hdfs_storage (#108)
* ppc_model support krb5 auth * fix krb5_hdfs_storage * fix wedpr_ml_toolkit hdfs storage
1 parent 3def5dd commit ae83bb0

File tree

16 files changed

+118
-73
lines changed

16 files changed

+118
-73
lines changed
Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,16 @@
11
# -*- coding: utf-8 -*-
2-
import os
3-
from krbcontext.context import krbContext
42
from hdfs.ext.kerberos import KerberosClient
53
from ppc_common.deps_services.hdfs_storage import HdfsStorage
64
from ppc_common.deps_services.storage_api import HDFSStorageConfig
75

86

97
class Krb5HdfsStorage(HdfsStorage):
10-
def __init__(self, hdfs_config: HDFSStorageConfig):
8+
def __init__(self, hdfs_config: HDFSStorageConfig, logger):
119
super().__init__(hdfs_config, False)
1210
self.hdfs_config = hdfs_config
13-
self.krb5_ctx = krbContext(
14-
using_keytab=True,
15-
principal=self.hdfs_config.hdfs_auth_principal,
16-
keytab_file=self.hdfs_config.hdfs_auth_secret_file_path)
17-
18-
self.client = KerberosClient(self.hdfs_config.hdfs_url)
1911
self.client = KerberosClient(
20-
krb_principal=self.hdfs_config.hdfs_auth_principal,
21-
krb_keytab=self.hdfs_config.hdfs_auth_secret_file_path,
22-
krb_ccache_path="/tmp/hdfs",
23-
hdfs_namenode_address=self.hdfs_config.hdfs_url,
12+
url=self.hdfs_config.hdfs_url,
13+
principal=self.hdfs_config.hdfs_auth_principal,
14+
hostname_override=self.hdfs_config.hdfs_hostname_override,
15+
password=self.hdfs_config.hdfs_auth_password,
2416
timeout=10000)

python/ppc_common/deps_services/storage_api.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,19 @@ def __init__(self, hdfs_url: str = None,
1818
hdfs_home: str = None,
1919
enable_krb5_auth: bool = False,
2020
hdfs_auth_principal: str = None,
21-
hdfs_auth_secret_file_path: str = None):
21+
hdfs_auth_password: str = None,
22+
hdfs_hostname_override: str = None):
2223
self.hdfs_url = hdfs_url
2324
self.hdfs_user = hdfs_user
2425
self.hdfs_home = hdfs_home
2526
self.enable_krb5_auth = enable_krb5_auth
2627
self.hdfs_auth_principal = hdfs_auth_principal
27-
self.hdfs_auth_secret_file_path = hdfs_auth_secret_file_path
28+
self.hdfs_auth_password = hdfs_auth_password
29+
self.hdfs_hostname_override = hdfs_hostname_override
2830

2931
def __repr__(self):
3032
return f"hdfs_user: {self.hdfs_user}, hdfs_home: {self.hdfs_home}, hdfs_url: {self.hdfs_url}, " \
31-
f"enable_krb5_auth: {self.enable_krb5_auth}, hdfs_auth_principal: {self.hdfs_auth_principal}, " \
32-
f"hdfs_auth_secret_file_path: {self.hdfs_auth_secret_file_path}"
33+
f"enable_krb5_auth: {self.enable_krb5_auth}, hdfs_auth_principal: {self.hdfs_auth_principal}"
3334

3435
def load_config(self, config: dict, logger):
3536
self.hdfs_url = common_func.get_config_value(
@@ -38,6 +39,7 @@ def load_config(self, config: dict, logger):
3839
'HDFS_USER', self.DEFAULT_HDFS_USER, config, False)
3940
self.hdfs_home = common_func.get_config_value(
4041
"HDFS_HOME", os.path.join(self.DEFAULT_HDFS_USER_PATH, self.hdfs_user), config, False)
42+
4143
# the auth information
4244
self.enable_krb5_auth = common_func.get_config_value(
4345
"HDFS_ENABLE_AUTH", False, config, False)
@@ -48,14 +50,22 @@ def load_config(self, config: dict, logger):
4850
self.hdfs_auth_principal = common_func.get_config_value(
4951
"HDFS_AUTH_PRINCIPAL", None, config, require_auth_info
5052
)
51-
# the keytab file path
52-
self.hdfs_auth_secret_file_path = common_func.get_config_value(
53-
"HDFS_AUTH_KEYTAB_PATH", None, config, require_auth_info
54-
)
53+
# the password
54+
self.hdfs_auth_password = common_func.get_config_value(
55+
"HDFS_AUTH_PASSWORD", None, config, require_auth_info)
56+
# the hostname override
57+
self.hdfs_hostname_override = common_func.get_config_value(
58+
"HDFS_HOSTNAME_OVERRIDE", None, config, require_auth_info)
5559
if logger is not None:
5660
logger.info(f"*** load hdfs storage config : {self}")
5761
else:
5862
print(f"*** load hdfs storage config : {self}")
63+
self._check()
64+
65+
def _check(self):
    """Validate that the mandatory HDFS settings have been populated.

    Checks the URL, user and home directory in that order by delegating
    to ``common_func.require_non_empty``, which raises on the first
    setting that is ``None`` or empty.
    """
    required_settings = (
        ("HDFS_URL", self.hdfs_url),
        ("HDFS_USER", self.hdfs_user),
        ("HDFS_HOME", self.hdfs_home),
    )
    for setting_name, setting_value in required_settings:
        common_func.require_non_empty(setting_name, setting_value)
5969

6070

6171
class StorageApi(ABC):

python/ppc_common/deps_services/storage_loader.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66

77
class HDFSStorageLoader:
88
@staticmethod
9-
def load(hdfs_config: HDFSStorageConfig):
9+
def load(hdfs_config: HDFSStorageConfig, logger):
1010
if hdfs_config.enable_krb5_auth is False:
1111
return HdfsStorage(hdfs_config)
12-
return Krb5HdfsStorage(hdfs_config)
12+
return Krb5HdfsStorage(hdfs_config, logger)
1313

1414

1515
def load(config: dict, logger):
1616
if config['STORAGE_TYPE'] == StorageType.HDFS.value:
1717
hdfs_config = HDFSStorageConfig()
1818
hdfs_config.load_config(config, logger)
19-
return HDFSStorageLoader.load(hdfs_config)
19+
return HDFSStorageLoader.load(hdfs_config, logger)
2020
else:
2121
raise Exception('unsupported storage type')

python/ppc_common/deps_services/tests/hdfs_storage_test.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,28 @@
22
import unittest
33
from ppc_common.deps_services.storage_api import HDFSStorageConfig
44
from ppc_common.deps_services.storage_loader import HDFSStorageLoader
5+
import logging
56

67

78
class HDFSStorageWrapper:
89
def __init__(self):
10+
self.logger = logging.getLogger("HDFSStorageWrapper")
911
# use the default config
10-
hdfs_url = "hdfs://127.0.0.1:9900"
11-
hdfs_user = "wedpr"
12+
hdfs_url = "http://127.0.0.1:50070"
13+
hdfs_user = "root"
1214
hdfs_home = "/user/ppc"
13-
enable_krb5_auth = False
14-
hdfs_auth_principal = ""
15-
hdfs_auth_secret_file_path = ""
15+
enable_krb5_auth = True
16+
hdfs_auth_principal = "[email protected]"
17+
hdfs_auth_password = "root"
18+
hdfs_hostname_override = "wedpr-0001"
1619
self.hdfs_config = HDFSStorageConfig(
1720
hdfs_url=hdfs_url, hdfs_user=hdfs_user,
1821
hdfs_home=hdfs_home, enable_krb5_auth=enable_krb5_auth,
1922
hdfs_auth_principal=hdfs_auth_principal,
20-
hdfs_auth_secret_file_path=hdfs_auth_secret_file_path)
21-
self.hdfs_storage = HDFSStorageLoader.load(self.hdfs_config)
23+
hdfs_auth_password=hdfs_auth_password,
24+
hdfs_hostname_override=hdfs_hostname_override)
25+
self.hdfs_storage = HDFSStorageLoader.load(
26+
self.hdfs_config, self.logger)
2227

2328
def test_file_op(self, file_path):
2429
hdfs_file_path = f"test/{file_path}"

python/ppc_common/ppc_utils/common_func.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,8 @@ def get_file_encoding(file_path):
2626
raise Exception(f"Unknown File Encoding, file: {file_path}")
2727
encoding = file_chardet["encoding"]
2828
return encoding
29+
30+
31+
def require_non_empty(value_property, value):
    """Ensure that a required configuration value is present.

    Args:
        value_property: Name of the setting being checked; used only to
            build the error message.
        value: The value to validate. It must be ``None`` or support
            ``len()``.

    Raises:
        Exception: If ``value`` is ``None`` or has zero length.
    """
    if value is None or len(value) == 0:
        # The previous message used "${...}" (shell/JS-style interpolation),
        # which leaked a literal "$" into the output of the f-string.
        raise Exception(f"the {value_property} must be non-empty!")

python/ppc_model/conf/application-sample.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,15 @@ SQLALCHEMY_DATABASE_URI: "mysql://[*user_ppcsmodeladm]:[*pass_ppcsmodeladm]@[@43
1616
HDFS_URL: "http://127.0.0.1:50070"
1717
# HDFS,
1818
STORAGE_TYPE: "HDFS"
19-
HDFS_URL: "http://127.0.0.1:9870"
2019
HDFS_USER: "root"
2120
HDFS_HOME: "/user/ppc/model/webank"
2221
HDFS_ENABLE_AUTH: False
2322
# the hdfs auth principal
2423
HDFS_AUTH_PRINCIPAL: "[email protected]"
25-
# the auth key-tab path
26-
HDFS_AUTH_KEYTAB_PATH: "./hdfs-wedpr.keytab"
24+
# the auth password
25+
HDFS_AUTH_PASSWORD: ""
26+
# the host name override
27+
HDFS_HOSTNAME_OVERRIDE: "wedpr-0001"
2728

2829
JOB_TEMP_DIR: ".cache/job"
2930

python/ppc_model/model_crypto/crypto_aes.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,16 @@ def load_key_from_file(filename):
2323
key = file.read()
2424
return key
2525

26-
27-
# key = load_key_from_file('aes_key.bin')
26+
# AES加密函数
2827

2928

30-
# AES加密函数
3129
def encrypt_data(key, plaintext):
3230
# 使用随机生成的初始向量 (IV)
3331
iv = os.urandom(16) # AES块大小为128位(16字节)
34-
32+
3533
# 创建AES加密器
36-
cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
34+
cipher = Cipher(algorithms.AES(key), modes.CBC(iv),
35+
backend=default_backend())
3736
encryptor = cipher.encryptor()
3837

3938
# 对数据进行填充(AES要求输入的块大小为128位)
@@ -54,11 +53,13 @@ def decrypt_data(key, ciphertext):
5453
actual_ciphertext = ciphertext[16:]
5554

5655
# 创建AES解密器
57-
cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
56+
cipher = Cipher(algorithms.AES(key), modes.CBC(iv),
57+
backend=default_backend())
5858
decryptor = cipher.decryptor()
5959

6060
# 解密数据
61-
decrypted_padded_data = decryptor.update(actual_ciphertext) + decryptor.finalize()
61+
decrypted_padded_data = decryptor.update(
62+
actual_ciphertext) + decryptor.finalize()
6263

6364
# 去除填充
6465
unpadder = padding.PKCS7(128).unpadder()

python/requirements.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,5 +56,4 @@ MarkupSafe>=2.1.1
5656
urllib3==1.26.18
5757
phe
5858
chardet
59-
krbcontext
60-
requests_kerberos
59+
requests_kerberos>=0.15.0

python/wedpr_ml_toolkit/conf/config.properties

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,9 @@ remote_entrypoints=http://127.0.0.1:16000,http://127.0.0.1:16001
55
agency_name=WeBank
66
workspace_path=/user/wedpr/webank/
77
user=test_user
8+
89
storage_endpoint=http://127.0.0.1:50070
10+
enable_krb5_auth=False
11+
hdfs_auth_principal=[email protected]
12+
hdfs_auth_password=root
13+
hdfs_hostname_override=wedpr-0001

python/wedpr_ml_toolkit/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ setuptools>=70.0.0
22
hdfs>=2.7.2
33
requests~=2.31.0
44
requests_toolbelt==0.9.1
5-
5+
requests_kerberos>=0.15.0

0 commit comments

Comments
 (0)