Skip to content

Commit 1198b57

Browse files
authored
Python: Add ability to specify encoding when adding a plugin. Add tests. (#12797)
### Motivation and Context

Right now, when adding a plugin, there is no way to specify the encoding used to read the plugin's text files.

<!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. -->

### Description

Allow the encoding used when reading plugin files to be configured via a new `encoding` parameter (defaults to `"utf-8"`).

- Adds unit tests
- Closes #12440

<!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! -->

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone 😄
1 parent e5972e2 commit 1198b57

File tree

5 files changed

+280
-5
lines changed

5 files changed

+280
-5
lines changed

python/semantic_kernel/functions/kernel_function_extension.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def add_plugin(
6868
parent_directory: str | None = None,
6969
description: str | None = None,
7070
class_init_arguments: dict[str, dict[str, Any]] | None = None,
71+
encoding: str = "utf-8",
7172
) -> "KernelPlugin":
7273
"""Adds a plugin to the kernel's collection of plugins.
7374
@@ -88,6 +89,7 @@ def add_plugin(
8889
parent_directory: The parent directory path where the plugin directory resides
8990
description: The description of the plugin, used if the plugin is not a KernelPlugin.
9091
class_init_arguments: The class initialization arguments
92+
encoding: The encoding to use when reading text files. Defaults to "utf-8".
9193
9294
Returns:
9395
KernelPlugin: The plugin that was added.
@@ -116,6 +118,7 @@ def add_plugin(
116118
parent_directory=parent_directory,
117119
description=description,
118120
class_init_arguments=class_init_arguments,
121+
encoding=encoding,
119122
)
120123
return self.plugins[plugin_name]
121124
raise ValueError("plugin or parent_directory must be provided.")

python/semantic_kernel/functions/kernel_function_from_prompt.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,13 +355,20 @@ def from_yaml(cls, yaml_str: str, plugin_name: str | None = None) -> "KernelFunc
355355
)
356356

357357
@classmethod
358-
def from_directory(cls, path: str, plugin_name: str | None = None) -> "KernelFunctionFromPrompt":
358+
def from_directory(
359+
cls, path: str, plugin_name: str | None = None, encoding: str = "utf-8"
360+
) -> "KernelFunctionFromPrompt":
359361
"""Creates a new instance of the KernelFunctionFromPrompt class from a directory.
360362
361363
The directory needs to contain:
362364
- A prompt file named `skprompt.txt`
363365
- A config file named `config.json`
364366
367+
Args:
368+
path: The path to the directory containing the prompt and config files.
369+
plugin_name: The name of the plugin.
370+
encoding: The encoding to use when reading the files. Defaults to "utf-8".
371+
365372
Returns:
366373
KernelFunctionFromPrompt: The kernel function from prompt
367374
"""
@@ -387,11 +394,11 @@ def from_directory(cls, path: str, plugin_name: str | None = None) -> "KernelFun
387394

388395
function_name = os.path.basename(path)
389396

390-
with open(config_path) as config_file:
397+
with open(config_path, encoding=encoding) as config_file:
391398
prompt_template_config = PromptTemplateConfig.from_json(config_file.read())
392399
prompt_template_config.name = function_name
393400

394-
with open(prompt_path) as prompt_file:
401+
with open(prompt_path, encoding=encoding) as prompt_file:
395402
prompt_template_config.template = prompt_file.read()
396403

397404
prompt_template = TEMPLATE_FORMAT_MAP[prompt_template_config.template_format]( # type: ignore

python/semantic_kernel/functions/kernel_plugin.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ def from_directory(
256256
parent_directory: str,
257257
description: str | None = None,
258258
class_init_arguments: dict[str, dict[str, Any]] | None = None,
259+
encoding: str = "utf-8",
259260
) -> _T:
260261
"""Create a plugin from a specified directory.
261262
@@ -294,6 +295,7 @@ def from_directory(
294295
parent_directory (str): The parent directory path where the plugin directory resides
295296
description (str | None): The description of the plugin
296297
class_init_arguments (dict[str, dict[str, Any]] | None): The class initialization arguments
298+
encoding (str): The encoding to use when reading text files. Defaults to "utf-8".
297299
298300
Returns:
299301
KernelPlugin: The created plugin of type KernelPlugin.
@@ -313,11 +315,11 @@ def from_directory(
313315
if os.path.basename(object).startswith("__"):
314316
continue
315317
try:
316-
functions.append(KernelFunctionFromPrompt.from_directory(path=object))
318+
functions.append(KernelFunctionFromPrompt.from_directory(path=object, encoding=encoding))
317319
except FunctionInitializationError:
318320
logger.warning(f"Failed to create function from directory: {object}")
319321
elif object.endswith(".yaml") or object.endswith(".yml"):
320-
with open(object) as file:
322+
with open(object, encoding=encoding) as file:
321323
try:
322324
functions.append(KernelFunctionFromPrompt.from_yaml(file.read()))
323325
except FunctionInitializationError:

python/tests/unit/functions/test_kernel_function_from_prompt.py

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

33
import os
4+
import tempfile
45
from unittest.mock import patch
56

67
import pytest
@@ -418,3 +419,210 @@ def test_function_model_dump_json():
418419
model_dump_json = function.model_dump_json()
419420
assert isinstance(model_dump_json, str)
420421
assert "test" in model_dump_json
422+
423+
424+
def test_from_directory_utf8_encoding_default():
425+
"""Test loading plugin with default UTF-8 encoding."""
426+
with tempfile.TemporaryDirectory() as temp_dir:
427+
prompt_path = os.path.join(temp_dir, "skprompt.txt")
428+
config_path = os.path.join(temp_dir, "config.json")
429+
430+
# UTF-8 content with international characters
431+
prompt_content = """Hello! I can help with questions in multiple languages:
432+
English: Hello world!
433+
Spanish: ¡Hola mundo!
434+
Chinese: 你好世界!
435+
Japanese: こんにちは世界!
436+
437+
Question: {{$input}}
438+
"""
439+
440+
config_content = """{
441+
"schema": 1,
442+
"description": "A multilingual assistant function",
443+
"input_variables": [
444+
{
445+
"name": "input",
446+
"description": "User's question",
447+
"required": true
448+
}
449+
]
450+
}"""
451+
452+
# Write files with UTF-8 encoding
453+
with open(prompt_path, "w", encoding="utf-8") as f:
454+
f.write(prompt_content)
455+
with open(config_path, "w", encoding="utf-8") as f:
456+
f.write(config_content)
457+
458+
# Test default behavior (should use UTF-8)
459+
function = KernelFunctionFromPrompt.from_directory(temp_dir)
460+
assert function.name == os.path.basename(temp_dir)
461+
assert function.description == "A multilingual assistant function"
462+
assert "你好世界" in function.prompt_template.prompt_template_config.template
463+
assert "こんにちは世界" in function.prompt_template.prompt_template_config.template
464+
465+
466+
def test_from_directory_explicit_utf8_encoding():
467+
"""Test loading plugin with explicit UTF-8 encoding."""
468+
with tempfile.TemporaryDirectory() as temp_dir:
469+
prompt_path = os.path.join(temp_dir, "skprompt.txt")
470+
config_path = os.path.join(temp_dir, "config.json")
471+
472+
prompt_content = "Hello with UTF-8 characters: ñáéíóú {{$input}}"
473+
config_content = '{"schema": 1, "description": "Test with UTF-8 characters"}'
474+
475+
with open(prompt_path, "w", encoding="utf-8") as f:
476+
f.write(prompt_content)
477+
with open(config_path, "w", encoding="utf-8") as f:
478+
f.write(config_content)
479+
480+
# Test explicit UTF-8 encoding
481+
function = KernelFunctionFromPrompt.from_directory(temp_dir, encoding="utf-8")
482+
assert function.description == "Test with UTF-8 characters"
483+
assert "ñáéíóú" in function.prompt_template.prompt_template_config.template
484+
485+
486+
def test_from_directory_latin1_encoding():
487+
"""Test loading plugin with Latin-1 encoding."""
488+
with tempfile.TemporaryDirectory() as temp_dir:
489+
prompt_path = os.path.join(temp_dir, "skprompt.txt")
490+
config_path = os.path.join(temp_dir, "config.json")
491+
492+
# Content with Latin-1 characters (Western European)
493+
prompt_content = """Assistant for Western European languages:
494+
French: café, naïve, résumé
495+
German: Müller, Größe, weiß
496+
Spanish: niño, señora, años
497+
498+
Question: {{$input}}
499+
"""
500+
501+
config_content = """{
502+
"schema": 1,
503+
"description": "Western European language assistant",
504+
"input_variables": [
505+
{
506+
"name": "input",
507+
"description": "User's question",
508+
"required": true
509+
}
510+
]
511+
}"""
512+
513+
# Write files with Latin-1 encoding
514+
with open(prompt_path, "w", encoding="latin-1") as f:
515+
f.write(prompt_content)
516+
with open(config_path, "w", encoding="latin-1") as f:
517+
f.write(config_content)
518+
519+
# Load with Latin-1 encoding
520+
function = KernelFunctionFromPrompt.from_directory(temp_dir, encoding="latin-1")
521+
assert function.description == "Western European language assistant"
522+
assert "café" in function.prompt_template.prompt_template_config.template
523+
assert "Müller" in function.prompt_template.prompt_template_config.template
524+
assert "niño" in function.prompt_template.prompt_template_config.template
525+
526+
527+
def test_from_directory_cp1252_encoding():
528+
"""Test loading plugin with Windows-1252 encoding."""
529+
with tempfile.TemporaryDirectory() as temp_dir:
530+
prompt_path = os.path.join(temp_dir, "skprompt.txt")
531+
config_path = os.path.join(temp_dir, "config.json")
532+
533+
# Content with Windows-1252 specific characters
534+
prompt_content = """Windows text processing assistant:
535+
Smart quotes: "Hello" and 'world'
536+
Em dash: Yes—absolutely!
537+
Ellipsis: Wait…
538+
539+
Question: {{$input}}
540+
"""
541+
542+
config_content = """{
543+
"schema": 1,
544+
"description": "Windows text processing assistant",
545+
"input_variables": [
546+
{
547+
"name": "input",
548+
"description": "User's question about text processing",
549+
"required": true
550+
}
551+
]
552+
}"""
553+
554+
# Write files with Windows-1252 encoding
555+
with open(prompt_path, "w", encoding="cp1252") as f:
556+
f.write(prompt_content)
557+
with open(config_path, "w", encoding="cp1252") as f:
558+
f.write(config_content)
559+
560+
# Load with Windows-1252 encoding
561+
function = KernelFunctionFromPrompt.from_directory(temp_dir, encoding="cp1252")
562+
assert function.description == "Windows text processing assistant"
563+
assert '"Hello"' in function.prompt_template.prompt_template_config.template
564+
assert "Yes—absolutely" in function.prompt_template.prompt_template_config.template
565+
assert "Wait…" in function.prompt_template.prompt_template_config.template
566+
567+
568+
def test_from_directory_with_plugin_name_and_encoding():
569+
"""Test loading plugin with both plugin name and encoding specified."""
570+
with tempfile.TemporaryDirectory() as temp_dir:
571+
prompt_path = os.path.join(temp_dir, "skprompt.txt")
572+
config_path = os.path.join(temp_dir, "config.json")
573+
574+
prompt_content = "Simple assistant: {{$input}}"
575+
config_content = '{"schema": 1, "description": "Simple assistant"}'
576+
577+
with open(prompt_path, "w", encoding="utf-8") as f:
578+
f.write(prompt_content)
579+
with open(config_path, "w", encoding="utf-8") as f:
580+
f.write(config_content)
581+
582+
# Load with both plugin name and encoding specified
583+
function = KernelFunctionFromPrompt.from_directory(
584+
path=temp_dir, plugin_name="MyCustomPlugin", encoding="utf-8"
585+
)
586+
assert function.metadata.plugin_name == "MyCustomPlugin"
587+
assert function.description == "Simple assistant"
588+
assert function.prompt_template.prompt_template_config.template == "Simple assistant: {{$input}}"
589+
590+
591+
def test_from_directory_encoding_error_handling():
592+
"""Test that incorrect encoding raises appropriate error."""
593+
with tempfile.TemporaryDirectory() as temp_dir:
594+
prompt_path = os.path.join(temp_dir, "skprompt.txt")
595+
config_path = os.path.join(temp_dir, "config.json")
596+
597+
# Write UTF-8 content
598+
prompt_content = "Hello with UTF-8: 你好世界 {{$input}}"
599+
config_content = '{"schema": 1, "description": "UTF-8 content"}'
600+
601+
with open(prompt_path, "w", encoding="utf-8") as f:
602+
f.write(prompt_content)
603+
with open(config_path, "w", encoding="utf-8") as f:
604+
f.write(config_content)
605+
606+
# Try to read with ASCII encoding - should fail
607+
with pytest.raises(UnicodeDecodeError):
608+
KernelFunctionFromPrompt.from_directory(temp_dir, encoding="ascii")
609+
610+
611+
def test_from_directory_backward_compatibility():
612+
"""Test that existing code without encoding parameter still works."""
613+
with tempfile.TemporaryDirectory() as temp_dir:
614+
prompt_path = os.path.join(temp_dir, "skprompt.txt")
615+
config_path = os.path.join(temp_dir, "config.json")
616+
617+
prompt_content = "Basic ASCII content: {{$input}}"
618+
config_content = '{"schema": 1, "description": "Basic function"}'
619+
620+
with open(prompt_path, "w", encoding="utf-8") as f:
621+
f.write(prompt_content)
622+
with open(config_path, "w", encoding="utf-8") as f:
623+
f.write(config_content)
624+
625+
# Test that old calling style still works
626+
function = KernelFunctionFromPrompt.from_directory(temp_dir)
627+
assert function.description == "Basic function"
628+
assert function.prompt_template.prompt_template_config.template == "Basic ASCII content: {{$input}}"

python/tests/unit/kernel/test_kernel.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

33
import os
4+
import tempfile
45
from collections.abc import Callable
56
from dataclasses import dataclass
67
from pathlib import Path
@@ -597,6 +598,60 @@ def test_add_plugin_from_directory(kernel: Kernel):
597598
assert func_handlebars is not None
598599

599600

601+
def test_add_plugin_from_directory_with_encoding(kernel: Kernel):
602+
"""Test kernel.add_plugin with custom encoding parameter."""
603+
with tempfile.TemporaryDirectory() as temp_dir:
604+
# Create a plugin directory with UTF-8 content
605+
plugin_dir = os.path.join(temp_dir, "test_encoding_plugin")
606+
os.makedirs(plugin_dir)
607+
608+
function_dir = os.path.join(plugin_dir, "test_function")
609+
os.makedirs(function_dir)
610+
611+
prompt_path = os.path.join(function_dir, "skprompt.txt")
612+
config_path = os.path.join(function_dir, "config.json")
613+
614+
# UTF-8 content with international characters
615+
# Hello World Test
616+
prompt_content = """Multi-language assistant:
617+
Chinese: 你好世界!
618+
Japanese: こんにちは世界!
619+
Question: {{$input}}
620+
"""
621+
622+
config_content = """{
623+
"schema": 1,
624+
"description": "Test encoding function",
625+
"input_variables": [
626+
{
627+
"name": "input",
628+
"description": "User's question",
629+
"required": true
630+
}
631+
]
632+
}"""
633+
634+
# Write files with UTF-8 encoding
635+
with open(prompt_path, "w", encoding="utf-8") as f:
636+
f.write(prompt_content)
637+
with open(config_path, "w", encoding="utf-8") as f:
638+
f.write(config_content)
639+
640+
# Test with explicit encoding
641+
plugin = kernel.add_plugin(parent_directory=temp_dir, plugin_name="test_encoding_plugin", encoding="utf-8")
642+
643+
assert plugin is not None
644+
assert plugin.name == "test_encoding_plugin"
645+
assert "test_function" in plugin.functions
646+
647+
function = plugin.functions["test_function"]
648+
template = function.prompt_template.prompt_template_config.template
649+
# Assert "Hello World"
650+
assert "你好世界" in template
651+
assert "こんにちは世界" in template
652+
assert function.description == "Test encoding function"
653+
654+
600655
def test_plugin_no_plugin(kernel: Kernel):
601656
with pytest.raises(ValueError):
602657
kernel.add_plugin(plugin_name="test")

0 commit comments

Comments
 (0)