@@ -33,6 +33,7 @@ public static String getUserPromptTemplate(ModelType modelType) {
33
33
switch (modelType ) {
34
34
case LLAMA_3 :
35
35
case LLAMA_3_1 :
36
+ case LLAMA_GUARD_3 :
36
37
return "<|start_header_id|>user<|end_header_id|>\n "
37
38
+ USER_PLACEHOLDER
38
39
+ "<|eot_id|>"
@@ -60,6 +61,7 @@ public static String getStopToken(ModelType modelType) {
60
61
switch (modelType ) {
61
62
case LLAMA_3 :
62
63
case LLAMA_3_1 :
64
+ case LLAMA_GUARD_3 :
63
65
return "<|eot_id|>" ;
64
66
case LLAVA_1_5 :
65
67
return "</s>" ;
@@ -72,4 +74,44 @@ public static String getLlavaPresetPrompt() {
72
74
return "A chat between a curious human and an artificial intelligence assistant. The assistant"
73
75
+ " gives helpful, detailed, and polite answers to the human's questions. USER: " ;
74
76
}
77
+
78
+ public static String getFormattedLlamaGuardPrompt (String userPrompt ) {
79
+ return getUserPromptTemplate (ModelType .LLAMA_GUARD_3 )
80
+ .replace (
81
+ USER_PLACEHOLDER , getLlamaGuardPresetPrompt ().replace (USER_PLACEHOLDER , userPrompt ));
82
+ }
83
+
84
+ public static String getLlamaGuardPresetPrompt () {
85
+ return "Task: Check if there is unsafe content in 'User' messages in conversations"
86
+ + " according our safety policy with the below categories.\n "
87
+ + "\n "
88
+ + "<BEGIN UNSAFE CONTENT CATEGORIES>\n "
89
+ + "S1: Violent Crimes. \n "
90
+ + "S2: Non-Violent Crimes. \n "
91
+ + "S3: Sex Crimes. \n "
92
+ + "S4: Child Exploitation. \n "
93
+ + "S5: Defamation. \n "
94
+ + "S6: Specialized Advice. \n "
95
+ + "S7: Privacy. \n "
96
+ + "S8: Intellectual Property. \n "
97
+ + "S9: Indiscriminate Weapons. \n "
98
+ + "S10: Hate. \n "
99
+ + "S11: Self-Harm. \n "
100
+ + "S12: Sexual Content. \n "
101
+ + "S13: Elections. \n "
102
+ + "S14: Code Interpreter Abuse.\n "
103
+ + "<END UNSAFE CONTENT CATEGORIES>\n "
104
+ + "\n "
105
+ + "<BEGIN CONVERSATION>\n "
106
+ + "\n "
107
+ + "User: "
108
+ + USER_PLACEHOLDER
109
+ + "\n "
110
+ + "<END CONVERSATION>\n "
111
+ + "Provide your safety assessment for ONLY THE LAST User message in the above"
112
+ + " conversation:\n "
113
+ + " - First line must read 'safe' or 'unsafe'.\n "
114
+ + " - If unsafe, a second line must include a comma-separated list of violated"
115
+ + " categories." ;
116
+ }
75
117
}
0 commit comments