|
10 | 10 | "topic": "Safeguarding Large Language Models in Real-time with Tunable Safety-Performance Trade-offs", |
11 | 11 | "image": "/images/maurodiaz.jpg", |
12 | 12 | "advisor": "Jorge Poco", |
13 | | - "description": "Large Language Models (LLMs) have been shown to be susceptible to jailbreak attacks, or adversarial attacks used to illicit high risk behavior from a model. Jailbreaks have been exploited by cybercriminals and blackhat actors to cause significant harm, highlighting the critical need to safeguard widely-deployed models. Safeguarding approaches, which include fine-tuning models or having LLMs 'self-reflect', may lengthen the inference time of a model, incur a computational penalty, reduce the semantic fluency of an output, and restrict 'normal' model behavior. Importantly, these Safety-Performance Trade-offs (SPTs) remain an understudied area. In this work, we introduce a novel safeguard, called SafeNudge, that combines Controlled Text Generation with 'nudging,' or using text interventions to change the behavior of a model. SafeNudge triggers during text-generation while a jailbreak attack is being executed, and can reduce successful jailbreak attempts by 30% by guiding the LLM towards a safe response. It adds minimal latency to inference and has a negligible impact on the semantic fluency of outputs. Further, we allow for tunable SPTs. SafeNudge is open-source and available through https://pypi.org/, and is compatible with models loaded with the Hugging Face transformers library." |
| 13 | + "description": "Large Language Models (LLMs) have been shown to be susceptible to jailbreak attacks, or adversarial attacks used to elicit high-risk behavior from a model. Jailbreaks have been exploited by cybercriminals and blackhat actors to cause significant harm, highlighting the critical need to safeguard widely-deployed models. Safeguarding approaches, which include fine-tuning models or having LLMs 'self-reflect', may lengthen the inference time of a model, incur a computational penalty, reduce the semantic fluency of an output, and restrict 'normal' model behavior. Importantly, these Safety-Performance Trade-offs (SPTs) remain an understudied area. In this work, we introduce a novel safeguard, called SafeNudge, that combines Controlled Text Generation with 'nudging,' or using text interventions to change the behavior of a model. SafeNudge triggers during text-generation while a jailbreak attack is being executed, and can reduce successful jailbreak attempts by 30% by guiding the LLM towards a safe response. It adds minimal latency to inference and has a negligible impact on the semantic fluency of outputs. Further, we allow for tunable SPTs. SafeNudge is open-source and available through https://pypi.org/, and is compatible with models loaded with the Hugging Face transformers library.", |
| 14 | + "email": "email@gmail.com", |
| 15 | + "links": { |
| 16 | + "recording": true, |
| 17 | + "slides": true |
| 18 | + } |
14 | 19 | } |
15 | 20 | }, |
16 | 21 | { |
|
21 | 26 | "topic": "Understanding, Enhancing, and Augmenting Human Interaction through Adaptive Systems", |
22 | 27 | "image": "/images/andresdelapuente.jpeg", |
23 | 28 | "advisor": "Jorge Poco", |
24 | | - "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains." |
| 29 | + "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains.", |
| 30 | + "email": "email@gmail.com", |
| 31 | + "links": { |
| 32 | + "recording": true, |
| 33 | + "slides": true |
| 34 | + } |
25 | 35 | } |
26 | 36 | }, |
27 | 37 | { |
|
32 | 42 | "topic": "Data Visualization for Large-Scale Text Data", |
33 | 43 | "image": "/images/juanpabloheredia.png", |
34 | 44 | "advisor": "Jorge Poco", |
35 | | - "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains." |
| 45 | + "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains.", |
| 46 | + "email": "email@gmail.com", |
| 47 | + "links": { |
| 48 | + "recording": true, |
| 49 | + "slides": true |
| 50 | + } |
36 | 51 | } |
37 | 52 | }, |
38 | 53 | { |
|
43 | 58 | "topic": "Topic Modelling for Large-Scale Text Data", |
44 | 59 | "image": "/images/felipemoreno.png", |
45 | 60 | "advisor": "Jorge Poco", |
46 | | - "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains." |
47 | | - } |
48 | | - } |
49 | | - ], |
50 | | - "upcoming_events": [ |
51 | | - { |
52 | | - "date": "Mar 21, 2025", |
53 | | - "time": "11:00 - 11:30", |
54 | | - "location": "Fundacao Getulio Vargas, Praia de Botafogo, 190, 5th floor, Rio de Janeiro, RJ, Brazil", |
55 | | - "speaker": "Juan Pablo Heredia", |
56 | | - "topic": "Topic Modelling for Large-Scale Text Data" |
57 | | - }, |
58 | | - { |
59 | | - "date": "Mar 28, 2025", |
60 | | - "time": "11:30 - 12:00", |
61 | | - "location": "Fundacao Getulio Vargas, Praia de Botafogo, 190, 5th floor, Rio de Janeiro, RJ, Brazil", |
62 | | - "speaker": "", |
63 | | - "topic": "" |
64 | | - } |
65 | | - ], |
66 | | - "historical_events": [ |
67 | | - { |
68 | | - "date": "Nov 6, 2024", |
69 | | - "presentations": [ |
70 | | - { |
71 | | - "speaker": "Teal Witter", |
72 | | - "title": "Provably Accurate Estimators for Shapley Values", |
73 | | - "links": { |
74 | | - "recording": true, |
75 | | - "slides": true |
76 | | - } |
77 | | - }, |
78 | | - { |
79 | | - "speaker": "Niv Cohen", |
80 | | - "title": "Discovering and Erasing Undesired Concepts", |
81 | | - "links": { |
82 | | - "recording": true, |
83 | | - "slides": true |
84 | | - } |
85 | | - } |
86 | | - ] |
87 | | - }, |
88 | | - { |
89 | | - "date": "Nov 20, 2024", |
90 | | - "presentations": [ |
91 | | - { |
92 | | - "speaker": "Alexander Ratzan", |
93 | | - "title": "Molecular Connectomics from Small to Large Brains", |
94 | | - "links": { |
95 | | - "recording": true, |
96 | | - "slides": true |
97 | | - } |
98 | | - }, |
99 | | - { |
100 | | - "speaker": "Felipe Inagaki de Oliveira", |
101 | | - "title": "TopoMap++: A faster and more space efficient technique to compute projections with topological guarantees", |
102 | | - "links": { |
103 | | - "recording": true, |
104 | | - "slides": true |
105 | | - } |
| 61 | + "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains.", |
| 62 | + "email": "email@gmail.com", |
| 63 | + "links": { |
| 64 | + "recording": true, |
| 65 | + "slides": true |
106 | 66 | } |
107 | | - ] |
108 | | - }, |
109 | | - { |
110 | | - "date": "Dec 4, 2024", |
111 | | - "presentations": [ |
112 | | - { |
113 | | - "speaker": "Guande Wu", |
114 | | - "title": "Evaluating Collaborative Capabilities of Language Models", |
115 | | - "links": { |
116 | | - "recording": true, |
117 | | - "slides": true |
118 | | - } |
119 | | - }, |
120 | | - { |
121 | | - "speaker": "Aécio Santos", |
122 | | - "title": "Sampling-Based Sketching Methods for Correlated Data Discovery", |
123 | | - "links": { |
124 | | - "recording": true, |
125 | | - "slides": true |
126 | | - } |
127 | | - } |
128 | | - ] |
| 67 | + } |
129 | 68 | } |
130 | 69 | ] |
131 | 70 | } |
0 commit comments