|
1 | 1 | { |
2 | 2 | "title": "Visual Data Science Lab - Weekly Seminar", |
3 | | - "recent_talk": { |
4 | | - "date": "Wed, Dec 4, 2024", |
5 | | - "time": "11:00 - 12:00", |
6 | | - "location": "Room 1114, 370 Jay St OR Join Zoom Meeting", |
7 | | - "speakers": [ |
8 | | - { |
| 3 | + "location": "Fundacao Getulio Vargas, Praia de Botafogo, 190, 5th floor, Rio de Janeiro, RJ, Brazil", |
| 4 | + "group_seminar": [ |
| 5 | + { |
| 6 | + "date": "Mon, Feb 24, 2025", |
| 7 | + "time": "11:00 - 12:00", |
| 8 | + "speaker": { |
9 | 9 | "name": "Mauro Diaz", |
10 | | - "topic": "Safeguarding Large Language Models in Real-time with Tunable Safety-Performance Trade-offs" |
11 | | - }, |
12 | | - { |
| 10 | + "topic": "Safeguarding Large Language Models in Real-time with Tunable Safety-Performance Trade-offs", |
| 11 | + "image": "/images/maurodiaz.jpg", |
| 12 | + "advisor": "Jorge Poco", |
| 13 | + "description": "Large Language Models (LLMs) have been shown to be susceptible to jailbreak attacks, or adversarial attacks used to elicit high-risk behavior from a model. Jailbreaks have been exploited by cybercriminals and blackhat actors to cause significant harm, highlighting the critical need to safeguard widely-deployed models. Safeguarding approaches, which include fine-tuning models or having LLMs 'self-reflect', may lengthen the inference time of a model, incur a computational penalty, reduce the semantic fluency of an output, and restrict 'normal' model behavior. Importantly, these Safety-Performance Trade-offs (SPTs) remain an understudied area. In this work, we introduce a novel safeguard, called SafeNudge, that combines Controlled Text Generation with 'nudging,' or using text interventions to change the behavior of a model. SafeNudge triggers during text-generation while a jailbreak attack is being executed, and can reduce successful jailbreak attempts by 30% by guiding the LLM towards a safe response. It adds minimal latency to inference and has a negligible impact on the semantic fluency of outputs. Further, we allow for tunable SPTs. SafeNudge is open-source and available through https://pypi.org/, and is compatible with models loaded with the Hugging Face transformers library." |
| 14 | + } |
| 15 | + }, |
| 16 | + { |
| 17 | + "date": "Tue, Feb 25, 2025", |
| 18 | + "time": "11:00 - 12:00", |
| 19 | + "speaker": { |
13 | 20 | "name": "Andres de la Puente", |
14 | | - "topic": "Understanding, Enhancing, and Augmenting Human Interaction through Adaptive Systems" |
| 21 | + "topic": "Understanding, Enhancing, and Augmenting Human Interaction through Adaptive Systems", |
| 22 | + "image": "/images/andresdelapuente.jpeg", |
| 23 | + "advisor": "Jorge Poco", |
| 24 | + "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains." |
15 | 25 | } |
16 | | - ] |
17 | | - }, |
18 | | - "talk_details": [ |
| 26 | + }, |
19 | 27 | { |
20 | | - "topic": "Safeguarding Large Language Models in Real-time with Tunable Safety-Performance Trade-offs", |
21 | | - "image": "/images/maurodiaz.jpg", |
22 | | - "speaker": "Andrew Bell", |
23 | | - "advisor": "Julia Stoyanovich", |
24 | | - "description": "Large Language Models (LLMs) have been shown to be susceptible to jailbreak attacks, or adversarial attacks used to illicit high risk behavior from a model. Jailbreaks have been exploited by cybercriminals and blackhat actors to cause significant harm, highlighting the critical need to safeguard widely-deployed models. Safeguarding approaches, which include fine-tuning models or having LLMs 'self-reflect', may lengthen the inference time of a model, incur a computational penalty, reduce the semantic fluency of an output, and restrict 'normal' model behavior. Importantly, these Safety-Performance Trade-offs (SPTs) remain an understudied area. In this work, we introduce a novel safeguard, called SafeNudge, that combines Controlled Text Generation with 'nudging,' or using text interventions to change the behavior of a model. SafeNudge triggers during text-generation while a jailbreak attack is being executed, and can reduce successful jailbreak attempts by 30% by guiding the LLM towards a safe response. It adds minimal latency to inference and has a negligible impact on the semantic fluency of outputs. Further, we allow for tunable SPTs. SafeNudge is open-source and available through https://pypi.org/, and is compatible with models loaded with the Hugging Face transformers library." |
| 28 | + "date": "Wed, Feb 26, 2025", |
| 29 | + "time": "11:00 - 12:00", |
| 30 | + "speaker": { |
| 31 | + "name": "Juan Pablo Heredia", |
| 32 | + "topic": "Data Visualization for Large-Scale Text Data", |
| 33 | + "image": "/images/juanpabloheredia.png", |
| 34 | + "advisor": "Jorge Poco", |
| 35 | + "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains." |
| 36 | + } |
25 | 37 | }, |
26 | 38 | { |
27 | | - "topic": "Understanding, Enhancing, and Augmenting Human Interaction through Adaptive Systems", |
28 | | - "image": "/images/andresdelapuente.jpeg", |
29 | | - "speaker": "Jewelina Wen", |
30 | | - "advisor": "Claudio Silva", |
31 | | - "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains." |
| 39 | + "date": "Thu, Feb 27, 2025", |
| 40 | + "time": "11:00 - 12:00", |
| 41 | + "speaker": { |
| 42 | + "name": "Adrian Felipe Moreno", |
| 43 | + "topic": "Topic Modelling for Large-Scale Text Data", |
| 44 | + "image": "/images/felipemoreno.png", |
| 45 | + "advisor": "Jorge Poco", |
| 46 | + "description": "By leveraging computational methods, physiological sensing, and AI-driven adaptation, I develop intelligent interfaces that dynamically respond to users’ cognitive and behavioral states, optimizing experience and performance in real time. My work spans aviation, driving assistance, healthcare, and gaming, demonstrating the impact of adaptive systems across diverse domains." |
| 47 | + } |
32 | 48 | } |
33 | 49 | ], |
34 | 50 | "upcoming_events": [ |
35 | 51 | { |
36 | | - "date": "Mar 12, 2025", |
| 52 | + "date": "Mar 21, 2025", |
37 | 53 | "time": "11:00 - 11:30", |
38 | | - "location": "Room 1114, 370 Jay St", |
39 | | - "speaker": "TBD", |
| 54 | + "location": "Fundacao Getulio Vargas, Praia de Botafogo, 190, 5th floor, Rio de Janeiro, RJ, Brazil", |
| 55 | + "speaker": "Juan Pablo Heredia", |
40 | 56 | "topic": "Data Visualization for Large-Scale Text Data" |
41 | 57 | }, |
42 | 58 | { |
43 | | - "date": "Mar 12, 2025", |
| 59 | + "date": "Mar 28, 2025", |
44 | 60 | "time": "11:30 - 12:00", |
45 | | - "location": "Room 1114, 370 Jay St", |
46 | | - "speaker": "TBD", |
47 | | - "topic": "Data Visualization for Large-Scale Text Data" |
| 61 | + "location": "Fundacao Getulio Vargas, Praia de Botafogo, 190, 5th floor, Rio de Janeiro, RJ, Brazil", |
| 62 | + "speaker": "TBD", |
| 63 | + "topic": "TBD" |
48 | 64 | } |
49 | 65 | ], |
50 | 66 | "historical_events": [ |
|
0 commit comments