generated from mlresearch/proceedings-template
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathreliableai25_cleaned.bib
More file actions
203 lines (189 loc) · 15.5 KB
/
reliableai25_cleaned.bib
File metadata and controls
203 lines (189 loc) · 15.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
@Proceedings{Reliable-and-Trustworthy-AI-2025,
booktitle = {Reliable and Trustworthy Artificial Intelligence 2025},
name = {Reliable and Trustworthy Artificial Intelligence},
shortname = {Reliable \& Trustworthy AI},
year = {2025},
editor = {Nguyen, Hoang D. and Le, Duc-Trong and Bj{\"o}rklund, Johanna and Vu, Xuan-Son},
volume = {310},
start = {2025-12-12},
end = {2025-12-12},
published = {2025-12-31},
address = {Multiple},
conference_url = {https://workshop2025.reliable-ai.org/}
}
@InProceedings{preface,
author = {Nguyen, Hoang D. and Le, Duc-Trong and Bj{\"o}rklund, Johanna and Vu, Xuan-Son},
title = {Preface},
pages = {v--vi},
abstract = {Preface}
}
@InProceedings{cai24,
title = {Reinforcement Learning in Online Advertising: Challenges, Prospects, and Trust},
author = {Cai, Jingwen and Bj{\"o}rklund, Johanna},
pages = {1--10},
abstract = {The central decision-making processes involved in online advertising are often supported
by Reinforcement Learning (RL), which serves to optimise long-term accumulative rewards
through interactions with evolving environments. While RL's potential in various
real-world applications has been reviewed in extant survey works, the specific ways RL
algorithms address online advertising challenges remain unchartered. Therefore, this paper
reviews RL applications in this practice area, identifying core challenges and key issues
including trust concerns. We categorize reviewed work based on problem domains and
propose potential directions for future research. Our goal is to bridge the cross-disciplinary
gap in this field, offering perspectives and guidance for researchers and practitioners.}
}
@InProceedings{chai24,
title = {{AIQTrees}: A Drone Imagery Dataset for Tree Segmentation},
author = {Chai, Joseph and To, Alex and O'Sullivan, Barry and Nguyen, Hoang D.},
pages = {11--18},
abstract = {The reliability of AI models typically depends on the data they are trained with, and
accurate interpretations require large amounts of data. The scarcity of publicly available
datasets is typically encountered for specific small-scale sustainability projects, making
data accessibility a limiting factor for developing AI models for semantic segmentation
tasks. In sustainability and forestry applications, the usage of UAVs is common due to
their lightweight nature and the ability to provide a huge variety of data. In this paper,
we present a new dataset of realistic and high-quality drone images taken around sites in
Ireland. The images encompass temporal, spatial, and seasonal dimensions, which could
alter the tree appearance or illumination conditions of the images and have to be taken into
consideration. We also included a baseline benchmark for the semantic segmentation task
along with the dataset. It can be accessed at: https://github.com/ReML-AI/AIQTrees.}
}
@InProceedings{ehnemark24,
title = {Bridging Human Cognition and {AI}: Enhancing Transparency and Explainability with Hierarchical Conceptual Graphs and the Knowing Protocol},
author = {Ehnemark, Klas},
pages = {19--23},
abstract = {The rapid deployment of large language models (LLMs) in critical domains demands greater
transparency and explainability to build user trust and enable effective collaboration. Current
AI-human interactions largely rely on unstructured text, often resulting in misunderstandings
and limited insight into AI reasoning. We introduce a Hierarchical Conceptual
Graph Model and the Knowing Communication Protocol to bridge the gap between symbolic
human reasoning and sub-symbolic AI processing. Our model combines conceptual
spaces, ontologies, and hierarchical structures to explicitly represent complex knowledge,
while the Knowing Protocol, through the Knowing Markup Language (KML), facilitates
structured, machine-readable interactions. This approach enhances transparency by aligning
AI-generated content with human cognitive structures, promoting clarity and collaborative
knowledge building---ultimately addressing the limitations of traditional text-based
AI tools and advancing trustworthy, explainable AI.}
}
@InProceedings{hoang24,
title = {A lightweight and reliable framework toward real-time student engagement predictions in learning analytics},
author = {Hoang, Long and Shorten, George and O'Sullivan, Barry and Nguyen, Hoang D.},
pages = {24--33},
abstract = {Learning analytics can enable the provision of meaningful feedback based on the collected data, help
educators to make decisions with and about learners, and improve learner performance. Student engagement
predictions are a key factor in generating feedback for real-time learning analytics applications, such as
dashboards. However, most previous work has been based on a heavy deep learning model, which results in
challenges for deployment in real-time applications (a resource efficiency requirement in reliable AI). This paper
proposes a lightweight deep-learning framework for predicting student engagement in video to address this
limitation. The proposed method uses customized MobileNetV2 as the backbone, with an input size of 32 by 32
by 3, to extract features from consecutive video frames. Multi-Scale attention -- Residual (MUSER) is used to
capture global information and contextual representation of the extracted features. Finally, LSTM examines the
temporal variations in video frames and yields the prediction result. We use the DAISEE dataset, the most popular
dataset in the learning analytics community, to evaluate the proposed framework. Experimental results
demonstrate that the proposed method achieves good accuracy while significantly reducing the model size
compared to other approaches.}
}
@InProceedings{le24,
title = {Trustworthiness in Multi-Agent {UAV} Systems: A Scoping Review},
author = {Le, Mai and Minghim, Rosane and O'Sullivan, Barry and Nguyen, Hoang D.},
pages = {34--44},
abstract = {The integration of artificial intelligence (AI) into multi-unmanned aerial vehicle-assisted
communication plays a pivotal role in sixth-generation wireless communication and beyond.
Most AI techniques have primarily focused on AI-based applications and technical
problems, rather than examining the accountability and trustworthiness of AI models, a
crucial evaluation criteria for AI human beings. This work aims to provide a scoping review
of the trustworthiness of AI in multi-agent UAV systems. Firstly, we present the
background of multi-agent systems and methods to evaluate, enhance the trustworthiness
of AI systems. Secondly, we review innovative techniques that address trustworthy requirements
in terms of safety, robustness, privacy, accountability, explainability, and fairness,
along with challenges in multi-agent UAV communications. Finally, we highlight several
promising solutions and future research directions.}
}
@InProceedings{naeem24,
title = {Comparing Genetic Algorithms and Principal Component Analysis in Reducing Feature Dimensionality},
author = {Naeem, Aiman and Khan, Muhammad Farhan and Rezaei, Saeid and Iqbal, Adeel and Sohail, Muhammad and Jatoi, Munsif and Shakeel, Atif},
pages = {45--58},
abstract = {It is important to do dimensionality reduction or feature selection so that machine learning
models can be built in an efficient and interpretable way, especially with high-dimensional
datasets. Using data up to October 2023, this study compares two methods, namely, Genetic
Algorithm (GA) and Principal Component Analysis (PCA), to assess their usefulness
for dimensionality reduction while preserving predictive performance. As a case study, we
applied the feature selection methodology on the Breast Cancer Wisconsin (Diagnostic)
dataset, comprised of 30 real-valued features that describe the characteristics of cell nuclei.
PCA used to reduce dimensional space of dataset by explaining 95\% of variance and GA
is used to make a minimal subset of subset of relevant features based on fitness function.
To evaluate the effect of the reduced dimensionality on classification accuracy, a Random
Forest classifier was used. Experimental results shown that GA selected features provide
accuracy from the GA by 98.25\% and PCA accuracy from the PCA with 93.86\% which
at the cost of high computational cost. Finally, visualizations illustrated the variance retained
by PCA, how features provided importance in model performance using GA, and
how both influence the models. The quantitative results of this study can be used to identify
the trade-off between the statistical approaches and heuristic approaches showing what
needs to be prioritized in terms of application specificity when searching for dimensionality
reduction methods.}
}
@InProceedings{nguyen24,
title = {Reliable Cultural Knowledge Preservation in Multilingual {LLMs} through Model Merging},
author = {Nguyen, Hoang Quan and Pham, Nhut Huy and Pahani, Maziyar and Bj{\"o}rklund, Johanna and Vu, Xuan-Son},
pages = {59--66},
abstract = {We introduce a reliable approach for enhancing multilingual language models that preserves cultural knowledge while
improving reasoning capabilities, focusing on low-resource languages. Using Qwen as a base model, we demonstrate that trust-aware model merging
can verifiably improve performance without compromising cultural understanding. Our proposed approach achieves quantifiable improvements in both
reasoning tasks and cultural benchmarks while maintaining computational efficiency. Results on Vietnamese and Arabic language tasks show consistent
performance gains while preserving cultural knowledge, offering a reliable path for developing trustworthy multilingual AI systems. Our models are
available at github.com/WARA-ML/waraml-mini-brains.}
}
@InProceedings{varshney24,
title = {Recurrence Analysis of Integrally Private Support Vector Machine},
author = {Varshney, Ayush K. and Torra, Vicen{\c c}},
pages = {67--72},
abstract = {Integral privacy, an alternative to k-Anonymity and differential privacy, focuses on creating ambiguity
for intruders by considering models generated from diverse datasets as privacy-preserving. Integral privacy calls such models
as recurring models. While prior research has primarily explored recurrence in deep learning models which have large parameter
space, this paper addresses the recurrence analysis of a typical machine learning model with relatively small parameter space like
Support Vector Machine (SVM). Models having small parameter space can have significant impact due to the presence and absence of
a datapoint. Due to this reason, one may intuitively consider that their probability to recur is low. We challenge this hypothesis
with the recurrence analysis of SVM models trained with mean samplers like stochastic gradient descent. We find that under constrained
environment SVM models recurs with high probability. This research enhances our understanding of privacy-preserving models in the context
of SVMs, providing valuable insights into their privacy guarantees.}
}
@InProceedings{nguyen25,
title = {Reliable-Data-Split ({RDS}): Maximizing Model Potential with Reinforced Selection Strategy},
author = {Nguyen, Hoang D. and Vu, Xuan-Son and Truong, Quoc-Tuan and Le, Duc-Trong},
pages = {73--89},
abstract = {The nexus between data characteristics and parametric models is fundamental for developing effective and
reliable artificial intelligence (AI) systems. Mismatches in data properties for model development may lead to deleterious
effects on AI model performance in machine learning practice. This paper proposes a Reliable Data Split (RDS) procedure
to learn how to select data points that will generalise the target domain adequately by employing prior knowledge of the data
generative process. We introduce a reinforced selection strategy using deep reinforcement learning with diverse black box predictors in
maximising ensemble rewards as the proxy of model performance potential while maintaining an appropriate proportionate allocation and the
independent and identically distributed (i.i.d.) assumption. A comprehensive evaluation of the RDS procedure is conducted on four real-world datasets,
including Madelon, Drug Reviews, MNIST, and Kalapa Credit Scoring Challenge, with coverage of machine learning tasks such as binary classification,
multi-class classification, and regression on multivariate, textual, and visual data. The experimental results evidently demonstrate consistent
performance improvements of trainable data samples over classical or prior data selection. Hence, we advocate the use of RDS for data splitting in the
early stage of machine learning tasks for parameter tuning, model selection and overfitting prevention, as well as, sampling in large-scale AI competitions for
searching the best possible and shift-stable solutions.}
}
@InProceedings{phan25,
title = {Improving Continual Learning Robustness in Medical Imaging via Illumination Adaptive Transformer},
author = {Phan, Thanh-Ngoc and Pham, Quynh-Trang Thi and Le, Duc-Trong},
pages = {90--101},
abstract = {Continual learning (CL) refers to the capability of a model to learn progressively from an evolving stream of data, retaining previously
acquired knowledge while integrating new information. This capability is pivotal in advancing medical image classification, especially when data availability
fluctuates. Beyond investigating CL performance under standard clean-data conditions, this paper systematically evaluates the robustness of representative
CL strategies in uncertain imaging contexts, where visual quality is degraded by varying degrees of low-light conditions and over- or under exposure. In this paper, we augment
the training and evaluation data with controlled, simulated low-light and contrast perturbations to model these uncertain conditions, which mimic real-world variability frequently
encountered in clinical acquisition environments. Our method integrates an automatic illumination calibration module, termed the Illumination Adaptive Transformer (IAT), within
existing CL frameworks to mitigate the adverse effects of such degradations. This module dynamically adjusts the image illumination and contrast, aiming to enhance the visibility
of critical features in a data-driven, end-to-end manner without requiring manual tuning or image-specific heuristics. Experiments demonstrate that incorporating the IAT module
consistently improves final classification accuracy and robustness across multiple continual learning strategies under all simulated uncertainty levels on the PathMNIST dataset.}
}
@InProceedings{salim25,
title = {Towards a {SAFETY-AI} framework for Healthcare Education},
author = {Salim, Kinza and Nana, Vanita Kouomogne and Marshall, Mark T. and Nguyen, Hoang D.},
pages = {102--114},
abstract = {Safety is an integral part of healthcare professionalism, and with new technological developments, such as Artificial Intelligence (AI), there is an ongoing
need to develop guardrails for healthcare education. The landscape of AI safety frameworks for healthcare education is evolving, with significant development in regulatory
compliance, ethical governance, and practical implementation approaches. This paper addresses the need for building a SAFETY-AI framework for healthcare education and proposes
a solution towards it. It also provides subjective insights regarding trustworthiness, reliability and the existing concepts of safety in healthcare setups. This work stands
as a roadmap for safety in AI practices for healthcare policy makers, educators and clinicians.}
}