21
21
#include <config.h>
22
22
#include "linux.h"
23
23
#include "utils.h"
24
+ #include "status.h"
24
25
#include <string.h>
25
26
#include <sched.h>
26
27
#include <fcntl.h>
@@ -148,6 +149,12 @@ struct private_data_s
148
149
/* Used to save stdin, stdout, stderr during checkpointing to descriptors.json
149
150
* and needed during restore. */
150
151
char * external_descriptors ;
152
+
153
+ /* Cached shared empty directory for masked paths optimization */
154
+ int maskdir_fd ;
155
+ char * maskdir_proc_path ;
156
+ bool maskdir_bind_failed ;
157
+ bool maskdir_warned ;
151
158
};
152
159
153
160
struct linux_namespace_s
@@ -164,6 +171,8 @@ cleanup_private_data (void *private_data)
164
171
165
172
if (p -> rootfsfd >= 0 )
166
173
TEMP_FAILURE_RETRY (close (p -> rootfsfd ));
174
+ if (p -> maskdir_fd >= 0 )
175
+ TEMP_FAILURE_RETRY (close (p -> maskdir_fd ));
167
176
if (p -> mount_fds )
168
177
cleanup_close_mapp (& (p -> mount_fds ));
169
178
if (p -> dev_fds )
@@ -173,6 +182,7 @@ cleanup_private_data (void *private_data)
173
182
free (p -> host_notify_socket_path );
174
183
free (p -> container_notify_socket_path );
175
184
free (p -> external_descriptors );
185
+ free (p -> maskdir_proc_path );
176
186
free (p );
177
187
}
178
188
@@ -185,6 +195,7 @@ get_private_data (struct libcrun_container_s *container)
185
195
container -> private_data = p ;
186
196
p -> rootfsfd = -1 ;
187
197
p -> notify_socket_tree_fd = -1 ;
198
+ p -> maskdir_fd = -1 ;
188
199
container -> cleanup_private_data = cleanup_private_data ;
189
200
}
190
201
return container -> private_data ;
@@ -1058,6 +1069,103 @@ has_mount_for (libcrun_container_t *container, const char *destination)
1058
1069
return false;
1059
1070
}
1060
1071
1072
+ static void
1073
+ warn_tmpfs_fallback_once (struct private_data_s * private_data , const char * reason )
1074
+ {
1075
+ if (! private_data -> maskdir_warned )
1076
+ {
1077
+ libcrun_warning ("Falling back to tmpfs for masked dirs (reason: %s)" , reason );
1078
+ private_data -> maskdir_warned = true;
1079
+ }
1080
+ }
1081
+
1082
+ /* Get or create the cached shared empty directory for masked paths optimization.
1083
+ * Creates directory and FD once per container, caches /proc/self/fd path for fast mounting.
1084
+ */
1085
+ static int
1086
+ get_shared_empty_dir_cached (libcrun_container_t * container , char * * proc_fd_path , libcrun_error_t * err )
1087
+ {
1088
+ struct private_data_s * private_data = get_private_data (container );
1089
+ cleanup_close int fd = -1 ;
1090
+ cleanup_free char * run_dir = NULL ;
1091
+ cleanup_free char * empty_dir_path = NULL ;
1092
+ int ret ;
1093
+
1094
+ /* Fast path: return cached proc fd path if already set up */
1095
+ if (private_data -> maskdir_proc_path != NULL )
1096
+ {
1097
+ * proc_fd_path = private_data -> maskdir_proc_path ;
1098
+ return 0 ;
1099
+ }
1100
+
1101
+ /* Slow path: create directory and cache everything once */
1102
+ ret = get_run_directory (& run_dir , container -> context -> state_root , err );
1103
+ if (UNLIKELY (ret < 0 ))
1104
+ return ret ;
1105
+
1106
+ ret = append_paths (& empty_dir_path , err , run_dir , ".empty-directory" , NULL );
1107
+ if (UNLIKELY (ret < 0 ))
1108
+ return ret ;
1109
+
1110
+ /* Ensure the empty directory exists (once per container) */
1111
+ ret = crun_ensure_directory (empty_dir_path , 0555 , false, err );
1112
+ if (UNLIKELY (ret < 0 ))
1113
+ return ret ;
1114
+
1115
+ /* Open directory and cache FD (once per container) */
1116
+ fd = open (empty_dir_path , O_DIRECTORY | O_RDONLY | O_CLOEXEC );
1117
+ if (fd < 0 )
1118
+ return crun_make_error (err , errno , "open directory `%s`" , empty_dir_path );
1119
+
1120
+ /* Cache the /proc/self/fd path for fast mounting */
1121
+ ret = xasprintf (& private_data -> maskdir_proc_path , "/proc/self/fd/%d" , fd );
1122
+ if (UNLIKELY (ret < 0 ))
1123
+ return crun_make_error (err , errno , "xasprintf failed" );
1124
+
1125
+ private_data -> maskdir_fd = fd ;
1126
+ fd = -1 ; /* Don't auto-close */
1127
+
1128
+ * proc_fd_path = private_data -> maskdir_proc_path ;
1129
+ return 0 ;
1130
+ }
1131
+
1132
+ static int
1133
+ mount_masked_dir (libcrun_container_t * container , int pathfd , const char * rel_path , libcrun_error_t * err )
1134
+ {
1135
+ struct private_data_s * private_data = get_private_data (container );
1136
+ char * proc_fd_path = NULL ;
1137
+ libcrun_error_t tmp_err = NULL ;
1138
+ int ret ;
1139
+
1140
+ if (private_data -> maskdir_bind_failed )
1141
+ goto fallback_to_tmpfs ;
1142
+
1143
+ /* Get cached /proc/self/fd path (fast after first call) */
1144
+ ret = get_shared_empty_dir_cached (container , & proc_fd_path , & tmp_err );
1145
+ if (ret < 0 )
1146
+ {
1147
+ private_data -> maskdir_bind_failed = true;
1148
+ warn_tmpfs_fallback_once (private_data , tmp_err -> msg );
1149
+ crun_error_release (& tmp_err );
1150
+ goto fallback_to_tmpfs ;
1151
+ }
1152
+
1153
+ ret = do_mount (container , proc_fd_path , pathfd , rel_path , NULL , MS_BIND | MS_RDONLY , NULL , LABEL_MOUNT , & tmp_err );
1154
+ if (LIKELY (ret >= 0 ))
1155
+ return ret ;
1156
+
1157
+ /* Bind mount failed - mark as failed and fall back for all future mounts */
1158
+ private_data -> maskdir_bind_failed = true;
1159
+ libcrun_warning ("bind mount failed for %s to %s: %s, falling back to tmpfs" ,
1160
+ proc_fd_path , rel_path , tmp_err -> msg );
1161
+ warn_tmpfs_fallback_once (private_data , tmp_err -> msg );
1162
+ crun_error_release (& tmp_err );
1163
+
1164
+ fallback_to_tmpfs :
1165
+ libcrun_debug ("using tmpfs fallback for %s" , rel_path );
1166
+ return ret = do_mount (container , "tmpfs" , pathfd , rel_path , "tmpfs" , MS_RDONLY , "nr_blocks=1,nr_inodes=1" , LABEL_MOUNT , err );
1167
+ }
1168
+
1061
1169
static int
1062
1170
do_masked_or_readonly_path (libcrun_container_t * container , const char * rel_path , bool readonly , bool keep_flags ,
1063
1171
libcrun_error_t * err )
@@ -1114,7 +1222,7 @@ do_masked_or_readonly_path (libcrun_container_t *container, const char *rel_path
1114
1222
return crun_make_error (err , errno , "cannot stat `%s`" , rel_path );
1115
1223
1116
1224
if ((mode & S_IFMT ) == S_IFDIR )
1117
- ret = do_mount (container , "tmpfs" , pathfd , rel_path , "tmpfs" , MS_RDONLY , "size=0k" , LABEL_MOUNT , err );
1225
+ ret = mount_masked_dir (container , pathfd , rel_path , err );
1118
1226
else
1119
1227
ret = do_mount (container , "/dev/null" , pathfd , rel_path , NULL , MS_BIND | MS_RDONLY , NULL , LABEL_MOUNT , err );
1120
1228
if (UNLIKELY (ret < 0 ))
0 commit comments