Merge pull request #39 from WorksButNotTested/flush

vanhauser-thc · web-flow · commit ff9de4fbeb33 · 2022-10-04T08:35:31.000+02:00
Prevent translation block cache flush from being deferred
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
@@ -601,14 +601,6 @@ void afl_setup(void) {
 
 }
 
-static void afl_flush_tb(void)
-{
-    CPUState *cpu;
-    CPU_FOREACH(cpu) {
-        tb_flush(cpu);
-    }
-}
-
 /* Fork server logic, invoked once we hit _start. */
 
 void afl_forkserver(CPUState *cpu) {
@@ -665,7 +657,7 @@ void afl_forkserver(CPUState *cpu) {
   }
 
   // Flush translation cache just before fork server starts.
-  afl_flush_tb();
+  tb_flush_sync();
 
   /* All right, let's await orders... */
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
@@ -1537,6 +1537,44 @@ void tb_flush(CPUState *cpu)
     }
 }
 
+/*
+ * If we call tb_flush from inside cpu_exec, then it will queue do_tb_flush to
+ * run asynchronously. Since we wish to do this when we start the forkserver to
+ * flush any translated blocks which may have been translated before the
+ * configuration from environment variables has been parsed, this will cause the
+ * flush to be deferred and instead performed after the fork server is running
+ * resulting in the flush occurring repeatedly rather than just the once, with
+ * the obvious resulting performance overhead.
+ *
+ * However, we know that the fork server should be initialized when the target
+ * application has only a single thread (since the fork syscall will only clone
+ * the calling thread into the child process). Therefore, we don't need any
+ * synchronization with respect to any other VCPUs and can therefore perform the
+ * flush synchronously instead.
+ */
+void tb_flush_sync(void)
+{
+    CPUState *cpu = NULL;
+    size_t num_cpus = 0;
+
+    if (!tcg_enabled()) {
+        return;
+    }
+
+    CPU_FOREACH(cpu) {
+        num_cpus++;
+    }
+
+    if (num_cpus != 1) {
+        fprintf(stderr, "Warning: More than one VCPU when attempting to flush "
+                "translation block cache. Skipping since we can't do it synchronously.\n");
+        return;
+    }
+    /* Call do_tb_flush directly instead of queueing it as deferred work. */
+    unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
+    do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
+}
+
 /*
  * Formerly ifdef DEBUG_TB_CHECK. These debug functions are user-mode-only,
  * so in order to prevent bit rot we compile them unconditionally in user-mode,
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
@@ -533,6 +533,7 @@ void tb_invalidate_phys_range(target_ulong start, target_ulong end);
 void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
 #endif
 void tb_flush(CPUState *cpu);
+void tb_flush_sync(void);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,