@@ -234,6 +234,90 @@ def test_one_core_fail():
234234 core_dev1 .plug ()
235235
236236
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core1", DiskTypeLowerThan("cache"))
@pytest.mark.require_disk("core2", DiskTypeLowerThan("cache"))
def test_one_core_fail_dirty():
    """
        title: Test if OpenCAS correctly handles failure of one of multiple core devices.
        description: |
          When one core device fails in a single cache instance and all cachelines are dirty
          and mapped to that core device, the other cores are unable to insert any data
          and are serviced in pass-through.
        pass_criteria:
          - No system crash.
          - Second core is able to use OpenCAS.
    """
    with TestRun.step("Prepare one cache and two core devices."):
        cache_dev = TestRun.disks["cache"]
        cache_dev.create_partitions([Size(1, Unit.GibiByte)] * 2)
        cache_part = cache_dev.partitions[0]
        core_dev1 = TestRun.disks["core1"]  # This device would be unplugged.
        core_dev1.create_partitions([Size(2, Unit.GibiByte)])
        core_part1 = core_dev1.partitions[0]
        core_dev2 = TestRun.disks["core2"]
        core_dev2.create_partitions([Size(2, Unit.GibiByte)])
        core_part2 = core_dev2.partitions[0]
        Udev.disable()

    with TestRun.step("Start cache"):
        # NOTE(review): WB is presumably chosen so that every written cacheline stays dirty
        # in cache - confirm against the cache mode documentation.
        cache_mode = CacheMode.WB
        cache = casadm.start_cache(cache_part, cache_mode=cache_mode, force=True)

    with TestRun.step("Add both core devices to cache."):
        core1 = cache.add_core(core_part1)
        core2 = cache.add_core(core_part2)

    with TestRun.step("Change sequential cutoff policy."):
        # NOTE(review): presumably needed so the sequential dd stream below is not bypassed
        # by the sequential cutoff mechanism - confirm.
        cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)

    with TestRun.step("Fill cache with pages from the first core."):
        # Write as many blocks through the first exported object as the cache can hold.
        blocks = int(cache.size / block_size.value)
        (
            Dd()
            .block_size(block_size)
            .count(blocks)
            .input("/dev/urandom")
            .output(core1.path)
            .oflag("direct")
        ).run()
        # Snapshot occupancy and dirty-block count; both are compared again after the unplug.
        cache_occupancy_before = cache.get_statistics(percentage_val=True).usage_stats.occupancy
        cache_dirty_blocks_before = cache.get_dirty_blocks()
        if cache_occupancy_before != 100:
            TestRun.fail("Failed to fully fill cache. Cache occupancy has to be 100%.")

    with TestRun.step("Unplug the first core device."):
        core_dev1.unplug()

    with TestRun.step("Check if core device is really out of cache."):
        # The whole listing is flattened to one string, so this is a plain substring search.
        output = str(casadm.list_caches().stdout.splitlines())
        if core_part1.path in output:
            TestRun.exception("The first core device should be unplugged!")

    with TestRun.step("Verify that I/O to the remaining cores does not insert to cache"):
        dd_builder(cache_mode, core2, Size(100, Unit.MebiByte)).run()
        # Any non-zero occupancy on the second core means its data was inserted into cache.
        if float(core2.get_occupancy().get_value()) != 0:
            TestRun.LOGGER.error("Cache occupancy increased despite dirty data form first core!")
        else:
            TestRun.LOGGER.info("The remaining core is not able to use cache.")

    with TestRun.step("Check if occupancy from the first core is not removed from cache."):
        cache_occupancy_after = cache.get_statistics(percentage_val=True).usage_stats.occupancy
        if cache_occupancy_after != 100:
            TestRun.fail("Cache occupancy has changed.")

    with TestRun.step("Check if Dirty blocks count before and after unplug stays the same."):
        cache_dirty_blocks_after = cache.get_dirty_blocks()
        if cache_dirty_blocks_before != cache_dirty_blocks_after:
            TestRun.fail("Dirty block count after unplug should stay the same.")

    with TestRun.step("Stop cache."):
        casadm.stop_all_caches()

    with TestRun.step("Plug back the first core."):
        core_dev1.plug()
320+
237321def dd_builder (cache_mode : CacheMode , dev : Core , size : Size ):
238322 blocks = int (size .value / block_size .value )
239323 dd = Dd ().block_size (block_size ).count (blocks )
0 commit comments