From 0ebb9e908c1882a4dbb7ea8db0d02846990a5a86 Mon Sep 17 00:00:00 2001 From: Todd Herbert Date: Thu, 16 Jan 2025 05:21:14 +1300 Subject: [PATCH 1/3] Reattempt flash operations if failed --- .../InternalFileSytem/src/flash/flash_nrf5x.c | 109 ++++++++++++++---- 1 file changed, 85 insertions(+), 24 deletions(-) diff --git a/libraries/InternalFileSytem/src/flash/flash_nrf5x.c b/libraries/InternalFileSytem/src/flash/flash_nrf5x.c index f9fac1fdf..38094dcd0 100644 --- a/libraries/InternalFileSytem/src/flash/flash_nrf5x.c +++ b/libraries/InternalFileSytem/src/flash/flash_nrf5x.c @@ -28,6 +28,7 @@ #include "nrf_soc.h" #include "delay.h" #include "rtos.h" +#include "assert.h" #ifdef NRF52840_XXAA @@ -44,11 +45,18 @@ extern uint32_t __flash_arduino_start[]; // MACRO TYPEDEF CONSTANT ENUM DECLARATION //--------------------------------------------------------------------+ static SemaphoreHandle_t _sem = NULL; +static bool _flash_op_failed = false; void flash_nrf5x_event_cb (uint32_t event) { -// if (event != NRF_EVT_FLASH_OPERATION_SUCCESS) LOG_LV1("IFLASH", "Flash op Failed"); - if ( _sem ) xSemaphoreGive(_sem); + if ( _sem ) { + // Record the result, for consumption by fal_erase or fal_program + // Used to reattempt failed operations + _flash_op_failed = (event == NRF_EVT_FLASH_OPERATION_ERROR); + + // Signal to fal_erase or fal_program that our async flash op is now complete + xSemaphoreGive(_sem); + } } // Flash Abstraction Layer @@ -107,30 +115,46 @@ static bool fal_erase (uint32_t addr) // Init semaphore for first call if ( _sem == NULL ) { - _sem = xSemaphoreCreateCounting(10, 0); + _sem = xSemaphoreCreateBinary(); VERIFY(_sem); } - // retry if busy - uint32_t err; - while ( NRF_ERROR_BUSY == (err = sd_flash_page_erase(addr / FLASH_NRF52_PAGE_SIZE)) ) - { - delay(1); - } - VERIFY_STATUS(err, false); - - // wait for async event if SD is enabled + // Check if soft device is enabled + // If yes, flash operations are async, so we need to wait for the callback to give the semaphore uint8_t sd_en = 0; (void) sd_softdevice_is_enabled(&sd_en); - if ( sd_en ) xSemaphoreTake(_sem, portMAX_DELAY); + // Make multiple attempts + for (uint8_t attempt = 0;;) { + // Attempt to erase + uint32_t err = sd_flash_page_erase(addr / FLASH_NRF52_PAGE_SIZE); + // Retry if busy + if (err == NRF_ERROR_BUSY) { + delay(1); + continue; + } + // Count genuine attempts + attempt++; + // If using softdevice, erase is async, so we wait here + if (sd_en) xSemaphoreTake(_sem, portMAX_DELAY); + // If erase failed + if (_flash_op_failed) { + assert(attempt < 10); // Something went horribly wrong.. protect the flash + continue; // Try again + } + // Catch any other odd results + VERIFY_STATUS(err, 0); + // Success + break; + } return true; } static uint32_t fal_program (uint32_t dst, void const * src, uint32_t len) { - // wait for async event if SD is enabled + // Check if soft device is enabled + // If yes, flash operations are async, so we need to wait for the callback to give the semaphore uint8_t sd_en = 0; (void) sd_softdevice_is_enabled(&sd_en); @@ -148,19 +172,56 @@ static uint32_t fal_program (uint32_t dst, void const * src, uint32_t len) if ( sd_en ) xSemaphoreTake(_sem, portMAX_DELAY); #else - while ( NRF_ERROR_BUSY == (err = sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/8)) ) - { - delay(1); + + // First part of block + // ------------------ + for (uint8_t attempt = 0;;) { + // Attempt to write + uint32_t err = sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/8); + // Retry if busy + if (err == NRF_ERROR_BUSY) { + delay(1); + continue; + } + // Count genuine attempts + attempt++; + // If using softdevice, write is async, so we wait here + if (sd_en) xSemaphoreTake(_sem, portMAX_DELAY); + // If write failed + if (_flash_op_failed) { + assert(attempt < 10); // Something went horribly wrong.. protect the flash + continue; // Try again + } + // Catch any other odd results + VERIFY_STATUS(err, 0); + // Success + break; } - VERIFY_STATUS(err, 0); - if ( sd_en ) xSemaphoreTake(_sem, portMAX_DELAY); - while ( NRF_ERROR_BUSY == (err = sd_flash_write((uint32_t*) (dst+ len/2), (uint32_t const *) (src + len/2), len/8)) ) - { - delay(1); + // Second part of block + // -------------------- + for (uint8_t attempt = 0;;) { + // Attempt to write + uint32_t err = sd_flash_write((uint32_t*) (dst+ len/2), (uint32_t const *) (src + len/2), len/8); + // Retry if busy + if (err == NRF_ERROR_BUSY) { + delay(1); + continue; + } + // Count genuine attempts + attempt++; + // If using softdevice, write is async, so we wait here + if (sd_en) xSemaphoreTake(_sem, portMAX_DELAY); + // If write failed + if (_flash_op_failed) { + assert(attempt < 10); // Something went horribly wrong.. protect the flash + continue; // Try again + } + // Catch any other odd results + VERIFY_STATUS(err, 0); + // Success + break; } - VERIFY_STATUS(err, 0); - if ( sd_en ) xSemaphoreTake(_sem, portMAX_DELAY); #endif return len; From ed2d1f5ed23dca83ea89496e79714e237fba3c79 Mon Sep 17 00:00:00 2001 From: Todd Herbert Date: Thu, 16 Jan 2025 22:17:22 +1300 Subject: [PATCH 2/3] Allow failure; refactoring --- .../InternalFileSytem/src/flash/flash_nrf5x.c | 122 +++++++----------- 1 file changed, 46 insertions(+), 76 deletions(-) diff --git a/libraries/InternalFileSytem/src/flash/flash_nrf5x.c b/libraries/InternalFileSytem/src/flash/flash_nrf5x.c index 38094dcd0..c039ba810 100644 --- a/libraries/InternalFileSytem/src/flash/flash_nrf5x.c +++ b/libraries/InternalFileSytem/src/flash/flash_nrf5x.c @@ -28,8 +28,6 @@ #include "nrf_soc.h" #include "delay.h" #include "rtos.h" -#include "assert.h" - #ifdef NRF52840_XXAA #define BOOTLOADER_ADDR 0xF4000 @@ -59,6 +57,34 @@ void flash_nrf5x_event_cb (uint32_t event) } } +// How many retry attempts when performing flash write operations +#define MAX_RETRY 20 + +// Check whether a flash operation was successful, or should be repeated +static bool retry_flash_op (uint32_t op_result, bool sd_enabled) { + // If busy + if (op_result == NRF_ERROR_BUSY) { + delay(1); + return true; // Retry + } + + // Unspecified error + if (op_result != NRF_SUCCESS) + return true; // Retry + + // If the soft device is enabled, flash operations run async + // The callback (flash_nrf5x_event_cb) will give semaphore when the flash operation is complete + // The callback also checks for NRF_EVT_FLASH_OPERATION_ERROR, which is not available to us otherwise + if (sd_enabled) { + xSemaphoreTake(_sem, portMAX_DELAY); + if (_flash_op_failed) + return true; // Retry + } + + // Success + return false; +} + // Flash Abstraction Layer static bool fal_erase (uint32_t addr); static uint32_t fal_program (uint32_t dst, void const * src, uint32_t len); @@ -124,31 +150,13 @@ static bool fal_erase (uint32_t addr) uint8_t sd_en = 0; (void) sd_softdevice_is_enabled(&sd_en); - // Make multiple attempts - for (uint8_t attempt = 0;;) { - // Attempt to erase - uint32_t err = sd_flash_page_erase(addr / FLASH_NRF52_PAGE_SIZE); - // Retry if busy - if (err == NRF_ERROR_BUSY) { - delay(1); - continue; - } - // Count genuine attempts - attempt++; - // If using softdevice, erase is async, so we wait here - if (sd_en) xSemaphoreTake(_sem, portMAX_DELAY); - // If erase failed - if (_flash_op_failed) { - assert(attempt < 10); // Something went horribly wrong.. protect the flash - continue; // Try again - } - // Catch any other odd results - VERIFY_STATUS(err, 0); - // Success - break; + // Make multiple attempts to erase + uint8_t attempt = 0; + while (retry_flash_op(sd_flash_page_erase(addr / FLASH_NRF52_PAGE_SIZE), sd_en)) { + if (++attempt > MAX_RETRY) + return false; // Failure } - - return true; + return true; // Success } static uint32_t fal_program (uint32_t dst, void const * src, uint32_t len) @@ -164,63 +172,25 @@ static uint32_t fal_program (uint32_t dst, void const * src, uint32_t len) // https://devzone.nordicsemi.com/f/nordic-q-a/40088/sd_flash_write-cause-nrf_fault_id_sd_assert // Workaround: write half page at a time. #if NRF52832_XXAA - while ( NRF_ERROR_BUSY == (err = sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/4)) ) - { - delay(1); + uint8_t attempt = 0; + while (retry_flash_op(sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/4), sd_en)) { + if (++attempt > MAX_RETRY) + return 0; // Failure } - VERIFY_STATUS(err, 0); - - if ( sd_en ) xSemaphoreTake(_sem, portMAX_DELAY); #else // First part of block - // ------------------ - for (uint8_t attempt = 0;;) { - // Attempt to write - uint32_t err = sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/8); - // Retry if busy - if (err == NRF_ERROR_BUSY) { - delay(1); - continue; - } - // Count genuine attempts - attempt++; - // If using softdevice, write is async, so we wait here - if (sd_en) xSemaphoreTake(_sem, portMAX_DELAY); - // If write failed - if (_flash_op_failed) { - assert(attempt < 10); // Something went horribly wrong.. protect the flash - continue; // Try again - } - // Catch any other odd results - VERIFY_STATUS(err, 0); - // Success - break; + uint8_t attempt = 0; + while (retry_flash_op(sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/8), sd_en)) { + if (++attempt > MAX_RETRY) + return 0; // Failure } // Second part of block - // -------------------- - for (uint8_t attempt = 0;;) { - // Attempt to write - uint32_t err = sd_flash_write((uint32_t*) (dst+ len/2), (uint32_t const *) (src + len/2), len/8); - // Retry if busy - if (err == NRF_ERROR_BUSY) { - delay(1); - continue; - } - // Count genuine attempts - attempt++; - // If using softdevice, write is async, so we wait here - if (sd_en) xSemaphoreTake(_sem, portMAX_DELAY); - // If write failed - if (_flash_op_failed) { - assert(attempt < 10); // Something went horribly wrong.. protect the flash - continue; // Try again - } - // Catch any other odd results - VERIFY_STATUS(err, 0); - // Success - break; + attempt = 0; + while (retry_flash_op(sd_flash_write((uint32_t*) (dst+ len/2), (uint32_t const *) (src + len/2), len/8), sd_en)) { + if (++attempt > MAX_RETRY) + return 0; // Failure } #endif From 92e936c54b7a9ab6440376146b93ad0c0f0ab9c5 Mon Sep 17 00:00:00 2001 From: Todd Herbert Date: Fri, 24 Jan 2025 14:10:58 +1300 Subject: [PATCH 3/3] Align with pending upstream PR https://github.com/adafruit/Adafruit_nRF52_Arduino/pull/838 --- .../InternalFileSytem/src/flash/flash_nrf5x.c | 149 ++++++++++++------ 1 file changed, 101 insertions(+), 48 deletions(-) diff --git a/libraries/InternalFileSytem/src/flash/flash_nrf5x.c b/libraries/InternalFileSytem/src/flash/flash_nrf5x.c index c039ba810..39cb94a88 100644 --- a/libraries/InternalFileSytem/src/flash/flash_nrf5x.c +++ b/libraries/InternalFileSytem/src/flash/flash_nrf5x.c @@ -28,6 +28,8 @@ #include "nrf_soc.h" #include "delay.h" #include "rtos.h" +#include "assert.h" + #ifdef NRF52840_XXAA #define BOOTLOADER_ADDR 0xF4000 @@ -43,46 +45,56 @@ extern uint32_t __flash_arduino_start[]; // MACRO TYPEDEF CONSTANT ENUM DECLARATION //--------------------------------------------------------------------+ static SemaphoreHandle_t _sem = NULL; -static bool _flash_op_failed = false; +static uint32_t _flash_op_result = NRF_EVT_FLASH_OPERATION_SUCCESS; void flash_nrf5x_event_cb (uint32_t event) { if ( _sem ) { // Record the result, for consumption by fal_erase or fal_program // Used to reattempt failed operations - _flash_op_failed = (event == NRF_EVT_FLASH_OPERATION_ERROR); + _flash_op_result = event; // Signal to fal_erase or fal_program that our async flash op is now complete xSemaphoreGive(_sem); } } -// How many retry attempts when performing flash write operations +// How many retry attempts when performing flash operations #define MAX_RETRY 20 -// Check whether a flash operation was successful, or should be repeated -static bool retry_flash_op (uint32_t op_result, bool sd_enabled) { - // If busy - if (op_result == NRF_ERROR_BUSY) { - delay(1); - return true; // Retry - } +// When soft device is enabled, flash ops are async +// Eventual success is reported via callback, which we await +static uint32_t wait_for_async_flash_op_completion(uint32_t initial_result) +{ + // If initial result not NRF_SUCCESS, no need to await callback + // We will pass the initial result (failure) straight through + int32_t result = initial_result; - // Unspecified error - if (op_result != NRF_SUCCESS) - return true; // Retry + // Operation was queued successfully + if (initial_result == NRF_SUCCESS) { - // If the soft device is enabled, flash operations run async - // The callback (flash_nrf5x_event_cb) will give semaphore when the flash operation is complete - // The callback also checks for NRF_EVT_FLASH_OPERATION_ERROR, which is not available to us otherwise - if (sd_enabled) { + // Wait for result via callback xSemaphoreTake(_sem, portMAX_DELAY); - if (_flash_op_failed) - return true; // Retry + + // If completed successfully + if (_flash_op_result == NRF_EVT_FLASH_OPERATION_SUCCESS) { + result = NRF_SUCCESS; + } + + // If general failure. + // The comment on NRF_EVT_FLASH_OPERATION_ERROR describes it as a timeout, + // so we're using a similar error when translating from NRF_SOC_EVTS type to the global NRF52 error defines + else if (_flash_op_result == NRF_EVT_FLASH_OPERATION_ERROR) { + result = NRF_ERROR_TIMEOUT; + } + + // If this assert triggers, we need to implement a new NRF_SOC_EVTS value + else { + assert(false); + } } - - // Success - return false; + + return result; } // Flash Abstraction Layer @@ -139,8 +151,7 @@ bool flash_nrf5x_erase(uint32_t addr) static bool fal_erase (uint32_t addr) { // Init semaphore for first call - if ( _sem == NULL ) - { + if ( _sem == NULL ) { _sem = xSemaphoreCreateBinary(); VERIFY(_sem); } @@ -150,19 +161,30 @@ static bool fal_erase (uint32_t addr) uint8_t sd_en = 0; (void) sd_softdevice_is_enabled(&sd_en); - // Make multiple attempts to erase - uint8_t attempt = 0; - while (retry_flash_op(sd_flash_page_erase(addr / FLASH_NRF52_PAGE_SIZE), sd_en)) { - if (++attempt > MAX_RETRY) - return false; // Failure + // Erase the page + // Multiple attempts if needed + uint32_t err; + for (uint8_t attempt = 0; attempt < MAX_RETRY; ++attempt) { + err = sd_flash_page_erase(addr / FLASH_NRF52_PAGE_SIZE); + + if (sd_en) { + err = wait_for_async_flash_op_completion(err); // Only async if soft device enabled + } + if (err == NRF_SUCCESS) { + break; + } + if (err == NRF_ERROR_BUSY) { + delay(1); + } } - return true; // Success + VERIFY_STATUS(err, false); // Return false if all retries fail + + return true; // Successfully erased } static uint32_t fal_program (uint32_t dst, void const * src, uint32_t len) { - // Check if soft device is enabled - // If yes, flash operations are async, so we need to wait for the callback to give the semaphore + // wait for async event if SD is enabled uint8_t sd_en = 0; (void) sd_softdevice_is_enabled(&sd_en); @@ -172,26 +194,57 @@ static uint32_t fal_program (uint32_t dst, void const * src, uint32_t len) // https://devzone.nordicsemi.com/f/nordic-q-a/40088/sd_flash_write-cause-nrf_fault_id_sd_assert // Workaround: write half page at a time. #if NRF52832_XXAA - uint8_t attempt = 0; - while (retry_flash_op(sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/4), sd_en)) { - if (++attempt > MAX_RETRY) - return 0; // Failure + // Write the page + // Multiple attempts, if needed + for (uint8_t attempt = 0; attempt < MAX_RETRY; ++attempt) { + err = sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/4); + + if (sd_en) { + err = wait_for_async_flash_op_completion(err); // Only async if soft device enabled + } + if (err == NRF_SUCCESS) { + break; + } + if (err == NRF_ERROR_BUSY) { + delay(1); + } } -#else + VERIFY_STATUS(err, 0); // Return 0 if all retries fail - // First part of block - uint8_t attempt = 0; - while (retry_flash_op(sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/8), sd_en)) { - if (++attempt > MAX_RETRY) - return 0; // Failure +#else + // Write first part of page + // Multiple attempts, if needed + for (uint8_t attempt = 0; attempt < MAX_RETRY; ++attempt) { + err = sd_flash_write((uint32_t*) dst, (uint32_t const *) src, len/8); + + if (sd_en) { + err = wait_for_async_flash_op_completion(err); // Only async if soft device enabled + } + if (err == NRF_SUCCESS) { + break; + } + if (err == NRF_ERROR_BUSY) { + delay(1); + } } - - // Second part of block - attempt = 0; - while (retry_flash_op(sd_flash_write((uint32_t*) (dst+ len/2), (uint32_t const *) (src + len/2), len/8), sd_en)) { - if (++attempt > MAX_RETRY) - return 0; // Failure + VERIFY_STATUS(err, 0); // Return 0 if all retries fail + + // Write second part of page + // Multiple attempts, if needed + for (uint8_t attempt = 0; attempt < MAX_RETRY; ++attempt) { + err = sd_flash_write((uint32_t*) (dst+ len/2), (uint32_t const *) (src + len/2), len/8); + + if (sd_en) { + err = wait_for_async_flash_op_completion(err); // Only async if soft device enabled + } + if (err == NRF_SUCCESS) { + break; + } + if (err == NRF_ERROR_BUSY) { + delay(1); + } } + VERIFY_STATUS(err, 0); // Return 0 if all retries fail #endif return len;