Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions meson.options
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ option(
# failure unless overridden from respective JSON file configuration.
# Default value will be 3.
# A retry attempt value of zero indicates no retries will be performed.
option('retry_attempts', type: 'integer', min: 0, value: 3)
option('retry_attempts', type: 'integer', min: 0, value: 1)

# The retry interval in seconds which is applicable for all files/directories
# during sync retry unless overridden from respective JSON file configuration.
# Default value is 5secs.
option('retry_interval', type: 'integer', value: 5)
option('retry_interval', type: 'integer', value: 20)

#The option to enable the test suite
option('tests', type: 'feature', value: 'enabled', description: 'Build tests')
3 changes: 2 additions & 1 deletion src/data_sync_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ DataSyncConfig::DataSyncConfig(const nlohmann::json& config,
}
else
{
_retry = std::nullopt;
_retry = Retry(DEFAULT_RETRY_ATTEMPTS,
std::chrono::seconds(DEFAULT_RETRY_INTERVAL));
}

if (config.contains("ExcludeList"))
Expand Down
9 changes: 9 additions & 0 deletions src/data_sync_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,15 @@ struct DataSyncConfig
*/
std::optional<std::unordered_set<fs::path>> _includeList;

/**
* @brief Tracks file or directory paths currently being processed for
* sync.
*
* This container holds paths that are actively undergoing sync. Once
* processing completes, the path is removed from this set.
*/
mutable std::unordered_set<fs::path> _syncInProgressPaths;

private:
/**
* @brief A helper API to retrieve the corresponding enum type
Expand Down
157 changes: 130 additions & 27 deletions src/manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,40 @@ sdbusplus::async::task<> Manager::startSyncEvents()
sdbusplus::async::task<bool>
// NOLINTNEXTLINE
Manager::syncData(const config::DataSyncConfig& dataSyncCfg,
fs::path srcPath)
fs::path srcPath, std::vector<fs::path> vanishedPaths,
size_t retryCount)
{
if (_syncBMCDataIface.disable_sync())
{
co_return false;
}

using namespace std::string_literals;

const bool haveIncludeList = dataSyncCfg._includeList.has_value() &&
!dataSyncCfg._includeList->empty();

const fs::path currentSrcPath = srcPath.empty() ? dataSyncCfg._path
: srcPath;

const size_t maxAttempts = dataSyncCfg._retry->_retryAttempts;
const size_t retryIntervalSec =
dataSyncCfg._retry->_retryIntervalInSec.count();

// On first try only, if this path is already in retry/syncing, skip that
if (retryCount == 0)
{
if (dataSyncCfg._syncInProgressPaths.count(currentSrcPath) != 0U)
{
lg2::debug(
"Skipping sync for [{SRC}]: a sync is already in progress",
"SRC", currentSrcPath);
co_return true;
}
// Mark the path as in-progress so subsequent retries know to skip it
dataSyncCfg._syncInProgressPaths.insert(currentSrcPath);
}

// For more details about CLI options, refer rsync man page.
// https://download.samba.org/pub/rsync/rsync.1#OPTION_SUMMARY
std::string syncCmd{
Expand All @@ -186,6 +216,21 @@ sdbusplus::async::task<bool>
{
syncCmd.append(" "s + srcPath.string());
}
else if (!vanishedPaths.empty())
{
// framed include-list (built from vanished roots)
const std::string framedIncludeListCmd =
data_sync::retry::frameIncludeListCLI(dataSyncCfg, vanishedPaths);
syncCmd += framedIncludeListCmd;
}
else if ((dataSyncCfg._includeList.has_value()) && (srcPath.empty()))
{
// Configure the paths in include List as SRC paths
auto appendToCmd = [&syncCmd](const auto& path) {
syncCmd.append(" "s + path.string());
};
std::ranges::for_each(dataSyncCfg._includeList.value(), appendToCmd);
}
else
{
syncCmd.append(" "s + dataSyncCfg._path.string());
Expand All @@ -198,33 +243,98 @@ sdbusplus::async::task<bool>
#endif

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why a new commit? You could just revert commit fb9588a, right?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did some other code go in that enables the sync to work on skiboards?

// Add destination data path if configured
syncCmd.append(dataSyncCfg._destPath.value_or(fs::path("")));
// TODO: Change the default destPath to empty once remote sync is enabled.
syncCmd.append(" "s +
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this modified?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is needed when a destination path is configured.
Without the space, the src and dest paths get merged (e.g., /a/b/c/data/backup),
so adding the space ensures they remain separate (e.g., /a/b/c/ /data/backup).

dataSyncCfg._destPath.value_or(fs::path("/")).string());
lg2::debug("RSYNC CMD : {CMD}", "CMD", syncCmd);

data_sync::async::AsyncCommandExecutor executor(_ctx);
auto result = co_await executor.execCmd(syncCmd); // NOLINT
lg2::debug("Rsync cmd output : {OUTPUT}", "OUTPUT", result.second);
if (result.first != 0)

if (result.first == 0)
{
// TODOs:
// 1. Retry based on rsync error code
// 2. Create error log and Disable redundancy if retry fails
// 3. Perform a callout
// Remove from in-progress after completing sync successfully
dataSyncCfg._syncInProgressPaths.erase(currentSrcPath);
co_return true;
}

if (retryCount < maxAttempts)
{
if (haveIncludeList)
{
if (result.first == 24)
{
auto vanishedPaths =
data_sync::retry::getVanishedSrcPaths(result.second);
lg2::warning(
"exit=24; switching to framed include-list retry with {NUM} root(s).",
"NUM", vanishedPaths.size());
co_await sleep_for(_ctx,
std::chrono::seconds(retryIntervalSec));
co_return co_await syncData(dataSyncCfg, fs::path{},
std::move(vanishedPaths),
retryCount + 1);
}

// NOTE: The following line is commented out as part of a temporary
// workaround. We are forcing Full Sync to succeed even if data syncing
// fails. This change should be reverted once proper error handling is
// implemented.
// setSyncEventsHealth(SyncEventsHealth::Critical);
lg2::info(
"Retry {RETRY_COUNT}/{MAX_ATTEMPTS} (exit={ERROR_CODE}) retrying with same include list",
"RETRY_COUNT", retryCount + 1, "MAX_ATTEMPTS", maxAttempts,
"ERROR_CODE", result.first);
co_await sleep_for(_ctx, std::chrono::seconds(retryIntervalSec));
co_return co_await syncData(dataSyncCfg, fs::path{},
std::move(vanishedPaths),
retryCount + 1);
}
if (result.first == 24)
{
auto vanished =
data_sync::retry::getVanishedSrcPaths(result.second);
const fs::path& nextSrc = vanished.front();

lg2::error(
"Error syncing [{PATH}], ErrCode : {ERRCODE}, Error : {ERROR}",
"PATH", dataSyncCfg._path, "ERRCODE", result.first, "ERROR",
result.second);
lg2::warning(
"exit=24; retry with switching SRC [{OLD}] -> vanishedPath [{NEW}]",
"OLD", currentSrcPath, "NEW", nextSrc);

co_return false;
co_await sleep_for(_ctx, std::chrono::seconds(retryIntervalSec));
co_return co_await syncData(dataSyncCfg, nextSrc, {},
retryCount + 1);
}
else
{
lg2::info(
"Retry {RETRY_COUNT}/{MAX_ATTEMPTS} (exit={ERROR_CODE}) → [{SRC}]",
"RETRY_COUNT", retryCount + 1, "MAX_ATTEMPTS", maxAttempts,
"ERROR_CODE", result.first, "SRC", currentSrcPath);

co_await sleep_for(_ctx, std::chrono::seconds(retryIntervalSec));
co_return co_await syncData(dataSyncCfg, currentSrcPath, {},
retryCount + 1);
}
}

// If we reach here, all retry attempts have been exhausted
// Mark sync events health as critical
setSyncEventsHealth(SyncEventsHealth::Critical);

// Remove from in-progress after completing all sync attempts
dataSyncCfg._syncInProgressPaths.erase(currentSrcPath);

if (haveIncludeList)
{
lg2::error(
"Sync failed after {MAX_ATTEMPTS} attempts (exit {ERROR_CODE}); include_paths={INC_NUM}; vanished_roots={VAN_NUM}",
"MAX_ATTEMPTS", maxAttempts, "ERROR_CODE", result.first, "INC_NUM",
dataSyncCfg._includeList->size(), "VAN_NUM", vanishedPaths.size());
}
co_return true;
else
{
lg2::error(
"Sync failed after {MAX_ATTEMPTS} attempts (exit {ERROR_CODE}): [{SRC}]",
"MAX_ATTEMPTS", maxAttempts, "ERROR_CODE", result.first, "SRC",
currentSrcPath);
}
co_return false;
}

sdbusplus::async::task<>
Expand Down Expand Up @@ -408,15 +518,8 @@ sdbusplus::async::task<void> Manager::startFullSync()
}
else
{
// Forcefully marking full sync as successful, even if data syncing
// fails.
// TODO: Revert this workaround once the proper logic is implemented
setFullSyncStatus(FullSyncStatus::FullSyncCompleted);
setSyncEventsHealth(SyncEventsHealth::Ok);
lg2::info("Full Sync passed temporarily despite sync failures");

// setFullSyncStatus(FullSyncStatus::FullSyncFailed);
// lg2::info("Full Sync failed");
setFullSyncStatus(FullSyncStatus::FullSyncFailed);
lg2::info("Full Sync failed");
}

// total duration/time diff of the Full Sync operation
Expand Down
8 changes: 7 additions & 1 deletion src/manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,19 @@ class Manager
* performing a local copy instead.
*
* @param[in] dataSyncCfg - The data sync config to sync
* @param[in] srcPath - The optional source data path
* @param[in] vanishedPaths - List of vanished root paths reported by rsync
used to rebuild include-list filters for retry
* @param[in] retryCount - The current retry attempt count
*
* @return Returns true if sync succeeds; otherwise, returns false
*
*/
sdbusplus::async::task<bool>
syncData(const config::DataSyncConfig& dataSyncCfg,
fs::path srcPath = fs::path{});
fs::path srcPath = fs::path{},
std::vector<fs::path> vanishedPaths = {},
size_t retryCount = 0);

/**
* @brief A helper API to monitor data to sync if it's changed
Expand Down
Loading