Skip to content

Commit 95c6ed9

Browse files
authored
Fix assertion failure in IndexShard.updateGlobalCheckpointOnReplica() when remote translog is enabled (#6975)
Signed-off-by: Sachin Kale <[email protected]>
1 parent e12a5b9 commit 95c6ed9

File tree

2 files changed

+4
-6
lines changed

2 files changed

+4
-6
lines changed

server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -282,22 +282,18 @@ public void testPeerRecoveryWithRemoteStoreNoRemoteTranslogRefresh() throws Exce
282282
testPeerRecovery(false, randomIntBetween(2, 5), false);
283283
}
284284

285-
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/6193")
286285
public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogNoDataFlush() throws Exception {
287286
testPeerRecovery(true, 1, true);
288287
}
289288

290-
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/6193")
291289
public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogFlush() throws Exception {
292290
testPeerRecovery(true, randomIntBetween(2, 5), true);
293291
}
294292

295-
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/6193")
296293
public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogNoDataRefresh() throws Exception {
297294
testPeerRecovery(true, 1, false);
298295
}
299296

300-
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/6193")
301297
public void testPeerRecoveryWithRemoteStoreAndRemoteTranslogRefresh() throws Exception {
302298
testPeerRecovery(true, randomIntBetween(2, 5), false);
303299
}

server/src/main/java/org/opensearch/index/shard/IndexShard.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3078,9 +3078,11 @@ public void updateGlobalCheckpointOnReplica(final long globalCheckpoint, final S
30783078
* calculations of the global checkpoint. However, we can not assert that we are in the translog stage of recovery here as
30793079
* while the global checkpoint update may have emanated from the primary when we were in that state, we could subsequently move
30803080
* to recovery finalization, or even finished recovery before the update arrives here.
3081+
* When remote translog is enabled for an index, replication operation is limited to primary term validation and does not
3082+
* update local checkpoint at replica, so the local checkpoint at replica can be less than globalCheckpoint.
30813083
*/
3082-
assert state() != IndexShardState.POST_RECOVERY && state() != IndexShardState.STARTED
3083-
: "supposedly in-sync shard copy received a global checkpoint ["
3084+
assert (state() != IndexShardState.POST_RECOVERY && state() != IndexShardState.STARTED)
3085+
|| indexSettings.isRemoteTranslogStoreEnabled() : "supposedly in-sync shard copy received a global checkpoint ["
30843086
+ globalCheckpoint
30853087
+ "] "
30863088
+ "that is higher than its local checkpoint ["

0 commit comments

Comments
 (0)