From ef290ec199d190c45904f51dbcffc4c07865d888 Mon Sep 17 00:00:00 2001
From: Jai Balani
Date: Wed, 27 Nov 2024 12:04:16 +0530
Subject: [PATCH] WIP: Changes for state build

---
 .../com/github/ambry/store/DiskManager.java   | 18 ++++++++
 .../github/ambry/store/StorageManager.java    | 46 ++++++++++++++-----
 2 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/ambry-store/src/main/java/com/github/ambry/store/DiskManager.java b/ambry-store/src/main/java/com/github/ambry/store/DiskManager.java
index a7ac7a0a92..2cfcebd1b8 100644
--- a/ambry-store/src/main/java/com/github/ambry/store/DiskManager.java
+++ b/ambry-store/src/main/java/com/github/ambry/store/DiskManager.java
@@ -478,6 +478,24 @@ boolean addBlobStore(ReplicaId replica) {
     return succeed;
   }
 
+  /**
+   * File copy state building for the store: registers it with the disk space allocator and the compaction manager.
+   * Best effort: any failure is logged together with the partition name and is not propagated to the caller.
+   * @param store the {@link BlobStore} whose state should be built.
+   * @param partitionName the partition name reported in the failure log.
+   */
+  void buildStoreStateForFileCopy(BlobStore store, String partitionName) {
+    try {
+      // collect store segment requirements and add into DiskSpaceAllocator
+      diskSpaceAllocator.addRequiredSegments(diskSpaceAllocator.getOverallRequirements(
+          Collections.singletonList(store.getDiskSpaceRequirements())), false);
+      // add store into CompactionManager
+      compactionManager.addBlobStore(store);
+    } catch (Exception e) {
+      logger.error("Failed to build state for FileCopy for partition {}", partitionName, e);
+    }
+  }
+
   /**
    * Start the BlobStore with given {@link PartitionId} {@code id}.
    * @param id the {@link PartitionId} of the {@link BlobStore} which should be started.
diff --git a/ambry-store/src/main/java/com/github/ambry/store/StorageManager.java b/ambry-store/src/main/java/com/github/ambry/store/StorageManager.java
index 7a27804ed0..cefe37a8f0 100644
--- a/ambry-store/src/main/java/com/github/ambry/store/StorageManager.java
+++ b/ambry-store/src/main/java/com/github/ambry/store/StorageManager.java
@@ -828,16 +828,7 @@ public void onPartitionBecomeBootstrapFromOffline(String partitionName) {
           }
         }
       }
-      if (isPrimaryClusterManagerListener) {
-        // Only update store state if this is a state transition for primary participant. Since replication Manager
-        // which eventually moves this state to STANDBY/LEADER only listens to primary participant, store state gets
-        // stuck in BOOTSTRAP if this is updated by second participant listener too
-        ReplicaState currentState = store.getCurrentState();
-        if (currentState != ReplicaState.LEADER && currentState != ReplicaState.STANDBY) {
-          // Only set the current state to BOOTSTRAP when it's not LEADER or STANDBY
-          store.setCurrentState(ReplicaState.BOOTSTRAP);
-        }
-      }
+
     }
 
     @Override
@@ -1014,7 +1005,40 @@ public void onPartitionBecomeDroppedFromOffline(String partitionName) {
 
     @Override
     public void buildStateForFileCopy(String partitionName) {
-      // no op
+      // The partition map should have the replica for the current partition since it was called in pre-file-copy step.
+      ReplicaId replica = partitionNameToReplicaId.get(partitionName);
+      if (replica == null) {
+        logger.error("No existing replica found for partition {} in partitionNameToReplicaId", partitionName);
+        throw new StateTransitionException(
+            "Existing replica " + partitionName + " is not found in clustermap for " + currentNode, ReplicaNotFound);
+      }
+      if (!addBlobStore(replica)) {
+        // We have decreased the available disk space in HelixClusterManager#getDiskForBootstrapReplica. Increase it
+        // back since addition of store failed.
+        replica.getDiskId().increaseAvailableSpaceInBytes(replica.getCapacityInBytes());
+        if (clusterMap.isDataNodeInFullAutoMode(currentNode)) {
+          // Clean up the partially added store. No alternate location is tried here, so the transition fails.
+          tryRemoveFailedBootstrapBlobStore(replica);
+        }
+        logger.error("Failed to add store {} into storage manager", partitionName);
+        throw new StateTransitionException("Failed to add store " + partitionName + " into storage manager",
+            ReplicaOperationFailure);
+      }
+      Store store = getStore(replica.getPartitionId(), false);
+      if (isPrimaryClusterManagerListener) {
+        // Only update store state if this is a state transition for primary participant. Since replication Manager
+        // which eventually moves this state to STANDBY/LEADER only listens to primary participant, store state gets
+        // stuck in BOOTSTRAP if this is updated by second participant listener too
+        if (store == null) {
+          // Guard against a missing store instead of failing with a NullPointerException below.
+          throw new StateTransitionException("Store " + partitionName + " is not available", ReplicaOperationFailure);
+        }
+        ReplicaState currentState = store.getCurrentState();
+        if (currentState != ReplicaState.LEADER && currentState != ReplicaState.STANDBY) {
+          // Only set the current state to BOOTSTRAP when it's not LEADER or STANDBY
+          store.setCurrentState(ReplicaState.BOOTSTRAP);
+        }
+      }
     }
 
     /**