Skip to content

Commit fdef380

Browse files
ilievladiulianamuraru
authored and committed
Add endpoint to move all replicas from a disk to another disk of the same broker (#7)
Add endpoint to move replicas from a specified disk to other disks of the same broker
1 parent 6ca9e73 commit fdef380

File tree

15 files changed

+786
-26
lines changed

15 files changed

+786
-26
lines changed

config/cruisecontrol.properties

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,9 @@ num.proposal.precompute.threads=1
170170
# The impact of strictness on the relative balancedness score.
171171
#goal.balancedness.strictness.weight
172172

173+
# The error margin between the removed disk size and the remaining disk size.
174+
#remove.disks.remaining.size.error.margin=0.1
175+
173176
# Configurations for the executor
174177
# =======================================
175178

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
/*
2+
* Copyright 2022 LinkedIn Corp. Licensed under the BSD 2-Clause License (the "License"). See License in the project root for license information.
3+
*/
4+
5+
package com.linkedin.kafka.cruisecontrol.analyzer.goals;
6+
7+
import com.linkedin.kafka.cruisecontrol.analyzer.OptimizationOptions;
8+
import com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance;
9+
import com.linkedin.kafka.cruisecontrol.analyzer.BalancingAction;
10+
import com.linkedin.kafka.cruisecontrol.analyzer.ProvisionResponse;
11+
import com.linkedin.kafka.cruisecontrol.analyzer.ProvisionStatus;
12+
import com.linkedin.kafka.cruisecontrol.common.Resource;
13+
import com.linkedin.kafka.cruisecontrol.model.Broker;
14+
import com.linkedin.kafka.cruisecontrol.model.ClusterModel;
15+
import com.linkedin.kafka.cruisecontrol.model.ClusterModelStats;
16+
import com.linkedin.kafka.cruisecontrol.model.Disk;
17+
import com.linkedin.kafka.cruisecontrol.model.Replica;
18+
import com.linkedin.kafka.cruisecontrol.monitor.ModelCompletenessRequirements;
19+
import java.util.Set;
20+
import java.util.Map;
21+
import java.util.List;
22+
import java.util.ArrayList;
23+
import java.util.Comparator;
24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
26+
27+
import static com.linkedin.kafka.cruisecontrol.analyzer.ActionAcceptance.ACCEPT;
28+
import static com.linkedin.kafka.cruisecontrol.analyzer.goals.GoalUtils.MIN_NUM_VALID_WINDOWS_FOR_SELF_HEALING;
29+
30+
31+
/**
32+
* Soft goal to move the replicas to different log dir.
33+
*/
34+
public class DiskRemovalGoal implements Goal {
35+
private static final Logger LOG = LoggerFactory.getLogger(DiskRemovalGoal.class);
36+
private static final Double EPSILON = 0.0001;
37+
38+
private final ProvisionResponse _provisionResponse;
39+
protected final Map<Integer, Set<String>> _brokerIdAndLogdirs;
40+
protected final double _errorMargin;
41+
42+
public DiskRemovalGoal(Map<Integer, Set<String>> brokerIdAndLogdirs, double errorMargin) {
43+
_provisionResponse = new ProvisionResponse(ProvisionStatus.UNDECIDED);
44+
_brokerIdAndLogdirs = brokerIdAndLogdirs;
45+
_errorMargin = errorMargin;
46+
}
47+
48+
private void sanityCheckOptimizationOptions(OptimizationOptions optimizationOptions) {
49+
if (optimizationOptions.isTriggeredByGoalViolation()) {
50+
throw new IllegalArgumentException(String.format("%s goal does not support use by goal violation detector.", name()));
51+
}
52+
}
53+
54+
@Override
55+
public boolean optimize(ClusterModel clusterModel, Set<Goal> optimizedGoals, OptimizationOptions optimizationOptions) {
56+
sanityCheckOptimizationOptions(optimizationOptions);
57+
58+
for (Map.Entry<Integer, Set<String>> brokerIdLogDirs : _brokerIdAndLogdirs.entrySet()) {
59+
Integer brokerId = brokerIdLogDirs.getKey();
60+
Set<String> logDirsToRemove = brokerIdLogDirs.getValue();
61+
relocateBrokerLogDirs(clusterModel, brokerId, logDirsToRemove);
62+
}
63+
64+
return true;
65+
}
66+
67+
/**
68+
* This method relocates the replicas on the provided log dirs to other log dirs of the same broker.
69+
*
70+
* @param clusterModel the cluster model
71+
* @param brokerId the id of the broker where the movement will take place
72+
* @param logDirsToRemove the set of log dirs to be removed from the broker
73+
*/
74+
private void relocateBrokerLogDirs(ClusterModel clusterModel, Integer brokerId, Set<String> logDirsToRemove) {
75+
Broker currentBroker = clusterModel.broker(brokerId);
76+
List<Disk> remainingDisks = new ArrayList<>();
77+
currentBroker.disks().stream().filter(disk -> !logDirsToRemove.contains(disk.logDir())).forEach(remainingDisks::add);
78+
remainingDisks.sort(Comparator.comparing(Disk::logDir));
79+
List<Replica> replicasToMove = getReplicasToMoveAsListSortedBySizeDesc(currentBroker, logDirsToRemove);
80+
81+
int usedDiskIdx = -1;
82+
for (Replica replicaToMove : replicasToMove) {
83+
usedDiskIdx = relocateReplicaIfPossible(clusterModel, brokerId, remainingDisks, replicaToMove, usedDiskIdx);
84+
}
85+
}
86+
87+
/**
88+
* This method provides the list of replicas to be moved sorted in descending order by the disk utilization.
89+
*
90+
* @param broker the broker where the replicas are
91+
* @param logDirs the log dirs where the replicas are
92+
* @return the sorted list of replicas to be moved
93+
*/
94+
private List<Replica> getReplicasToMoveAsListSortedBySizeDesc(Broker broker, Set<String> logDirs) {
95+
List<Replica> replicasToMove = new ArrayList<>();
96+
for (String logDir : logDirs) {
97+
Set<Replica> logDirReplicas = broker.disk(logDir).replicas();
98+
replicasToMove.addAll(logDirReplicas);
99+
}
100+
101+
replicasToMove.sort(Comparator.comparingDouble(o -> ((Replica) o).load().expectedUtilizationFor(Resource.DISK)).reversed());
102+
return replicasToMove;
103+
}
104+
105+
/**
106+
* This method relocates the given replica on one of the candidate disks in a round-robin manner if there is enough space
107+
*
108+
* @param clusterModel the cluster model
109+
* @param brokerId the broker id where the replica movement occurs
110+
* @param remainingDisks the candidate disks on which to move the replica
111+
* @param replica the replica to move
112+
* @param usedDiskIdx the index of the last disk used to relocate replicas
113+
* @return the index of the disk used to relocate the replica to
114+
*/
115+
private int relocateReplicaIfPossible(ClusterModel clusterModel, Integer brokerId, List<Disk> remainingDisks, Replica replica, int usedDiskIdx) {
116+
int remainingDisksNumber = remainingDisks.size();
117+
int diskIndex = (usedDiskIdx + 1) % remainingDisksNumber;
118+
for (int i = 0; i < remainingDisksNumber; i++) {
119+
Disk destinationDisk = remainingDisks.get(diskIndex);
120+
if (isEnoughSpace(destinationDisk, replica)) {
121+
clusterModel.relocateReplica(replica.topicPartition(), brokerId, destinationDisk.logDir());
122+
return diskIndex;
123+
}
124+
diskIndex = (diskIndex + 1) % remainingDisksNumber;
125+
}
126+
LOG.info("Could not move replica {} to any of the remaining disks.", replica);
127+
return usedDiskIdx;
128+
}
129+
130+
/**
131+
* This method checks if the usage on the disk that the replica will be moved to is lower than the disk capacity
132+
* including the error margin.
133+
*
134+
* @param disk the disk on which the replica can be moved
135+
* @param replica the replica to move
136+
* @return boolean which reflects if there is enough disk space to move the replica
137+
*/
138+
private boolean isEnoughSpace(Disk disk, Replica replica) {
139+
double futureUsage = disk.utilization() + replica.load().expectedUtilizationFor(Resource.DISK);
140+
double remainingSpacePercentage = (1 - (futureUsage / disk.capacity()));
141+
return remainingSpacePercentage > _errorMargin
142+
|| (remainingSpacePercentage > 0 && Math.abs(remainingSpacePercentage - _errorMargin) < EPSILON);
143+
}
144+
145+
@Override
146+
public ActionAcceptance actionAcceptance(BalancingAction action, ClusterModel clusterModel) {
147+
return ACCEPT;
148+
}
149+
150+
@Override
151+
public ClusterModelStatsComparator clusterModelStatsComparator() {
152+
return new ClusterModelStatsComparator() {
153+
@Override
154+
public int compare(ClusterModelStats stats1, ClusterModelStats stats2) {
155+
return 0;
156+
}
157+
158+
@Override
159+
public String explainLastComparison() {
160+
return String.format("Comparison for the %s is irrelevant.", name());
161+
}
162+
};
163+
}
164+
165+
@Override
166+
public ModelCompletenessRequirements clusterModelCompletenessRequirements() {
167+
return new ModelCompletenessRequirements(MIN_NUM_VALID_WINDOWS_FOR_SELF_HEALING, 0, true);
168+
}
169+
170+
@Override
171+
public String name() {
172+
return DiskRemovalGoal.class.getSimpleName();
173+
}
174+
175+
@Override
176+
public void finish() {
177+
178+
}
179+
180+
@Override
181+
public boolean isHardGoal() {
182+
return false;
183+
}
184+
185+
@Override
186+
public ProvisionStatus provisionStatus() {
187+
// Provision status computation is not relevant to PLE goal.
188+
return provisionResponse().status();
189+
}
190+
191+
@Override
192+
public ProvisionResponse provisionResponse() {
193+
return _provisionResponse;
194+
}
195+
196+
@Override
197+
public void configure(Map<String, ?> configs) {
198+
199+
}
200+
}

cruise-control/src/main/java/com/linkedin/kafka/cruisecontrol/config/constants/AnalyzerConfig.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,15 @@ public final class AnalyzerConfig {
448448
String.format("The class implements %s interface and is used to generate replica to broker set mapping.",
449449
ReplicaToBrokerSetMappingPolicy.class.getName());
450450

451+
/**
452+
* <code>remove.disks.remaining.size.error.margin</code>
453+
*/
454+
public static final String REMOVE_DISKS_REMAINING_SIZE_ERROR_MARGIN = "remove.disks.remaining.size.error.margin";
455+
public static final double DEFAULT_REMOVE_DISKS_REMAINING_SIZE_ERROR_MARGIN = 0.1;
456+
public static final String REMOVE_DISKS_REMAINING_SIZE_ERROR_MARGIN_DOC = "The margin of error between the remaining and the "
457+
+ "removed disk sizes. The ratio between the removed and the remaining size should be greater than this parameter. The minimum "
458+
+ "value is 0.05 (5%).";
459+
451460
private AnalyzerConfig() {
452461
}
453462

@@ -683,6 +692,12 @@ public static ConfigDef define(ConfigDef configDef) {
683692
.define(REPLICA_TO_BROKER_SET_MAPPING_POLICY_CLASS_CONFIG,
684693
ConfigDef.Type.CLASS, DEFAULT_REPLICA_TO_BROKER_SET_MAPPING_POLICY_CLASS,
685694
ConfigDef.Importance.LOW,
686-
REPLICA_TO_BROKER_SET_MAPPING_POLICY_CLASS_DOC);
695+
REPLICA_TO_BROKER_SET_MAPPING_POLICY_CLASS_DOC)
696+
.define(REMOVE_DISKS_REMAINING_SIZE_ERROR_MARGIN,
697+
ConfigDef.Type.DOUBLE,
698+
DEFAULT_REMOVE_DISKS_REMAINING_SIZE_ERROR_MARGIN,
699+
atLeast(0.05),
700+
ConfigDef.Importance.LOW,
701+
REMOVE_DISKS_REMAINING_SIZE_ERROR_MARGIN_DOC);
687702
}
688703
}

cruise-control/src/main/java/com/linkedin/kafka/cruisecontrol/config/constants/CruiseControlParametersConfig.java

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,27 @@
44

55
package com.linkedin.kafka.cruisecontrol.config.constants;
66

7-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.AddBrokerParameters;
8-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.AdminParameters;
9-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.BootstrapParameters;
7+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.RemoveDisksParameters;
8+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.PauseResumeParameters;
9+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.StopProposalParameters;
10+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.TrainParameters;
1011
import com.linkedin.kafka.cruisecontrol.servlet.parameters.ClusterLoadParameters;
11-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.CruiseControlStateParameters;
12-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.DemoteBrokerParameters;
13-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.FixOfflineReplicasParameters;
14-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.KafkaClusterStateParameters;
1512
import com.linkedin.kafka.cruisecontrol.servlet.parameters.PartitionLoadParameters;
16-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.PauseResumeParameters;
13+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.BootstrapParameters;
1714
import com.linkedin.kafka.cruisecontrol.servlet.parameters.ProposalsParameters;
18-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.RebalanceParameters;
19-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.RemoveBrokerParameters;
15+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.CruiseControlStateParameters;
16+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.KafkaClusterStateParameters;
17+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.UserTasksParameters;
2018
import com.linkedin.kafka.cruisecontrol.servlet.parameters.ReviewBoardParameters;
2119
import com.linkedin.kafka.cruisecontrol.servlet.parameters.ReviewParameters;
22-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.RightsizeParameters;
23-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.StopProposalParameters;
20+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.AddBrokerParameters;
21+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.RemoveBrokerParameters;
22+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.FixOfflineReplicasParameters;
23+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.DemoteBrokerParameters;
24+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.RebalanceParameters;
25+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.AdminParameters;
2426
import com.linkedin.kafka.cruisecontrol.servlet.parameters.TopicConfigurationParameters;
25-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.TrainParameters;
26-
import com.linkedin.kafka.cruisecontrol.servlet.parameters.UserTasksParameters;
27+
import com.linkedin.kafka.cruisecontrol.servlet.parameters.RightsizeParameters;
2728
import org.apache.kafka.common.config.ConfigDef;
2829

2930

@@ -179,6 +180,13 @@ public final class CruiseControlParametersConfig {
179180
public static final String DEFAULT_RIGHTSIZE_PARAMETERS_CLASS = RightsizeParameters.class.getName();
180181
public static final String RIGHTSIZE_PARAMETERS_CLASS_DOC = "The class for parameters of a provision rightsize request.";
181182

183+
/**
184+
* <code>remove.disks.parameters.class</code>
185+
*/
186+
public static final String REMOVE_DISKS_PARAMETERS_CLASS_CONFIG = "remove.disks.parameters.class";
187+
public static final String DEFAULT_REMOVE_DISKS_PARAMETERS_CLASS = RemoveDisksParameters.class.getName();
188+
public static final String REMOVE_DISKS_PARAMETERS_CLASS_DOC = "The class for parameters of a disks removal request.";
189+
182190
private CruiseControlParametersConfig() {
183191
}
184192

@@ -293,6 +301,11 @@ public static ConfigDef define(ConfigDef configDef) {
293301
ConfigDef.Type.CLASS,
294302
DEFAULT_RIGHTSIZE_PARAMETERS_CLASS,
295303
ConfigDef.Importance.MEDIUM,
296-
RIGHTSIZE_PARAMETERS_CLASS_DOC);
304+
RIGHTSIZE_PARAMETERS_CLASS_DOC)
305+
.define(REMOVE_DISKS_PARAMETERS_CLASS_CONFIG,
306+
ConfigDef.Type.CLASS,
307+
DEFAULT_REMOVE_DISKS_PARAMETERS_CLASS,
308+
ConfigDef.Importance.MEDIUM,
309+
REMOVE_DISKS_PARAMETERS_CLASS_DOC);
297310
}
298311
}

cruise-control/src/main/java/com/linkedin/kafka/cruisecontrol/config/constants/CruiseControlRequestConfig.java

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,18 @@
44

55
package com.linkedin.kafka.cruisecontrol.config.constants;
66

7-
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.AddBrokerRequest;
87
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.ClusterLoadRequest;
9-
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.CruiseControlStateRequest;
10-
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.DemoteRequest;
11-
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.FixOfflineReplicasRequest;
128
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.PartitionLoadRequest;
139
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.ProposalsRequest;
14-
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.RebalanceRequest;
10+
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.CruiseControlStateRequest;
11+
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.TopicConfigurationRequest;
12+
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.AddBrokerRequest;
1513
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.RemoveBrokerRequest;
14+
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.RemoveDisksRequest;
15+
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.DemoteRequest;
16+
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.RebalanceRequest;
17+
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.FixOfflineReplicasRequest;
1618
import com.linkedin.kafka.cruisecontrol.servlet.handler.sync.RightsizeRequest;
17-
import com.linkedin.kafka.cruisecontrol.servlet.handler.async.TopicConfigurationRequest;
1819
import com.linkedin.kafka.cruisecontrol.servlet.handler.sync.AdminRequest;
1920
import com.linkedin.kafka.cruisecontrol.servlet.handler.sync.BootstrapRequest;
2021
import com.linkedin.kafka.cruisecontrol.servlet.handler.sync.KafkaClusterStateRequest;
@@ -181,6 +182,13 @@ public final class CruiseControlRequestConfig {
181182
public static final String DEFAULT_RIGHTSIZE_REQUEST_CLASS = RightsizeRequest.class.getName();
182183
public static final String RIGHTSIZE_REQUEST_CLASS_DOC = "The class to handle a provision rightsize request.";
183184

185+
/**
186+
* <code>remove.disks.request.class</code>
187+
*/
188+
public static final String REMOVE_DISKS_REQUEST_CLASS_CONFIG = "remove.disks.request.class";
189+
public static final String DEFAULT_REMOVE_DISKS_REQUEST_CLASS = RemoveDisksRequest.class.getName();
190+
public static final String REMOVE_DISKS_REQUEST_CLASS_DOC = "The class to handle a disks removal request.";
191+
184192
private CruiseControlRequestConfig() {
185193
}
186194

@@ -295,6 +303,11 @@ public static ConfigDef define(ConfigDef configDef) {
295303
ConfigDef.Type.CLASS,
296304
DEFAULT_RIGHTSIZE_REQUEST_CLASS,
297305
ConfigDef.Importance.MEDIUM,
298-
RIGHTSIZE_REQUEST_CLASS_DOC);
306+
RIGHTSIZE_REQUEST_CLASS_DOC)
307+
.define(REMOVE_DISKS_REQUEST_CLASS_CONFIG,
308+
ConfigDef.Type.CLASS,
309+
DEFAULT_REMOVE_DISKS_REQUEST_CLASS,
310+
ConfigDef.Importance.MEDIUM,
311+
REMOVE_DISKS_REQUEST_CLASS_DOC);
299312
}
300313
}

cruise-control/src/main/java/com/linkedin/kafka/cruisecontrol/servlet/CruiseControlEndPoint.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ public enum CruiseControlEndPoint implements EndPoint {
3434
ADMIN(CRUISE_CONTROL_ADMIN),
3535
REVIEW(CRUISE_CONTROL_ADMIN),
3636
TOPIC_CONFIGURATION(KAFKA_ADMIN),
37-
RIGHTSIZE(KAFKA_ADMIN);
37+
RIGHTSIZE(KAFKA_ADMIN),
38+
REMOVE_DISKS(KAFKA_ADMIN);
3839

3940
private static final List<CruiseControlEndPoint> CACHED_VALUES = List.of(values());
4041
private static final List<CruiseControlEndPoint> GET_ENDPOINTS = Arrays.asList(BOOTSTRAP,
@@ -57,7 +58,8 @@ public enum CruiseControlEndPoint implements EndPoint {
5758
ADMIN,
5859
REVIEW,
5960
TOPIC_CONFIGURATION,
60-
RIGHTSIZE);
61+
RIGHTSIZE,
62+
REMOVE_DISKS);
6163

6264
private final EndpointType _endpointType;
6365

0 commit comments

Comments
 (0)