inference/core/models/instance_segmentation_base.py (16 additions & 20 deletions)
@@ -245,28 +245,24 @@ def make_response(
"""
responses = []
for ind, (batch_predictions, batch_masks) in enumerate(zip(predictions, masks)):
predictions = []
for pred, mask in zip(batch_predictions, batch_masks):
if class_filter and not self.class_names[int(pred[6])] in class_filter:
# TODO: logger.debug
[Inline comment from the Contributor Author on the TODO line above]
Would a debug log be necessary? This PR deletes the TODO since it didn't seem especially helpful, but I can also add logs to the generator if needed.
-                    continue
-                # Passing args as a dictionary here since one of the args is 'class' (a protected term in Python)
-                predictions.append(
-                    InstanceSegmentationPrediction(
-                        **{
-                            "x": pred[0] + (pred[2] - pred[0]) / 2,
-                            "y": pred[1] + (pred[3] - pred[1]) / 2,
-                            "width": pred[2] - pred[0],
-                            "height": pred[3] - pred[1],
-                            "points": [Point(x=point[0], y=point[1]) for point in mask],
-                            "confidence": pred[4],
-                            "class": self.class_names[int(pred[6])],
-                            "class_id": int(pred[6]),
-                        }
-                    )
-                )
+            predictions_gen = (
+                InstanceSegmentationPrediction(
+                    **{
+                        "x": pred[0] + (pred[2] - pred[0]) / 2,
+                        "y": pred[1] + (pred[3] - pred[1]) / 2,
+                        "width": pred[2] - pred[0],
+                        "height": pred[3] - pred[1],
+                        "points": [Point(x=point[0], y=point[1]) for point in mask],
+                        "confidence": pred[4],
+                        "class": self.class_names[int(pred[6])],
+                        "class_id": int(pred[6]),
+                    }
+                )
+                for pred, mask in zip(batch_predictions, batch_masks)
+                if not class_filter or self.class_names[int(pred[6])] in class_filter
+            )
             response = InstanceSegmentationInferenceResponse(
-                predictions=predictions,
+                predictions=predictions_gen,
                 image=InferenceResponseImage(
                     width=img_dims[ind][1], height=img_dims[ind][0]
                 ),
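For readers skimming the diff above: the refactor replaces an eagerly built list with a generator expression, and the **{...} unpacking exists because "class" is a reserved word in Python and cannot be passed as a plain keyword argument. Below is a minimal, self-contained sketch of both ideas; the Prediction model and the sample data are illustrative, not the library's actual classes.

    from pydantic import BaseModel, Field


    class Prediction(BaseModel):
        # "class" is a Python keyword, so the field is exposed through an
        # alias and must be supplied via dict unpacking at construction time.
        class_name: str = Field(alias="class")
        confidence: float


    raw = [("cat", 0.9), ("dog", 0.4), ("cat", 0.8)]
    class_filter = {"cat"}

    # Eager: every Prediction object exists in memory at once.
    eager = [
        Prediction(**{"class": name, "confidence": conf})
        for name, conf in raw
        if not class_filter or name in class_filter
    ]

    # Lazy: each Prediction is constructed only when the consumer iterates.
    lazy = (
        Prediction(**{"class": name, "confidence": conf})
        for name, conf in raw
        if not class_filter or name in class_filter
    )

    for p in lazy:
        print(p.class_name, p.confidence)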
inference/models/yolact/yolact_instance_segmentation.py (2 additions & 2 deletions)
@@ -272,11 +272,11 @@ def make_response(
"""
responses = [
InstanceSegmentationInferenceResponse(
predictions=[
predictions=(
[Inline comment thread on the line above]

Contributor:
Isn't it converted to a list before rendering? I'm not sure it helps with memory usage.

Contributor Author (@shntu, Oct 2, 2025):
Currently, there are a handful of enterprise users who import InferencePipeline and run it without actually using any of the HTTP responses.

I'm closing this PR because it indeed does not solve the problem: memory usage during instance segmentation execution is much higher overall, and the memory used by these masks is a very small fraction of it. Actually reducing the memory pressure on the Jetson would require a much larger change.
                     InstanceSegmentationPrediction(**p)
                     for p in batch_pred
                     if not class_filter or p["class_name"] in class_filter
-                ],
+                ),
                 image=InferenceResponseImage(
                     width=img_dims[i][1], height=img_dims[i][0]
                 ),
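The reviewer's point can be demonstrated in isolation: when a pydantic model types a field as List[...], validation materializes whatever iterable it receives, so passing a generator saves no memory once the response object exists. A hedged sketch, assuming pydantic's default (non-strict) list coercion and using illustrative model names rather than the library's own:

    from typing import List

    from pydantic import BaseModel


    class Prediction(BaseModel):
        confidence: float


    class Response(BaseModel):
        # A List-typed field: pydantic collects any iterable it is given
        # into a concrete list while validating the model.
        predictions: List[Prediction]


    gen = (Prediction(confidence=c) for c in (0.9, 0.4, 0.8))
    response = Response(predictions=gen)

    # The generator was consumed during validation, so every object is in
    # memory regardless of how it was passed in.
    print(type(response.predictions))  # <class 'list'>
    print(len(response.predictions))   # 3

Under those assumptions the laziness ends at model construction, which matches the author's conclusion that any savings from the generator were marginal.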