Source code for kornia.contrib.models.structures

from __future__ import annotations

from dataclasses import dataclass
from typing import Optional

from kornia.core import Tensor
from kornia.core.check import KORNIA_CHECK
from kornia.geometry.transform import resize


@dataclass
class SegmentationResults:
    """Encapsulate the results obtained by a Segmentation model.

    Args:
        logits: Results logits with shape :math:`(B, C, H, W)`, where :math:`C` refers to the number of predicted masks
        scores: The scores from the logits. Shape :math:`(B, C)`
        mask_threshold: The threshold value to generate the `binary_masks` from the `logits`
    """

    logits: Tensor
    scores: Tensor
    mask_threshold: float = 0.0

    def __post_init__(self) -> None:
        # Cache for the logits upscaled by `original_res_logits`. It is deliberately NOT a dataclass field, so the
        # constructor signature, `repr` and `eq` stay untouched. It must exist from construction on, because
        # `binary_masks` and `squeeze` read it before `original_res_logits` has necessarily been called.
        self._original_res_logits: Optional[Tensor] = None

    @property
    def binary_masks(self) -> Tensor:
        """Binary mask generated from logits considering the mask_threshold.

        Shape will be the same of logits :math:`(B, C, H, W)` where :math:`C` is the number masks predicted.

        .. note:: If you run `original_res_logits`, this will generate the masks
           based on the original resolution logits.
           Otherwise, this will use the low resolution logits (self.logits).
        """
        # Prefer the original-resolution logits when they have been computed.
        if self._original_res_logits is not None:
            x = self._original_res_logits
        else:
            x = self.logits

        # Strict comparison: a logit exactly equal to the threshold is NOT part of the mask.
        return x > self.mask_threshold

    def original_res_logits(
        self, input_size: tuple[int, int], original_size: tuple[int, int], image_size_encoder: Optional[tuple[int, int]]
    ) -> Tensor:
        """Remove padding and upscale the logits to the original image size.

        Resize to image encoder input -> remove padding (bottom and right) -> Resize to original size

        .. note:: This method sets an internal `_original_res_logits`, which `binary_masks` uses when available.

        Args:
            input_size: The size of the image input to the model, in (H, W) format. Used to remove padding.
            original_size: The original size of the image before resizing for input to the model, in (H, W) format.
            image_size_encoder: The size of the input image for image encoder, in (H, W) format. Used to resize the
                                logits back to encoder resolution before removing the padding. When it is not a
                                tuple (e.g. `None`), this first resize is skipped.

        Returns:
            Batched logits in :math:`(B, C, H, W)` format, where (H, W) is given by original_size.
        """
        x = self.logits

        # Step 1 (optional): bring the low resolution logits back to the encoder input resolution.
        if isinstance(image_size_encoder, tuple):
            x = resize(x, size=image_size_encoder, interpolation="bilinear", align_corners=False, antialias=False)

        # Step 2: drop the padding by keeping only the top-left `input_size` region of the last two dims.
        x = x[..., : input_size[0], : input_size[1]]

        # Step 3: upscale the unpadded logits to the size of the original image.
        x = resize(x, size=original_size, interpolation="bilinear", align_corners=False, antialias=False)

        self._original_res_logits = x
        return self._original_res_logits

    def squeeze(self, dim: int = 0) -> SegmentationResults:
        """Squeeze the given dim of every stored tensor, in place, and return `self`.

        Args:
            dim: The dimension to squeeze on `logits`, `scores` and, when already computed, on the cached
                 original resolution logits.

        Returns:
            This same instance, to allow chaining.
        """
        self.logits = self.logits.squeeze(dim)
        self.scores = self.scores.squeeze(dim)
        # The cache only exists as a tensor after `original_res_logits` has been called.
        if isinstance(self._original_res_logits, Tensor):
            self._original_res_logits = self._original_res_logits.squeeze(dim)

        return self


@dataclass
class Prompts:
    """Encapsulate the prompts inputs for a Model.

    Args:
        points: A tuple with the keypoints (coordinates x, y) and their respective labels. Shape :math:`(K, N, 2)` for
                the keypoints, and :math:`(K, N)` for the labels.
        boxes: Batched box inputs, with shape :math:`(K, 4)`. Expected to be into xyxy format.
        masks: Batched mask prompts to the model with shape :math:`(K, 1, H, W)`
    """

    points: Optional[tuple[Tensor, Tensor]] = None
    boxes: Optional[Tensor] = None
    masks: Optional[Tensor] = None

    def __post_init__(self) -> None:
        # Only when both keypoints and boxes are given as tensors, their leading (batch) dimension is compared.
        # `masks` is not validated here.
        if isinstance(self.keypoints, Tensor) and isinstance(self.boxes, Tensor):
            KORNIA_CHECK(self.keypoints.shape[0] == self.boxes.shape[0], "The prompts should have the same batch size!")

    @property
    def keypoints(self) -> Optional[Tensor]:
        """The keypoints from the `points` (first tuple element), or `None` when `points` is not a tuple."""
        return self.points[0] if isinstance(self.points, tuple) else None

    @property
    def keypoints_labels(self) -> Optional[Tensor]:
        """The keypoints labels from the `points` (second tuple element), or `None` when `points` is not a tuple."""
        return self.points[1] if isinstance(self.points, tuple) else None