Source code for kornia.morphology.morphology

from typing import List, Optional

import torch
import torch.nn.functional as F

__all__ = ["dilation", "erosion", "opening", "closing", "gradient", "top_hat", "bottom_hat"]


def _neight2channels_like_kernel(kernel: torch.Tensor) -> torch.Tensor:
    h, w = kernel.size()
    kernel = torch.eye(h * w, dtype=kernel.dtype, device=kernel.device)
    return kernel.view(h * w, 1, h, w)


[docs]def dilation( tensor: torch.Tensor, kernel: torch.Tensor, structuring_element: Optional[torch.Tensor] = None, origin: Optional[List[int]] = None, border_type: str = 'geodesic', border_value: float = 0.0, max_val: float = 1e4, engine: str = 'unfold', ) -> torch.Tensor: r"""Return the dilated image applying the same kernel in each channel. .. image:: _static/img/dilation.png The kernel must have 2 dimensions. Args: tensor: Image with shape :math:`(B, C, H, W)`. kernel: Positions of non-infinite elements of a flat structuring element. Non-zero values give the set of neighbors of the center over which the operation is applied. Its shape is :math:`(k_x, k_y)`. For full structural elements use torch.ones_like(structural_element). structuring_element: Structuring element used for the grayscale dilation. It may be a non-flat structuring element. origin: Origin of the structuring element. Default: ``None`` and uses the center of the structuring element as origin (rounding towards zero). border_type: It determines how the image borders are handled, where ``border_value`` is the value when ``border_type`` is equal to ``constant``. Default: ``geodesic`` which ignores the values that are outside the image when applying the operation. border_value: Value to fill past edges of input if ``border_type`` is ``constant``. max_val: The value of the infinite elements in the kernel. engine: convolution is faster and less memory hungry, and unfold is more stable numerically Returns: Dilated image with shape :math:`(B, C, H, W)`. .. note:: See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/ morphology_101.html>`__. Example: >>> tensor = torch.rand(1, 3, 5, 5) >>> kernel = torch.ones(3, 3) >>> dilated_img = dilation(tensor, kernel) """ if not isinstance(tensor, torch.Tensor): raise TypeError(f"Input type is not a torch.Tensor. Got {type(tensor)}") if len(tensor.shape) != 4: raise ValueError(f"Input size must have 4 dimensions. Got {tensor.dim()}") if not isinstance(kernel, torch.Tensor): raise TypeError(f"Kernel type is not a torch.Tensor. Got {type(kernel)}") if len(kernel.shape) != 2: raise ValueError(f"Kernel size must have 2 dimensions. Got {kernel.dim()}") # origin se_h, se_w = kernel.shape if origin is None: origin = [se_h // 2, se_w // 2] # pad pad_e: List[int] = [origin[1], se_w - origin[1] - 1, origin[0], se_h - origin[0] - 1] if border_type == 'geodesic': border_value = -max_val border_type = 'constant' output: torch.Tensor = F.pad(tensor, pad_e, mode=border_type, value=border_value) # computation if structuring_element is None: neighborhood = torch.zeros_like(kernel) neighborhood[kernel == 0] = -max_val else: neighborhood = structuring_element.clone() neighborhood[kernel == 0] = -max_val if engine == 'unfold': output = output.unfold(2, se_h, 1).unfold(3, se_w, 1) output, _ = torch.max(output + neighborhood.flip((0, 1)), 4) output, _ = torch.max(output, 4) elif engine == 'convolution': B, C, H, W = tensor.size() h_pad, w_pad = output.shape[-2:] reshape_kernel = _neight2channels_like_kernel(kernel) output, _ = F.conv2d( output.view(B * C, 1, h_pad, w_pad), reshape_kernel, padding=0, bias=neighborhood.view(-1).flip(0) ).max(dim=1) output = output.view(B, C, H, W) else: raise NotImplementedError(f"engine {engine} is unknown, use 'convolution' or 'unfold'") return output.view_as(tensor)
[docs]def erosion( tensor: torch.Tensor, kernel: torch.Tensor, structuring_element: Optional[torch.Tensor] = None, origin: Optional[List[int]] = None, border_type: str = 'geodesic', border_value: float = 0.0, max_val: float = 1e4, engine: str = 'unfold', ) -> torch.Tensor: r"""Return the eroded image applying the same kernel in each channel. .. image:: _static/img/erosion.png The kernel must have 2 dimensions. Args: tensor: Image with shape :math:`(B, C, H, W)`. kernel: Positions of non-infinite elements of a flat structuring element. Non-zero values give the set of neighbors of the center over which the operation is applied. Its shape is :math:`(k_x, k_y)`. For full structural elements use torch.ones_like(structural_element). structuring_element (torch.Tensor, optional): Structuring element used for the grayscale dilation. It may be a non-flat structuring element. origin: Origin of the structuring element. Default: ``None`` and uses the center of the structuring element as origin (rounding towards zero). border_type: It determines how the image borders are handled, where ``border_value`` is the value when ``border_type`` is equal to ``constant``. Default: ``geodesic`` which ignores the values that are outside the image when applying the operation. border_value: Value to fill past edges of input if border_type is ``constant``. max_val: The value of the infinite elements in the kernel. engine: ``convolution`` is faster and less memory hungry, and ``unfold`` is more stable numerically Returns: Eroded image with shape :math:`(B, C, H, W)`. .. note:: See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/ morphology_101.html>`__. Example: >>> tensor = torch.rand(1, 3, 5, 5) >>> kernel = torch.ones(5, 5) >>> output = erosion(tensor, kernel) """ if not isinstance(tensor, torch.Tensor): raise TypeError(f"Input type is not a torch.Tensor. Got {type(tensor)}") if len(tensor.shape) != 4: raise ValueError(f"Input size must have 4 dimensions. Got {tensor.dim()}") if not isinstance(kernel, torch.Tensor): raise TypeError(f"Kernel type is not a torch.Tensor. Got {type(kernel)}") if len(kernel.shape) != 2: raise ValueError(f"Kernel size must have 2 dimensions. Got {kernel.dim()}") # origin se_h, se_w = kernel.shape if origin is None: origin = [se_h // 2, se_w // 2] # pad pad_e: List[int] = [origin[1], se_w - origin[1] - 1, origin[0], se_h - origin[0] - 1] if border_type == 'geodesic': border_value = max_val border_type = 'constant' output: torch.Tensor = F.pad(tensor, pad_e, mode=border_type, value=border_value) # computation if structuring_element is None: neighborhood = torch.zeros_like(kernel) neighborhood[kernel == 0] = -max_val else: neighborhood = structuring_element.clone() neighborhood[kernel == 0] = -max_val if engine == 'unfold': output = output.unfold(2, se_h, 1).unfold(3, se_w, 1) output, _ = torch.min(output - neighborhood, 4) output, _ = torch.min(output, 4) elif engine == 'convolution': B, C, H, W = tensor.size() Hpad, Wpad = output.shape[-2:] reshape_kernel = _neight2channels_like_kernel(kernel) output, _ = F.conv2d( output.view(B * C, 1, Hpad, Wpad), reshape_kernel, padding=0, bias=-neighborhood.view(-1) ).min(dim=1) output = output.view(B, C, H, W) else: raise NotImplementedError(f"engine {engine} is unknown, use 'convolution' or 'unfold'") return output
[docs]def opening( tensor: torch.Tensor, kernel: torch.Tensor, structuring_element: Optional[torch.Tensor] = None, origin: Optional[List[int]] = None, border_type: str = 'geodesic', border_value: float = 0.0, max_val: float = 1e4, engine: str = 'unfold', ) -> torch.Tensor: r"""Return the opened image, (that means, dilation after an erosion) applying the same kernel in each channel. .. image:: _static/img/opening.png The kernel must have 2 dimensions. Args: tensor: Image with shape :math:`(B, C, H, W)`. kernel: Positions of non-infinite elements of a flat structuring element. Non-zero values give the set of neighbors of the center over which the operation is applied. Its shape is :math:`(k_x, k_y)`. For full structural elements use torch.ones_like(structural_element). structuring_element: Structuring element used for the grayscale dilation. It may be a non-flat structuring element. origin: Origin of the structuring element. Default: ``None`` and uses the center of the structuring element as origin (rounding towards zero). border_type: It determines how the image borders are handled, where ``border_value`` is the value when ``border_type`` is equal to ``constant``. Default: ``geodesic`` which ignores the values that are outside the image when applying the operation. border_value: Value to fill past edges of input if ``border_type`` is ``constant``. max_val: The value of the infinite elements in the kernel. engine: convolution is faster and less memory hungry, and unfold is more stable numerically Returns: torch.Tensor: Opened image with shape :math:`(B, C, H, W)`. .. note:: See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/ morphology_101.html>`__. Example: >>> tensor = torch.rand(1, 3, 5, 5) >>> kernel = torch.ones(3, 3) >>> opened_img = opening(tensor, kernel) """ if not isinstance(tensor, torch.Tensor): raise TypeError(f"Input type is not a torch.Tensor. Got {type(tensor)}") if len(tensor.shape) != 4: raise ValueError(f"Input size must have 4 dimensions. Got {tensor.dim()}") if not isinstance(kernel, torch.Tensor): raise TypeError(f"Kernel type is not a torch.Tensor. Got {type(kernel)}") if len(kernel.shape) != 2: raise ValueError(f"Kernel size must have 2 dimensions. Got {kernel.dim()}") return dilation( erosion( tensor, kernel=kernel, structuring_element=structuring_element, origin=origin, border_type=border_type, border_value=border_value, max_val=max_val, ), kernel=kernel, structuring_element=structuring_element, origin=origin, border_type=border_type, border_value=border_value, max_val=max_val, engine=engine, )
[docs]def closing( tensor: torch.Tensor, kernel: torch.Tensor, structuring_element: Optional[torch.Tensor] = None, origin: Optional[List[int]] = None, border_type: str = 'geodesic', border_value: float = 0.0, max_val: float = 1e4, engine: str = 'unfold', ) -> torch.Tensor: r"""Return the closed image, (that means, erosion after a dilation) applying the same kernel in each channel. .. image:: _static/img/closing.png The kernel must have 2 dimensions. Args: tensor: Image with shape :math:`(B, C, H, W)`. kernel: Positions of non-infinite elements of a flat structuring element. Non-zero values give the set of neighbors of the center over which the operation is applied. Its shape is :math:`(k_x, k_y)`. For full structural elements use torch.ones_like(structural_element). structuring_element: Structuring element used for the grayscale dilation. It may be a non-flat structuring element. origin: Origin of the structuring element. Default is None and uses the center of the structuring element as origin (rounding towards zero). border_type: It determines how the image borders are handled, where ``border_value`` is the value when ``border_type`` is equal to ``constant``. Default: ``geodesic`` which ignores the values that are outside the image when applying the operation. border_value: Value to fill past edges of input if ``border_type`` is ``constant``. max_val: The value of the infinite elements in the kernel. engine: convolution is faster and less memory hungry, and unfold is more stable numerically Returns: Closed image with shape :math:`(B, C, H, W)`. .. note:: See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/ morphology_101.html>`__. Example: >>> tensor = torch.rand(1, 3, 5, 5) >>> kernel = torch.ones(3, 3) >>> closed_img = closing(tensor, kernel) """ if not isinstance(tensor, torch.Tensor): raise TypeError(f"Input type is not a torch.Tensor. Got {type(tensor)}") if len(tensor.shape) != 4: raise ValueError(f"Input size must have 4 dimensions. Got {tensor.dim()}") if not isinstance(kernel, torch.Tensor): raise TypeError(f"Kernel type is not a torch.Tensor. Got {type(kernel)}") if len(kernel.shape) != 2: raise ValueError(f"Kernel size must have 2 dimensions. Got {kernel.dim()}") return erosion( dilation( tensor, kernel=kernel, structuring_element=structuring_element, origin=origin, border_type=border_type, border_value=border_value, max_val=max_val, ), kernel=kernel, structuring_element=structuring_element, origin=origin, border_type=border_type, border_value=border_value, max_val=max_val, engine=engine, )
# Morphological Gradient
[docs]def gradient( tensor: torch.Tensor, kernel: torch.Tensor, structuring_element: Optional[torch.Tensor] = None, origin: Optional[List[int]] = None, border_type: str = 'geodesic', border_value: float = 0.0, max_val: float = 1e4, engine: str = 'unfold', ) -> torch.Tensor: r"""Return the morphological gradient of an image. .. image:: _static/img/gradient.png That means, (dilation - erosion) applying the same kernel in each channel. The kernel must have 2 dimensions. Args: tensor: Image with shape :math:`(B, C, H, W)`. kernel: Positions of non-infinite elements of a flat structuring element. Non-zero values give the set of neighbors of the center over which the operation is applied. Its shape is :math:`(k_x, k_y)`. For full structural elements use torch.ones_like(structural_element). structuring_element: Structuring element used for the grayscale dilation. It may be a non-flat structuring element. origin: Origin of the structuring element. Default is None and uses the center of the structuring element as origin (rounding towards zero). border_type: It determines how the image borders are handled, where ``border_value`` is the value when ``border_type`` is equal to ``constant``. Default: ``geodesic`` which ignores the values that are outside the image when applying the operation. border_value: Value to fill past edges of input if ``border_type`` is ``constant``. max_val: The value of the infinite elements in the kernel. engine: convolution is faster and less memory hungry, and unfold is more stable numerically Returns: Gradient image with shape :math:`(B, C, H, W)`. .. note:: See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/ morphology_101.html>`__. Example: >>> tensor = torch.rand(1, 3, 5, 5) >>> kernel = torch.ones(3, 3) >>> gradient_img = gradient(tensor, kernel) """ return dilation( tensor, kernel=kernel, structuring_element=structuring_element, origin=origin, border_type=border_type, border_value=border_value, max_val=max_val, engine=engine, ) - erosion( tensor, kernel=kernel, structuring_element=structuring_element, origin=origin, border_type=border_type, border_value=border_value, max_val=max_val, engine=engine, )
[docs]def top_hat( tensor: torch.Tensor, kernel: torch.Tensor, structuring_element: Optional[torch.Tensor] = None, origin: Optional[List[int]] = None, border_type: str = 'geodesic', border_value: float = 0.0, max_val: float = 1e4, engine: str = 'unfold', ) -> torch.Tensor: r"""Return the top hat transformation of an image. .. image:: _static/img/top_hat.png That means, (image - opened_image) applying the same kernel in each channel. The kernel must have 2 dimensions. See :func:`~kornia.morphology.opening` for details. Args: tensor: Image with shape :math:`(B, C, H, W)`. kernel: Positions of non-infinite elements of a flat structuring element. Non-zero values give the set of neighbors of the center over which the operation is applied. Its shape is :math:`(k_x, k_y)`. For full structural elements use torch.ones_like(structural_element). structuring_element: Structuring element used for the grayscale dilation. It may be a non-flat structuring element. origin: Origin of the structuring element. Default: ``None`` and uses the center of the structuring element as origin (rounding towards zero). border_type: It determines how the image borders are handled, where ``border_value`` is the value when ``border_type`` is equal to ``constant``. Default: ``geodesic`` which ignores the values that are outside the image when applying the operation. border_value: Value to fill past edges of input if ``border_type`` is ``constant``. max_val: The value of the infinite elements in the kernel. engine: convolution is faster and less memory hungry, and unfold is more stable numerically Returns: Top hat transformed image with shape :math:`(B, C, H, W)`. .. note:: See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/ morphology_101.html>`__. Example: >>> tensor = torch.rand(1, 3, 5, 5) >>> kernel = torch.ones(3, 3) >>> top_hat_img = top_hat(tensor, kernel) """ if not isinstance(tensor, torch.Tensor): raise TypeError(f"Input type is not a torch.Tensor. Got {type(tensor)}") if len(tensor.shape) != 4: raise ValueError(f"Input size must have 4 dimensions. Got {tensor.dim()}") if not isinstance(kernel, torch.Tensor): raise TypeError(f"Kernel type is not a torch.Tensor. Got {type(kernel)}") if len(kernel.shape) != 2: raise ValueError(f"Kernel size must have 2 dimensions. Got {kernel.dim()}") return tensor - opening( tensor, kernel=kernel, structuring_element=structuring_element, origin=origin, border_type=border_type, border_value=border_value, max_val=max_val, engine=engine, )
[docs]def bottom_hat( tensor: torch.Tensor, kernel: torch.Tensor, structuring_element: Optional[torch.Tensor] = None, origin: Optional[List[int]] = None, border_type: str = 'geodesic', border_value: float = 0.0, max_val: float = 1e4, engine: str = 'unfold', ) -> torch.Tensor: r"""Return the bottom hat transformation of an image. .. image:: _static/img/bottom_hat.png That means, (closed_image - image) applying the same kernel in each channel. The kernel must have 2 dimensions. See :func:`~kornia.morphology.closing` for details. Args: tensor: Image with shape :math:`(B, C, H, W)`. kernel: Positions of non-infinite elements of a flat structuring element. Non-zero values give the set of neighbors of the center over which the operation is applied. Its shape is :math:`(k_x, k_y)`. For full structural elements use torch.ones_like(structural_element). structuring_element: Structuring element used for the grayscale dilation. It may be a non-flat structuring element. origin: Origin of the structuring element. Default: ``None`` and uses the center of the structuring element as origin (rounding towards zero). border_type: It determines how the image borders are handled, where ``border_value`` is the value when ``border_type`` is equal to ``constant``. Default: ``geodesic`` which ignores the values that are outside the image when applying the operation. border_value: Value to fill past edges of input if ``border_type`` is ``constant``. max_val: The value of the infinite elements in the kernel. engine: convolution is faster and less memory hungry, and unfold is more stable numerically Returns: Top hat transformed image with shape :math:`(B, C, H, W)`. .. note:: See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/ morphology_101.html>`__. Example: >>> tensor = torch.rand(1, 3, 5, 5) >>> kernel = torch.ones(3, 3) >>> bottom_hat_img = bottom_hat(tensor, kernel) """ if not isinstance(tensor, torch.Tensor): raise TypeError(f"Input type is not a torch.Tensor. Got {type(tensor)}") if len(tensor.shape) != 4: raise ValueError(f"Input size must have 4 dimensions. Got {tensor.dim()}") if not isinstance(kernel, torch.Tensor): raise TypeError(f"Kernel type is not a torch.Tensor. Got {type(kernel)}") if len(kernel.shape) != 2: raise ValueError(f"Kernel size must have 2 dimensions. Got {kernel.dim()}") return ( closing( tensor, kernel=kernel, structuring_element=structuring_element, origin=origin, border_type=border_type, border_value=border_value, max_val=max_val, engine=engine, ) - tensor )