# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
# pyre-unsafe
from __future__ import absolute_import, division, print_function, unicode_literals

import math
from typing import List, Tuple

import torch

# from detectron2.layers.rotated_boxes import pairwise_iou_rotated
from .boxes import Boxes


def pairwise_iou_rotated(boxes1, boxes2):
    """
    Return intersection-over-union (Jaccard index) of boxes.

    Both sets of boxes are expected to be in
    (x_center, y_center, width, height, angle) format.

    Arguments:
        boxes1 (Tensor[N, 5])
        boxes2 (Tensor[M, 5])

    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """
    return torch.ops.detectron2.box_iou_rotated(boxes1, boxes2)
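
# Usage sketch (a hedged example, not part of the original API surface; it assumes
# the detectron2 C++/CUDA extension that registers
# `torch.ops.detectron2.box_iou_rotated` has been built and loaded):
#
#   b1 = torch.tensor([[5.0, 3.0, 4.0, 2.0, 0.0]])   # axis-aligned, covers [3, 7] x [2, 4]
#   b2 = torch.tensor([[5.0, 3.0, 4.0, 2.0, 90.0]])  # the same box rotated 90 degrees CCW
#   iou = pairwise_iou_rotated(b1, b2)               # shape (1, 1); geometric IoU is 1/3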


class RotatedBoxes(Boxes):
    """
    This structure stores a list of rotated boxes as a Nx5 torch.Tensor.
    It supports some common methods about boxes
    (`area`, `clip`, `nonempty`, etc),
    and also behaves like a Tensor
    (support indexing, `to(device)`, `.device`, and iteration over all boxes)
    """

    def __init__(self, tensor: torch.Tensor):
        """
        Args:
            tensor (Tensor[float]): a Nx5 matrix. Each row is
                (x_center, y_center, width, height, angle),
                in which angle is represented in degrees.
                While there's no strict range restriction for it,
                the recommended principal range is [-180, 180) degrees.

        Assume we have a horizontal box B = (x_center, y_center, width, height),
        where width is along the x-axis and height is along the y-axis.
        The rotated box B_rot (x_center, y_center, width, height, angle)
        can be seen as:

        1. When angle == 0:
           B_rot == B
        2. When angle > 0:
           B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW;
        3. When angle < 0:
           B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW.

        Mathematically, since the right-handed coordinate system for image space
        is (y, x), where y is top->down and x is left->right, the 4 vertices of the
        rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from
        the vertices of the horizontal rectangle :math:`(y_i, x_i)` (i = 1, 2, 3, 4)
        in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians,
        :math:`(y_c, x_c)` is the center of the rectangle):

        .. math::

            yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c,

            xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c,

        which is the standard rigid-body rotation transformation.

        Intuitively, the angle is
        (1) the rotation angle from y-axis in image space
        to the height vector (top->down in the box's local coordinate system)
        of the box in CCW, and
        (2) the rotation angle from x-axis in image space
        to the width vector (left->right in the box's local coordinate system)
        of the box in CCW.

        More intuitively, consider the following horizontal box ABCD represented
        in (x1, y1, x2, y2): (3, 2, 7, 4),
        covering the [3, 7] x [2, 4] region of the continuous coordinate system
        which looks like this:

        .. code:: none

            O--------> x
            |
            |  A---B
            |  |   |
            |  D---C
            |
            v y

        Note that each capital letter represents one 0-dimensional geometric point
        instead of a 'square pixel' here.

        In the example above, using (x, y) to represent a point we have:

        .. math::

            O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4)

        We name vector AB = vector DC as the width vector in box's local coordinate system, and
        vector AD = vector BC as the height vector in box's local coordinate system. Initially,
        when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis
        in the image space, respectively.

        For better illustration, we denote the center of the box as E,

        .. code:: none

            O--------> x
            |
            |  A---B
            |  | E |
            |  D---C
            |
            v y

        where the center E = ((3+7)/2, (2+4)/2) = (5, 3).

        Also,

        .. math::

            width = |AB| = |CD| = 7 - 3 = 4,
            height = |AD| = |BC| = 4 - 2 = 2.

        Therefore, the corresponding representation for the same shape in rotated box in
        (x_center, y_center, width, height, angle) format is:

            (5, 3, 4, 2, 0),

        Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees
        CCW (counter-clockwise) by definition. It looks like this:

        .. code:: none

            O--------> x
            |   B-C
            |   | |
            |   |E|
            |   | |
            |   A-D
            v y

        The center E is still located at the same point (5, 3), while the vertices
        ABCD are rotated by 90 degrees CCW with regard to E:
        A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5)

        Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to
        vector AD or vector BC (the top->down height vector in box's local coordinate system),
        or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right
        width vector in box's local coordinate system).

        .. math::

            width = |AB| = |CD| = 5 - 1 = 4,
            height = |AD| = |BC| = 6 - 4 = 2.

        Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise)
        by definition? It looks like this:

        .. code:: none

            O--------> x
            |   D-A
            |   | |
            |   |E|
            |   | |
            |   C-B
            v y

        The center E is still located at the same point (5, 3), while the vertices
        ABCD are rotated by 90 degrees CW with regard to E:
        A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1)

        .. math::

            width = |AB| = |CD| = 5 - 1 = 4,
            height = |AD| = |BC| = 6 - 4 = 2.

        This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU
        will be 1. However, these two will generate different RoI Pooling results and
        should not be treated as an identical box.

        On the other hand, it's easy to see that (X, Y, W, H, A) is identical to
        (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be
        identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is
        equivalent to rotating the same shape 90 degrees CW.

        We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180):

        .. code:: none

            O--------> x
            |
            |  C---D
            |  | E |
            |  B---A
            |
            v y

        .. math::

            A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2),

            width = |AB| = |CD| = 7 - 3 = 4,
            height = |AD| = |BC| = 4 - 2 = 2.

        Finally, this is a very inaccurate (heavily quantized) illustration of
        what (5, 3, 4, 2, 60) looks like, in case anyone wonders:

        .. code:: none

            O--------> x
            |     B\
            |    /  C
            |   /E /
            |  A  /
            |   `D
            v y

        It's still a rectangle with center of (5, 3), width of 4 and height of 2,
        but its angle (and thus orientation) is somewhere between
        (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90).
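
        A short usage sketch (a hedged illustration; the values follow directly from
        the definitions above):

        .. code:: none

            boxes = RotatedBoxes(torch.tensor([
                [5.0, 3.0, 4.0, 2.0, 0.0],   # the horizontal box ABCD above
                [5.0, 3.0, 4.0, 2.0, 90.0],  # the same shape rotated 90 degrees CCW
            ]))
            boxes.area()         # tensor([8., 8.])
            boxes.get_centers()  # tensor([[5., 3.], [5., 3.]])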
- """
- device = (
- tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu")
- )
- tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
- if tensor.numel() == 0:
- # Use reshape, so we don't end up creating a new tensor that does not depend on
- # the inputs (and consequently confuses jit)
- tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device)
- assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size()
- self.tensor = tensor

    def clone(self) -> "RotatedBoxes":
        """
        Clone the RotatedBoxes.

        Returns:
            RotatedBoxes
        """
        return RotatedBoxes(self.tensor.clone())

    def to(self, device: torch.device, non_blocking: bool = False):
        # Boxes are assumed to be float32 and do not support to(dtype)
        return RotatedBoxes(self.tensor.to(device=device, non_blocking=non_blocking))

    def area(self) -> torch.Tensor:
        """
        Computes the area of all the boxes.

        Returns:
            torch.Tensor: a vector with areas of each box.
        """
        box = self.tensor
        area = box[:, 2] * box[:, 3]
        return area

    # Avoid in-place operations so that we can torchscript; NOTE: this creates a new tensor
    def normalize_angles(self) -> None:
        """
        Restrict angles to the range of [-180, 180) degrees
        """
        angle_tensor = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0
        self.tensor = torch.cat((self.tensor[:, :4], angle_tensor[:, None]), dim=1)
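
    # Example (a hedged sketch): angles outside the principal range wrap around;
    # an input angle of 270 becomes -90 after normalization, matching the equivalence
    # of (5, 3, 4, 2, 270) and (5, 3, 4, 2, -90) described in the class docstring:
    #
    #   boxes = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 270.0]]))
    #   boxes.normalize_angles()
    #   boxes.tensor[0, 4]  # tensor(-90.)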

    def clip(
        self, box_size: Tuple[int, int], clip_angle_threshold: float = 1.0
    ) -> None:
        """
        Clip (in place) the boxes by limiting x coordinates to the range [0, width]
        and y coordinates to the range [0, height].

        For RRPN:
        Only clip boxes that are almost horizontal with a tolerance of
        clip_angle_threshold to maintain backward compatibility.

        Rotated boxes beyond this threshold are not clipped for two reasons:

        1. There are potentially multiple ways to clip a rotated box to make it
           fit within the image.
        2. It's tricky to make the entire rectangular box fit within the image
           and still be able to not leave out pixels of interest.

        Therefore we rely on ops like RoIAlignRotated to safely handle this.

        Args:
            box_size (height, width): The clipping box's size.
            clip_angle_threshold:
                Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees),
                we do the clipping as horizontal boxes.
        """
        h, w = box_size

        # normalize angles to be within [-180, 180) degrees
        self.normalize_angles()

        idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0]

        # convert to (x1, y1, x2, y2)
        x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0
        y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0
        x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0
        y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0

        # clip
        x1.clamp_(min=0, max=w)
        y1.clamp_(min=0, max=h)
        x2.clamp_(min=0, max=w)
        y2.clamp_(min=0, max=h)

        # convert back to (xc, yc, w, h)
        self.tensor[idx, 0] = (x1 + x2) / 2.0
        self.tensor[idx, 1] = (y1 + y2) / 2.0
        # make sure widths and heights do not increase due to numerical errors
        self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1)
        self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1)
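
    # Example (a hedged sketch): for an image of size (height, width) = (10, 10),
    # a nearly horizontal box sticking out past x = 0 is clipped, while a box
    # rotated beyond clip_angle_threshold is left untouched:
    #
    #   boxes = RotatedBoxes(torch.tensor([
    #       [1.0, 5.0, 6.0, 2.0, 0.0],   # extends to x = -2; gets clipped
    #       [1.0, 5.0, 6.0, 2.0, 45.0],  # rotated; not clipped
    #   ]))
    #   boxes.clip((10, 10))
    #   # first row becomes roughly (2, 5, 4, 2, 0); second row is unchanged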

    def nonempty(self, threshold: float = 0.0) -> torch.Tensor:
        """
        Find boxes that are non-empty.
        A box is considered empty if any of its sides is no larger than the threshold.

        Returns:
            Tensor: a binary vector which represents
            whether each box is empty (False) or non-empty (True).
        """
        box = self.tensor
        widths = box[:, 2]
        heights = box[:, 3]
        keep = (widths > threshold) & (heights > threshold)
        return keep
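
    # Example (a hedged sketch): a degenerate box with zero width is flagged as empty:
    #
    #   boxes = RotatedBoxes(torch.tensor([[5.0, 3.0, 0.0, 2.0, 0.0],
    #                                      [5.0, 3.0, 4.0, 2.0, 30.0]]))
    #   boxes.nonempty()  # tensor([False, True])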

    def __getitem__(self, item) -> "RotatedBoxes":
        """
        Returns:
            RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing.

        The following usages are allowed:

        1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box.
        2. `new_boxes = boxes[2:10]`: return a slice of boxes.
        3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor
           with `length = len(boxes)`. Nonzero elements in the vector will be selected.

        Note that the returned RotatedBoxes might share storage with this RotatedBoxes,
        subject to PyTorch's indexing semantics.
        """
        if isinstance(item, int):
            return RotatedBoxes(self.tensor[item].view(1, -1))
        b = self.tensor[item]
        assert b.dim() == 2, (
            "Indexing on RotatedBoxes with {} failed to return a matrix!".format(item)
        )
        return RotatedBoxes(b)

    def __len__(self) -> int:
        return self.tensor.shape[0]

    def __repr__(self) -> str:
        return "RotatedBoxes(" + str(self.tensor) + ")"

    def inside_box(
        self, box_size: Tuple[int, int], boundary_threshold: int = 0
    ) -> torch.Tensor:
        """
        Args:
            box_size (height, width): Size of the reference box covering
                [0, width] x [0, height]
            boundary_threshold (int): Boxes that extend beyond the reference box
                boundary by more than boundary_threshold are considered "outside".

        For RRPN, it might not be necessary to call this function since it's common
        for rotated boxes to extend outside the image boundaries
        (the clip function only clips the near-horizontal boxes).

        Returns:
            a binary vector, indicating whether each box is inside the reference box.
        """
        height, width = box_size

        cnt_x = self.tensor[..., 0]
        cnt_y = self.tensor[..., 1]
        half_w = self.tensor[..., 2] / 2.0
        half_h = self.tensor[..., 3] / 2.0
        a = self.tensor[..., 4]
        c = torch.abs(torch.cos(a * math.pi / 180.0))
        s = torch.abs(torch.sin(a * math.pi / 180.0))
        # This basically computes the horizontal bounding rectangle of the rotated box
        max_rect_dx = c * half_w + s * half_h
        max_rect_dy = c * half_h + s * half_w

        inds_inside = (
            (cnt_x - max_rect_dx >= -boundary_threshold)
            & (cnt_y - max_rect_dy >= -boundary_threshold)
            & (cnt_x + max_rect_dx < width + boundary_threshold)
            & (cnt_y + max_rect_dy < height + boundary_threshold)
        )

        return inds_inside
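
    # Example (a hedged sketch): the axis-aligned bounding rectangle of
    # (5, 3, 4, 2, 90) spans [4, 6] x [1, 5], so the box fits inside a 10 x 10
    # image but not inside a 4 x 4 one:
    #
    #   boxes = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 90.0]]))
    #   boxes.inside_box((10, 10))  # tensor([True])
    #   boxes.inside_box((4, 4))    # tensor([False])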

    def get_centers(self) -> torch.Tensor:
        """
        Returns:
            The box centers in a Nx2 array of (x, y).
        """
        return self.tensor[:, :2]

    def scale(self, scale_x: float, scale_y: float) -> None:
        """
        Scale the rotated box with horizontal and vertical scaling factors.
        Note: when scale_factor_x != scale_factor_y,
        the rotated box does not preserve the rectangular shape when the angle
        is not a multiple of 90 degrees under resize transformation.
        Instead, the shape is a parallelogram (that has skew).
        Here we make an approximation by fitting a rotated rectangle to the parallelogram.
        """
        self.tensor[:, 0] *= scale_x
        self.tensor[:, 1] *= scale_y
        theta = self.tensor[:, 4] * math.pi / 180.0
        c = torch.cos(theta)
        s = torch.sin(theta)

        # In image space, y is top->down and x is left->right.
        # Consider the local coordinate system for the rotated box,
        # where the box center is located at (0, 0), and the four vertices ABCD are
        # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2).
        # The midpoint of the left edge AD of the rotated box E is:
        #   E = (A + D) / 2 = (-w / 2, 0)
        # The midpoint of the top edge AB of the rotated box F is:
        #   F = (0, -h / 2)
        # To get the old coordinates in the global system, apply the rotation transformation
        # (Note: the right-handed coordinate system for image space is yOx):
        #   (old_x, old_y) = (s * y + c * x, c * y - s * x)
        #   E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2)
        #   F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2)
        # After applying the scaling factor (sfx, sfy):
        #   E(new) = (-sfx * c * w / 2, sfy * s * w / 2)
        #   F(new) = (-sfx * s * h / 2, -sfy * c * h / 2)
        # The new width after scaling transformation becomes:
        #   w(new) = |E(new) - O| * 2
        #          = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2
        #          = sqrt[(sfx * c)^2 + (sfy * s)^2] * w
        # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2]
        #
        # For example,
        #   when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x;
        #   when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y
        self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2)

        # h(new) = |F(new) - O| * 2
        #        = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2
        #        = sqrt[(sfx * s)^2 + (sfy * c)^2] * h
        # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2]
        #
        # For example,
        #   when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y;
        #   when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x
        self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2)

        # The angle is the rotation angle from y-axis in image space to the height
        # vector (top->down in the box's local coordinate system) of the box in CCW.
        #
        # angle(new) = angle_yOx(O - F(new))
        #            = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) )
        #            = atan2(sfx * s * h / 2, sfy * c * h / 2)
        #            = atan2(sfx * s, sfy * c)
        #
        # For example,
        #   when sfx == sfy, angle(new) == atan2(s, c) == angle(old)
        self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi
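
    # Worked example (a hedged sketch following the derivation above): scaling
    # (5, 3, 4, 2, 90) by (scale_x, scale_y) = (2, 1) gives c = 0 and |s| = 1,
    # so scale_factor_w = scale_y = 1 and scale_factor_h = scale_x = 2:
    #
    #   boxes = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 90.0]]))
    #   boxes.scale(2.0, 1.0)
    #   boxes.tensor  # approximately [[10., 3., 4., 4., 90.]]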

    @classmethod
    def cat(cls, boxes_list: List["RotatedBoxes"]) -> "RotatedBoxes":
        """
        Concatenates a list of RotatedBoxes into a single RotatedBoxes

        Arguments:
            boxes_list (list[RotatedBoxes])

        Returns:
            RotatedBoxes: the concatenated RotatedBoxes
        """
        assert isinstance(boxes_list, (list, tuple))
        if len(boxes_list) == 0:
            return cls(torch.empty(0))
        assert all([isinstance(box, RotatedBoxes) for box in boxes_list])

        # use torch.cat (vs. layers.cat) so the returned boxes never share storage with input
        cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0))
        return cat_boxes
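
    # Example (a hedged sketch):
    #
    #   merged = RotatedBoxes.cat([
    #       RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 0.0]])),
    #       RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 90.0]])),
    #   ])
    #   len(merged)  # 2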

    @property
    def device(self) -> torch.device:
        return self.tensor.device

    @torch.jit.unused
    def __iter__(self):
        """
        Yield a box as a Tensor of shape (5,) at a time.
        """
        yield from self.tensor


def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> torch.Tensor:
    """
    Given two lists of rotated boxes of size N and M,
    compute the IoU (intersection over union)
    between **all** N x M pairs of boxes.
    The box order must be (x_center, y_center, width, height, angle).

    Args:
        boxes1, boxes2 (RotatedBoxes):
            two `RotatedBoxes`. Contains N & M rotated boxes, respectively.

    Returns:
        Tensor: IoU, sized [N,M].
    """
    return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor)
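
# Usage sketch (hedged; like pairwise_iou_rotated above, this requires the compiled
# detectron2 op to be available):
#
#   b1 = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 90.0]]))
#   b2 = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, -90.0]]))
#   pairwise_iou(b1, b2)  # tensor([[1.]]) -- the two boxes cover the same region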
|