garmentiq.landmark.detection.utils

import math
import numpy as np
import cv2
from PIL import Image
import torchvision.transforms as transforms
from typing import Union


def get_max_preds(batch_heatmaps):
    """
    Gets predictions from score maps.

    Args:
        batch_heatmaps (numpy.ndarray): Heatmaps generated by the model.
                                        Shape: [batch_size, num_joints, height, width].

    Returns:
        tuple:
            - preds (numpy.ndarray): Predicted coordinates.
            - maxvals (numpy.ndarray): Maximum values (confidence scores) for each prediction.
    """
    assert isinstance(
        batch_heatmaps, np.ndarray
    ), "batch_heatmaps should be numpy.ndarray"
    assert batch_heatmaps.ndim == 4, "batch_heatmaps should be 4-dim"

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

    # Convert the flat argmax index into (x, y) heatmap coordinates.
    preds[:, :, 0] = preds[:, :, 0] % width
    preds[:, :, 1] = np.floor(preds[:, :, 1] / width)

    # Zero out predictions whose peak value is not positive.
    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals


def get_final_preds(output, height=96, width=72):
    """
    Transforms raw heatmap outputs into final landmark coordinates.

    Applies post-processing (e.g., quarter offset for sub-pixel accuracy)
    to refine the landmark predictions from heatmaps.

    Args:
        output (numpy.ndarray): Raw heatmap output from the model.
        height (int): Height of the heatmap. Defaults to 96.
        width (int): Width of the heatmap. Defaults to 72.

    Returns:
        tuple:
            - coords (numpy.ndarray): Final predicted coordinates.
            - maxvals (numpy.ndarray): Confidence scores for the predictions.
    """
    heatmap_height = height
    heatmap_width = width

    batch_heatmaps = output
    coords, maxvals = get_max_preds(batch_heatmaps)
    # Post-processing: shift each prediction a quarter pixel toward the
    # higher of its two neighbours for sub-pixel accuracy.
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            hm = batch_heatmaps[n][p]
            px = int(math.floor(coords[n][p][0] + 0.5))
            py = int(math.floor(coords[n][p][1] + 0.5))
            if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                diff = np.array(
                    [hm[py][px + 1] - hm[py][px - 1], hm[py + 1][px] - hm[py - 1][px]]
                )
                coords[n][p] += np.sign(diff) * 0.25

    return coords, maxvals


def flip_back(output_flipped, matched_parts, heatmap_wid):
    """
    Flips the output (coordinates or heatmaps) horizontally for test-time augmentation.

    Args:
        output_flipped (numpy.ndarray): The output (heatmaps or coordinates) that has been flipped.
        matched_parts (list): A list of tuples indicating which joint pairs are left-right symmetric.
        heatmap_wid (int): The width of the heatmap (used for coordinate flipping).

    Returns:
        numpy.ndarray: The flipped output with joints correctly reordered.
    """
    if output_flipped.ndim == 4:
        output_flipped = output_flipped[:, :, :, ::-1]
        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :, :].copy()
            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
            output_flipped[:, pair[1], :, :] = tmp
    elif output_flipped.ndim == 3:
        output_flipped[:, :, 0] = heatmap_wid - output_flipped[:, :, 0]
        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :].copy()
            output_flipped[:, pair[0], :] = output_flipped[:, pair[1], :]
            output_flipped[:, pair[1], :] = tmp
    else:
        raise NotImplementedError(
            "output_flipped should be [batch_size, num_joints, height, width] "
            "or [batch_size, num_joints, coord_dim]"
        )

    return output_flipped


def fliplr_joints(joints, joints_vis, width, matched_parts):
    """
    Flips joint coordinates horizontally and reorders them based on matched parts.

    Args:
        joints (numpy.ndarray): Array of joint coordinates.
        joints_vis (numpy.ndarray): Array indicating visibility of joints.
        width (int): Width of the image or feature map.
        matched_parts (list): A list of tuples indicating which joint pairs are left-right symmetric.

    Returns:
        tuple:
            - joints (numpy.ndarray): Flipped and reordered joint coordinates.
            - joints_vis (numpy.ndarray): Corresponding joint visibility.
    """
    # Flip horizontal
    joints[:, 0] = width - joints[:, 0] - 1

    # Change left-right parts
    for pair in matched_parts:
        joints[pair[0], :], joints[pair[1], :] = (
            joints[pair[1], :],
            joints[pair[0], :].copy(),
        )
        joints_vis[pair[0], :], joints_vis[pair[1], :] = (
            joints_vis[pair[1], :],
            joints_vis[pair[0], :].copy(),
        )

    return joints * joints_vis, joints_vis


def transform_preds(coords, center, scale, output_size: list[int] = [72, 96]):
    """
    Transforms predicted coordinates from heatmap space back to original image space.

    Args:
        coords (numpy.ndarray): Predicted coordinates in heatmap space.
        center (numpy.ndarray): Center of the original image (or cropped region).
        scale (numpy.ndarray): Scale factor applied during preprocessing.
        output_size (list[int], optional): The [width, height] of the heatmap space the coordinates live in. Defaults to [72, 96].

    Returns:
        numpy.ndarray: Transformed coordinates in original image space.
    """
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords


def get_affine_transform(
    center, scale, rot, output_size, shift=np.array([0, 0], dtype=np.float32), inv=0
):
    """
    Calculates the 2x3 affine transformation matrix for image cropping and resizing.

    Args:
        center (numpy.ndarray): Center of the original image or region of interest.
        scale (numpy.ndarray): Scale factor for the transformation.
        rot (float): Rotation angle in degrees.
        output_size (list): Target output size [width, height].
        shift (numpy.ndarray, optional): Shift applied to the center. Defaults to [0, 0].
        inv (int, optional): If 1, returns the inverse transformation matrix. Defaults to 0.

    Returns:
        numpy.ndarray: The 2x3 affine transformation matrix.
    """
    # Promote a scalar scale to an isotropic (x, y) pair.
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    # Three corresponding points in source and destination space define the map.
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


def affine_transform(pt, t):
    """
    Applies an affine transformation matrix to a 2D point.

    Args:
        pt (tuple or list): The 2D point (x, y) to transform.
        t (numpy.ndarray): The 2x3 affine transformation matrix.

    Returns:
        numpy.ndarray: The transformed 2D point.
    """
    new_pt = np.array([pt[0], pt[1], 1.0]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]


def get_3rd_point(a, b):
    """
    Calculates a third point to form a right-angled triangle with two given points.
    Used for creating a 3-point basis for affine transformations.

    Args:
        a (numpy.ndarray): First point.
        b (numpy.ndarray): Second point.

    Returns:
        numpy.ndarray: The calculated third point.
    """
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
    """
    Calculates the direction vector after rotation.

    Args:
        src_point (list): Source point [x, y].
        rot_rad (float): Rotation angle in radians.

    Returns:
        list: The rotated direction vector.
    """
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)

    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs

    return src_result


def crop(img, center, scale, output_size, rot=0):
    """
    Crops and resizes an image using an affine transformation.

    Args:
        img (numpy.ndarray): The input image.
        center (numpy.ndarray): The center of the crop region.
        scale (numpy.ndarray): The scale factor for the crop.
        output_size (tuple): The target output size (width, height).
        rot (int, optional): Rotation angle in degrees. Defaults to 0.

    Returns:
        numpy.ndarray: The cropped and transformed image.
    """
    trans = get_affine_transform(center, scale, rot, output_size)

    dst_img = cv2.warpAffine(
        img, trans, (int(output_size[0]), int(output_size[1])), flags=cv2.INTER_LINEAR
    )

    return dst_img


def input_image_transform(
    img_input: Union[str, np.ndarray],
    scale_std: float = 200.0,
    resize_dim: list[int] = [288, 384],
    normalize_mean: list[float] = [0.485, 0.456, 0.406],
    normalize_std: list[float] = [0.229, 0.224, 0.225],
):
    """
    Preprocesses an input image for landmark detection.

    This function takes an image (either path or NumPy array), applies an affine
    transformation (scaling, centering), resizes it, converts it to a PyTorch tensor,
    and normalizes it.

    Args:
        img_input (Union[str, np.ndarray]): Path to the image file or a NumPy array of the image.
        scale_std (float, optional): Standard scale for image transformation. Defaults to 200.0.
        resize_dim (list[int], optional): Target dimensions [width, height] for the transformed image.
                                          Defaults to [288, 384].
        normalize_mean (list[float], optional): Mean values for image normalization (RGB channels).
                                                Defaults to [0.485, 0.456, 0.406].
        normalize_std (list[float], optional): Standard deviation values for image normalization (RGB channels).
                                               Defaults to [0.229, 0.224, 0.225].

    Raises:
        ValueError: If `img_input` is neither a file path nor a NumPy array.

    Returns:
        tuple:
            - input_tensor (torch.Tensor): The preprocessed image as a PyTorch tensor, ready for model input.
            - image_np (numpy.ndarray): The original image as a NumPy array (RGB).
            - center (numpy.ndarray): The center of the original image used for transformation.
            - scale (numpy.ndarray): The scale factor used for transformation.
    """
    if isinstance(img_input, str):
        img = Image.open(img_input).convert("RGB")
    elif isinstance(img_input, np.ndarray):
        img = Image.fromarray(img_input.astype(np.uint8))
    else:
        raise ValueError("img_input must be a file path or a NumPy array.")

    image_np = np.array(img)

    h, w = image_np.shape[:2]
    center = np.array([w / 2, h / 2], dtype=np.float32)
    scale = np.array([w / scale_std, h / scale_std], dtype=np.float32)
    image_size = np.array(resize_dim)
    rotation = 0

    trans = get_affine_transform(center, scale, rotation, image_size)
    warped_image = cv2.warpAffine(
        image_np,
        trans,
        (int(image_size[0]), int(image_size[1])),
        flags=cv2.INTER_LINEAR,
    )

    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(normalize_mean, normalize_std)
    input_tensor = normalize(to_tensor(warped_image)).unsqueeze(0)

    return input_tensor, image_np, center, scale
def get_max_preds(batch_heatmaps):

Gets predictions from score maps, i.e. heatmaps of shape [batch_size, num_joints, height, width].

Arguments:
  • batch_heatmaps (numpy.ndarray): Heatmaps generated by the model. Shape: [batch_size, num_joints, height, width].
Returns:
  tuple:
    • preds (numpy.ndarray): Predicted coordinates.
    • maxvals (numpy.ndarray): Maximum values (confidence scores) for each prediction.
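
A minimal usage sketch on synthetic data (assuming the module is importable under the path above): plant a single peak and check that get_max_preds recovers its (x, y) location.

import numpy as np
from garmentiq.landmark.detection.utils import get_max_preds

# Heatmaps of shape [batch_size, num_joints, height, width].
heatmaps = np.zeros((2, 4, 96, 72), dtype=np.float32)
heatmaps[0, 0, 40, 30] = 1.0  # peak at x=30, y=40 for batch 0, joint 0

preds, maxvals = get_max_preds(heatmaps)
print(preds.shape, maxvals.shape)  # (2, 4, 2) (2, 4, 1)
print(preds[0, 0])                 # [30. 40.]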

def get_final_preds(output, height=96, width=72):

Transforms raw heatmap outputs into final landmark coordinates.

Applies post-processing (e.g., quarter offset for sub-pixel accuracy) to refine the landmark predictions from heatmaps.

Arguments:
  • output (numpy.ndarray): Raw heatmap output from the model.
  • height (int): Height of the heatmap. Defaults to 96.
  • width (int): Width of the heatmap. Defaults to 72.
Returns:
  tuple:
    • coords (numpy.ndarray): Final predicted coordinates.
    • maxvals (numpy.ndarray): Confidence scores for the predictions.
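
A short sketch of the quarter-offset step on synthetic data: when the neighbour to the right of the peak is stronger than the one to the left, the x coordinate is nudged by +0.25.

import numpy as np
from garmentiq.landmark.detection.utils import get_final_preds

heatmaps = np.zeros((1, 1, 96, 72), dtype=np.float32)
heatmaps[0, 0, 50, 36] = 1.0  # peak at x=36, y=50
heatmaps[0, 0, 50, 37] = 0.6  # stronger right neighbour
heatmaps[0, 0, 50, 35] = 0.4  # weaker left neighbour

coords, maxvals = get_final_preds(heatmaps)
print(coords[0, 0])  # [36.25 50.  ] -- shifted a quarter pixel to the right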

def flip_back(output_flipped, matched_parts, heatmap_wid):

Flips the output (coordinates or heatmaps) horizontally for test-time augmentation.

Arguments:
  • output_flipped (numpy.ndarray): The output (heatmaps or coordinates) that has been flipped.
  • matched_parts (list): A list of tuples indicating which joint pairs are left-right symmetric.
  • heatmap_wid (int): The width of the heatmap (used for coordinate flipping).
Returns:
  • numpy.ndarray: The flipped output with joints correctly reordered.
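
A sketch with random heatmaps and a hypothetical symmetric pair (0, 1). Note that in the 4-dim branch the reversed view aliases the input array, so the swap writes back into it; keeping a copy makes the comparison below valid.

import numpy as np
from garmentiq.landmark.detection.utils import flip_back

flipped = np.random.rand(1, 2, 96, 72).astype(np.float32)
original = flipped.copy()
matched_parts = [(0, 1)]  # hypothetical left/right joint pair

restored = flip_back(flipped, matched_parts, heatmap_wid=72)
# Columns are reversed and the paired joints are exchanged:
assert np.allclose(restored[0, 0], original[0, 1, :, ::-1])
assert np.allclose(restored[0, 1], original[0, 0, :, ::-1])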

def fliplr_joints(joints, joints_vis, width, matched_parts):

Flips joint coordinates horizontally and reorders them based on matched parts.

Arguments:
  • joints (numpy.ndarray): Array of joint coordinates.
  • joints_vis (numpy.ndarray): Array indicating visibility of joints.
  • width (int): Width of the image or feature map.
  • matched_parts (list): A list of tuples indicating which joint pairs are left-right symmetric.
Returns:
  tuple:
    • joints (numpy.ndarray): Flipped and reordered joint coordinates.
    • joints_vis (numpy.ndarray): Corresponding joint visibility.
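
A small sketch with two hypothetical joints in an image 100 px wide; after mirroring, the matched pair is also swapped so that "left" joints keep their left labels.

import numpy as np
from garmentiq.landmark.detection.utils import fliplr_joints

joints = np.array([[10.0, 20.0], [80.0, 20.0]])  # (x, y) per joint
joints_vis = np.ones((2, 2))                     # both joints visible
matched_parts = [(0, 1)]                         # hypothetical pair

flipped, vis = fliplr_joints(joints, joints_vis, 100, matched_parts)
print(flipped)  # [[19. 20.], [89. 20.]] -- mirrored and swapped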

def transform_preds(coords, center, scale, output_size: list[int] = [72, 96]):

Transforms predicted coordinates from heatmap space back to original image space.

Arguments:
  • coords (numpy.ndarray): Predicted coordinates in heatmap space.
  • center (numpy.ndarray): Center of the original image (or cropped region).
  • scale (numpy.ndarray): Scale factor applied during preprocessing.
  • output_size (list[int], optional): The [width, height] of the heatmap space the coordinates live in. Defaults to [72, 96].
Returns:
  • numpy.ndarray: Transformed coordinates in original image space.
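
A sketch mapping a heatmap-space point back to a 500x600 source image, using the same center/scale convention as input_image_transform below; the heatmap center maps back to the image center.

import numpy as np
from garmentiq.landmark.detection.utils import transform_preds

w, h = 500, 600
center = np.array([w / 2, h / 2], dtype=np.float32)
scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)

coords = np.array([[36.0, 48.0]])  # center of a 72x96 heatmap
print(transform_preds(coords, center, scale))  # [[250. 300.]]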

def get_affine_transform(center, scale, rot, output_size, shift=np.array([0, 0], dtype=np.float32), inv=0):

Calculates the 2x3 affine transformation matrix for image cropping and resizing.

Arguments:
  • center (numpy.ndarray): Center of the original image or region of interest.
  • scale (numpy.ndarray): Scale factor for the transformation.
  • rot (float): Rotation angle in degrees.
  • output_size (list): Target output size [width, height].
  • shift (numpy.ndarray, optional): Shift applied to the center. Defaults to [0, 0].
  • inv (int, optional): If 1, returns the inverse transformation matrix. Defaults to 0.
Returns:
  • numpy.ndarray: The 2x3 affine transformation matrix.
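
A sketch building the forward matrix for a 400 px source box (scale 2.0, times the internal factor of 200) mapped onto a 72x96 output; passing inv=1 instead would give the matrix for the reverse direction.

import numpy as np
from garmentiq.landmark.detection.utils import get_affine_transform

center = np.array([200.0, 200.0], dtype=np.float32)
scale = np.array([2.0, 2.0], dtype=np.float32)  # 2.0 * 200 = 400 px box

trans = get_affine_transform(center, scale, rot=0, output_size=[72, 96])
print(trans.shape)  # (2, 3)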

def affine_transform(pt, t):

Applies an affine transformation matrix to a 2D point.

Arguments:
  • pt (tuple or list): The 2D point (x, y) to transform.
  • t (numpy.ndarray): The 2x3 affine transformation matrix.
Returns:
  • numpy.ndarray: The transformed 2D point.
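
Continuing the sketch above: under the forward matrix, the source center should land on the center of the 72x96 output.

import numpy as np
from garmentiq.landmark.detection.utils import get_affine_transform, affine_transform

trans = get_affine_transform(
    np.array([200.0, 200.0]), np.array([2.0, 2.0]), rot=0, output_size=[72, 96]
)
print(affine_transform([200.0, 200.0], trans))  # [36. 48.]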

def get_3rd_point(a, b):

Calculates a third point to form a right-angled triangle with two given points. Used for creating a 3-point basis for affine transformations.

Arguments:
  • a (numpy.ndarray): First point.
  • b (numpy.ndarray): Second point.
Returns:
  • numpy.ndarray: The calculated third point.
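
A quick numeric check: the returned point completes a right angle at b, which is exactly the third correspondence cv2.getAffineTransform needs.

import numpy as np
from garmentiq.landmark.detection.utils import get_3rd_point

a = np.array([0.0, 0.0], dtype=np.float32)
b = np.array([0.0, -1.0], dtype=np.float32)
print(get_3rd_point(a, b))  # [-1. -1.]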

def get_dir(src_point, rot_rad):

Calculates the direction vector after rotation.

Arguments:
  • src_point (list): Source point [x, y].
  • rot_rad (float): Rotation angle in radians.
Returns:
  • list: The rotated direction vector.
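
A worked example: rotating the downward vector [0, -100] by 90 degrees (pi/2 radians) gives approximately [100, 0], following the standard counter-clockwise rotation formula.

import numpy as np
from garmentiq.landmark.detection.utils import get_dir

print(get_dir([0, -100], np.pi / 2))  # ~[100.0, 0.0] up to float rounding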

def crop(img, center, scale, output_size, rot=0):

Crops and resizes an image using an affine transformation.

Arguments:
  • img (numpy.ndarray): The input image.
  • center (numpy.ndarray): The center of the crop region.
  • scale (numpy.ndarray): The scale factor for the crop.
  • output_size (tuple): The target output size (width, height).
  • rot (int, optional): Rotation angle in degrees. Defaults to 0.
Returns:
  • numpy.ndarray: The cropped and transformed image.
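
A sketch cropping a synthetic 500x600 image around its center into a 72x96 patch; note that cv2.warpAffine returns arrays with height first.

import numpy as np
from garmentiq.landmark.detection.utils import crop

img = np.random.randint(0, 256, (600, 500, 3), dtype=np.uint8)
center = np.array([250.0, 300.0], dtype=np.float32)
scale = np.array([500 / 200.0, 600 / 200.0], dtype=np.float32)

patch = crop(img, center, scale, output_size=(72, 96))
print(patch.shape)  # (96, 72, 3)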

def input_image_transform(img_input: Union[str, np.ndarray], scale_std: float = 200.0, resize_dim: list[int] = [288, 384], normalize_mean: list[float] = [0.485, 0.456, 0.406], normalize_std: list[float] = [0.229, 0.224, 0.225]):

Preprocesses an input image for landmark detection.

This function takes an image (either path or NumPy array), applies an affine transformation (scaling, centering), resizes it, converts it to a PyTorch tensor, and normalizes it.

Arguments:
  • img_input (Union[str, np.ndarray]): Path to the image file or a NumPy array of the image.
  • scale_std (float, optional): Standard scale for image transformation. Defaults to 200.0.
  • resize_dim (list[int], optional): Target dimensions [width, height] for the transformed image. Defaults to [288, 384].
  • normalize_mean (list[float], optional): Mean values for image normalization (RGB channels). Defaults to [0.485, 0.456, 0.406].
  • normalize_std (list[float], optional): Standard deviation values for image normalization (RGB channels). Defaults to [0.229, 0.224, 0.225].
Raises:
  • ValueError: If img_input is neither a file path nor a NumPy array.
Returns:
  tuple:
    • input_tensor (torch.Tensor): The preprocessed image as a PyTorch tensor, ready for model input.
    • image_np (numpy.ndarray): The original image as a NumPy array (RGB).
    • center (numpy.ndarray): The center of the original image used for transformation.
    • scale (numpy.ndarray): The scale factor used for transformation.
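
Putting the pieces together, a hedged end-to-end sketch: the trained landmark model is a stand-in here (random heatmaps of the default 96x72 size with a hypothetical 8 joints), but the pre- and post-processing calls are the ones defined above.

import numpy as np
from garmentiq.landmark.detection.utils import (
    input_image_transform,
    get_final_preds,
    transform_preds,
)

image = np.random.randint(0, 256, (600, 500, 3), dtype=np.uint8)  # stand-in image
input_tensor, image_np, center, scale = input_image_transform(image)

# With a real model: heatmaps = model(input_tensor).detach().cpu().numpy()
heatmaps = np.random.rand(1, 8, 96, 72).astype(np.float32)  # stand-in output

coords, confidences = get_final_preds(heatmaps)
landmarks = transform_preds(coords[0], center, scale)  # back to image space
print(landmarks.shape)  # (8, 2) pixel coordinates in the original image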