garmentiq.landmark.detection.utils
import math
import numpy as np
import cv2
from PIL import Image
import torchvision.transforms as transforms
from typing import Union
def get_max_preds(batch_heatmaps):
    """
    Gets predictions (peak locations and confidence scores) from score maps.

    Args:
        batch_heatmaps (numpy.ndarray): Heatmaps generated by the model.
            Shape: [batch_size, num_joints, height, width].

    Returns:
        tuple:
            - preds (numpy.ndarray): Predicted coordinates.
            - maxvals (numpy.ndarray): Maximum values (confidence scores) for each prediction.
    """
    assert isinstance(
        batch_heatmaps, np.ndarray
    ), "batch_heatmaps should be numpy.ndarray"
    assert batch_heatmaps.ndim == 4, "batch_heatmaps should be 4-ndim"

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

    # Convert the flat argmax index into (x, y) heatmap coordinates.
    preds[:, :, 0] = preds[:, :, 0] % width
    preds[:, :, 1] = np.floor(preds[:, :, 1] / width)

    # Zero out predictions whose peak activation is not positive.
    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals
Gets predictions (peak locations and confidence scores) from score maps (heatmaps) of shape [batch_size, num_joints, height, width].
Arguments:
- batch_heatmaps (numpy.ndarray): Heatmaps generated by the model. Shape: [batch_size, num_joints, height, width].
Returns:
tuple:
- preds (numpy.ndarray): Predicted coordinates.
- maxvals (numpy.ndarray): Maximum values (confidence scores) for each prediction.
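A minimal usage sketch on fabricated heatmaps (the import path follows the module name above; shapes and values are illustrative only):

import numpy as np
from garmentiq.landmark.detection.utils import get_max_preds

# One image, two joints, 96x72 heatmaps with one synthetic peak each.
heatmaps = np.zeros((1, 2, 96, 72), dtype=np.float32)
heatmaps[0, 0, 40, 30] = 1.0  # joint 0 peaks at (x=30, y=40)
heatmaps[0, 1, 10, 5] = 0.8   # joint 1 peaks at (x=5, y=10)

preds, maxvals = get_max_preds(heatmaps)
print(preds.shape, maxvals.shape)  # (1, 2, 2) (1, 2, 1)
print(preds[0])                    # [[30. 40.] [ 5. 10.]]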
def get_final_preds(output, height=96, width=72):
    """
    Transforms raw heatmap outputs into final landmark coordinates.

    Applies post-processing (a quarter-pixel offset for sub-pixel accuracy)
    to refine the landmark predictions from heatmaps.

    Args:
        output (numpy.ndarray): Raw heatmap output from the model.
        height (int): Height of the heatmap. Defaults to 96.
        width (int): Width of the heatmap. Defaults to 72.

    Returns:
        tuple:
            - coords (numpy.ndarray): Final predicted coordinates.
            - maxvals (numpy.ndarray): Confidence scores for the predictions.
    """
    heatmap_height = height
    heatmap_width = width

    batch_heatmaps = output
    coords, maxvals = get_max_preds(batch_heatmaps)

    # Post-processing: nudge each prediction a quarter pixel toward the
    # higher of its two neighboring activations, in x and in y.
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            hm = batch_heatmaps[n][p]
            px = int(math.floor(coords[n][p][0] + 0.5))
            py = int(math.floor(coords[n][p][1] + 0.5))
            if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                diff = np.array(
                    [hm[py][px + 1] - hm[py][px - 1], hm[py + 1][px] - hm[py - 1][px]]
                )
                coords[n][p] += np.sign(diff) * 0.25

    return coords, maxvals
Transforms raw heatmap outputs into final landmark coordinates.
Applies post-processing (a quarter-pixel shift toward the higher neighboring activation) to refine the landmark predictions from heatmaps for sub-pixel accuracy.
Arguments:
- output (numpy.ndarray): Raw heatmap output from the model.
- height (int): Height of the heatmap. Defaults to 96.
- width (int): Width of the heatmap. Defaults to 72.
Returns:
tuple:
- coords (numpy.ndarray): Final predicted coordinates.
- maxvals (numpy.ndarray): Confidence scores for the predictions.
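A sketch of the quarter-pixel refinement on fabricated data: the stronger right-hand neighbor pulls the peak 0.25 pixels in +x:

import numpy as np
from garmentiq.landmark.detection.utils import get_final_preds

heatmaps = np.zeros((1, 1, 96, 72), dtype=np.float32)
heatmaps[0, 0, 40, 30] = 1.0   # integer peak at (x=30, y=40)
heatmaps[0, 0, 40, 31] = 0.5   # stronger right neighbor

coords, maxvals = get_final_preds(heatmaps, height=96, width=72)
print(coords[0, 0])  # approximately [30.25, 40.0]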
def flip_back(output_flipped, matched_parts, heatmap_wid):
    """
    Flips the output (coordinates or heatmaps) horizontally for test-time augmentation.

    Args:
        output_flipped (numpy.ndarray): The output (heatmaps or coordinates) that has been flipped.
        matched_parts (list): A list of tuples indicating which joint pairs are left-right symmetric.
        heatmap_wid (int): The width of the heatmap (used for coordinate flipping).

    Returns:
        numpy.ndarray: The flipped output with joints correctly reordered.
    """
    if output_flipped.ndim == 4:
        # Heatmaps: mirror along the width axis, then swap symmetric joint channels.
        output_flipped = output_flipped[:, :, :, ::-1]
        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :, :].copy()
            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
            output_flipped[:, pair[1], :, :] = tmp
    elif output_flipped.ndim == 3:
        # Coordinates: mirror the x coordinate, then swap symmetric joints.
        output_flipped[:, :, 0] = heatmap_wid - output_flipped[:, :, 0]
        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :].copy()
            output_flipped[:, pair[0], :] = output_flipped[:, pair[1], :]
            output_flipped[:, pair[1], :] = tmp
    else:
        raise NotImplementedError(
            "output_flipped should be [batch_size, num_joints, height, width], "
            "or [batch_size, num_joints, coord_dim]"
        )

    return output_flipped
Flips the output (coordinates or heatmaps) horizontally for test-time augmentation.
Arguments:
- output_flipped (numpy.ndarray): The output (heatmaps or coordinates) that has been flipped.
- matched_parts (list): A list of tuples indicating which joint pairs are left-right symmetric.
- heatmap_wid (int): The width of the heatmap (used for coordinate flipping).
Returns:
numpy.ndarray: The flipped output with joints correctly reordered.
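A minimal sketch, assuming a single hypothetical left-right pair (0, 1); a peak placed near the left edge reappears mirrored on the paired joint:

import numpy as np
from garmentiq.landmark.detection.utils import flip_back

matched_parts = [(0, 1)]  # hypothetical symmetric joint pair

heatmaps = np.zeros((1, 2, 96, 72), dtype=np.float32)
heatmaps[0, 0, 40, 10] = 1.0  # peak at x=10 on joint 0

restored = flip_back(heatmaps, matched_parts, heatmap_wid=72)
# Mirrored to x = 71 - 10 = 61 and moved to the paired joint 1.
print(np.unravel_index(restored[0, 1].argmax(), restored[0, 1].shape))  # (40, 61)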
def fliplr_joints(joints, joints_vis, width, matched_parts):
    """
    Flips joint coordinates horizontally and reorders them based on matched parts.

    Args:
        joints (numpy.ndarray): Array of joint coordinates.
        joints_vis (numpy.ndarray): Array indicating visibility of joints.
        width (int): Width of the image or feature map.
        matched_parts (list): A list of tuples indicating which joint pairs are left-right symmetric.

    Returns:
        tuple:
            - joints (numpy.ndarray): Flipped and reordered joint coordinates.
            - joints_vis (numpy.ndarray): Corresponding joint visibility.
    """
    # Flip horizontal
    joints[:, 0] = width - joints[:, 0] - 1

    # Change left-right parts
    for pair in matched_parts:
        joints[pair[0], :], joints[pair[1], :] = (
            joints[pair[1], :],
            joints[pair[0], :].copy(),
        )
        joints_vis[pair[0], :], joints_vis[pair[1], :] = (
            joints_vis[pair[1], :],
            joints_vis[pair[0], :].copy(),
        )

    return joints * joints_vis, joints_vis
Flips joint coordinates horizontally and reorders them based on matched parts.
Arguments:
- joints (numpy.ndarray): Array of joint coordinates.
- joints_vis (numpy.ndarray): Array indicating visibility of joints.
- width (int): Width of the image or feature map.
- matched_parts (list): A list of tuples indicating which joint pairs are left-right symmetric.
Returns:
tuple:
- joints (numpy.ndarray): Flipped and reordered joint coordinates.
- joints_vis (numpy.ndarray): Corresponding joint visibility.
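A short sketch with two fabricated joints on a 72-pixel-wide map and a hypothetical symmetric pair; the function mutates its inputs, so copies are passed:

import numpy as np
from garmentiq.landmark.detection.utils import fliplr_joints

joints = np.array([[10.0, 50.0], [60.0, 50.0]])  # left joint, right joint
joints_vis = np.ones_like(joints)
matched_parts = [(0, 1)]  # hypothetical left-right pair

flipped, vis = fliplr_joints(joints.copy(), joints_vis.copy(), 72, matched_parts)
print(flipped)  # [[11. 50.] [61. 50.]]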
def transform_preds(coords, center, scale, output_size: list[int] = [72, 96]):
    """
    Transforms predicted coordinates from heatmap space back to original image space.

    Args:
        coords (numpy.ndarray): Predicted coordinates in heatmap space.
        center (numpy.ndarray): Center of the original image (or cropped region).
        scale (numpy.ndarray): Scale factor applied during preprocessing.
        output_size (list[int], optional): The [width, height] of the heatmap space
            that `coords` live in. Defaults to [72, 96].

    Returns:
        numpy.ndarray: Transformed coordinates in original image space.
    """
    target_coords = np.zeros(coords.shape)
    # Invert the preprocessing transform and map each point back.
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords
Transforms predicted coordinates from heatmap space back to original image space.
Arguments:
- coords (numpy.ndarray): Predicted coordinates in heatmap space.
- center (numpy.ndarray): Center of the original image (or cropped region).
- scale (numpy.ndarray): Scale factor applied during preprocessing.
- output_size (list[int], optional): The [width, height] of the heatmap space that coords live in. Defaults to [72, 96].
Returns:
numpy.ndarray: Transformed coordinates in original image space.
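A sketch under the module's scale convention (scale * 200 recovers the source extent; see get_affine_transform below): a point at the middle of a 72x96 heatmap maps back to the middle of a fabricated 720x960 image:

import numpy as np
from garmentiq.landmark.detection.utils import transform_preds

center = np.array([360.0, 480.0], dtype=np.float32)            # image center
scale = np.array([720 / 200.0, 960 / 200.0], dtype=np.float32)

coords = np.array([[36.0, 48.0]])  # center of the 72x96 heatmap
print(transform_preds(coords, center, scale, output_size=[72, 96]))  # ~[[360. 480.]]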
def get_affine_transform(
    center, scale, rot, output_size, shift=np.array([0, 0], dtype=np.float32), inv=0
):
    """
    Calculates the 2x3 affine transformation matrix for image cropping and resizing.

    Args:
        center (numpy.ndarray): Center of the original image or region of interest.
        scale (numpy.ndarray): Scale factor for the transformation.
        rot (float): Rotation angle in degrees.
        output_size (list): Target output size [width, height].
        shift (numpy.ndarray, optional): Shift applied to the center. Defaults to [0, 0].
        inv (int, optional): If 1, returns the inverse transformation matrix. Defaults to 0.

    Returns:
        numpy.ndarray: The 2x3 affine transformation matrix.
    """
    # Accept scalar, list, or ndarray scales.
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale])
    scale = np.asarray(scale)

    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    # Three corresponding point pairs define the affine transform:
    # the center, a rotated direction point, and a perpendicular third point.
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans
Calculates the 2x3 affine transformation matrix for image cropping and resizing.
Arguments:
- center (numpy.ndarray): Center of the original image or region of interest.
- scale (numpy.ndarray): Scale factor for the transformation.
- rot (float): Rotation angle in degrees.
- output_size (list): Target output size [width, height].
- shift (numpy.ndarray, optional): Shift applied to the center. Defaults to [0, 0].
- inv (int, optional): If 1, returns the inverse transformation matrix. Defaults to 0.
Returns:
numpy.ndarray: The 2x3 affine transformation matrix.
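A minimal sketch with fabricated center and scale (the 200.0 factor inside the function turns scale back into pixel extents):

import numpy as np
from garmentiq.landmark.detection.utils import get_affine_transform

center = np.array([360.0, 480.0], dtype=np.float32)
scale = np.array([720 / 200.0, 960 / 200.0], dtype=np.float32)

trans = get_affine_transform(center, scale, rot=0, output_size=[72, 96])
print(trans.shape)  # (2, 3) -- usable with cv2.warpAffine or affine_transform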
def affine_transform(pt, t):
    """
    Applies an affine transformation matrix to a 2D point.

    Args:
        pt (tuple or list): The 2D point (x, y) to transform.
        t (numpy.ndarray): The 2x3 affine transformation matrix.

    Returns:
        numpy.ndarray: The transformed 2D point.
    """
    # Append 1 to form homogeneous coordinates, then apply the 2x3 matrix.
    new_pt = np.array([pt[0], pt[1], 1.0])
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]
Applies an affine transformation matrix to a 2D point.
Arguments:
- pt (tuple or list): The 2D point (x, y) to transform.
- t (numpy.ndarray): The 2x3 affine transformation matrix.
Returns:
numpy.ndarray: The transformed 2D point.
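Continuing the sketch above, the forward matrix maps the fabricated image center to the center of the 72x96 output:

import numpy as np
from garmentiq.landmark.detection.utils import affine_transform, get_affine_transform

center = np.array([360.0, 480.0], dtype=np.float32)
scale = np.array([3.6, 4.8], dtype=np.float32)
trans = get_affine_transform(center, scale, 0, [72, 96])

print(affine_transform([360.0, 480.0], trans))  # ~[36. 48.]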
def get_3rd_point(a, b):
    """
    Calculates a third point to form a right-angled triangle with two given points.
    Used for creating a 3-point basis for affine transformations.

    Args:
        a (numpy.ndarray): First point.
        b (numpy.ndarray): Second point.

    Returns:
        numpy.ndarray: The calculated third point.
    """
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)
Calculates a third point to form a right-angled triangle with two given points. Used for creating a 3-point basis for affine transformations.
Arguments:
- a (numpy.ndarray): First point.
- b (numpy.ndarray): Second point.
Returns:
numpy.ndarray: The calculated third point.
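A quick numeric check on illustrative points: the third point is b plus (a - b) rotated by 90 degrees:

import numpy as np
from garmentiq.landmark.detection.utils import get_3rd_point

a = np.array([1.0, 0.0])
b = np.array([0.0, 0.0])
print(get_3rd_point(a, b))  # [0. 1.]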
def get_dir(src_point, rot_rad):
    """
    Calculates the direction vector after rotation.

    Args:
        src_point (list): Source point [x, y].
        rot_rad (float): Rotation angle in radians.

    Returns:
        list: The rotated direction vector.
    """
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)

    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs

    return src_result
Calculates the direction vector after rotation.
Arguments:
- src_point (list): Source point [x, y].
- rot_rad (float): Rotation angle in radians.
Returns:
list: The rotated direction vector.
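A small illustrative check: rotating [0, -1] by pi/2 radians yields, up to floating-point error, [1, 0]:

import numpy as np
from garmentiq.landmark.detection.utils import get_dir

print(get_dir([0.0, -1.0], np.pi / 2))  # ~[1.0, 0.0]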
def crop(img, center, scale, output_size, rot=0):
    """
    Crops and resizes an image using an affine transformation.

    Args:
        img (numpy.ndarray): The input image.
        center (numpy.ndarray): The center of the crop region.
        scale (numpy.ndarray): The scale factor for the crop.
        output_size (tuple): The target output size (width, height).
        rot (int, optional): Rotation angle in degrees. Defaults to 0.

    Returns:
        numpy.ndarray: The cropped and transformed image.
    """
    trans = get_affine_transform(center, scale, rot, output_size)

    dst_img = cv2.warpAffine(
        img, trans, (int(output_size[0]), int(output_size[1])), flags=cv2.INTER_LINEAR
    )

    return dst_img
Crops and resizes an image using an affine transformation.
Arguments:
- img (numpy.ndarray): The input image.
- center (numpy.ndarray): The center of the crop region.
- scale (numpy.ndarray): The scale factor for the crop.
- output_size (tuple): The target output size (width, height).
- rot (int, optional): Rotation angle in degrees. Defaults to 0.
Returns:
numpy.ndarray: The cropped and transformed image.
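A sketch on a fabricated blank image; note that cv2.warpAffine takes (width, height) while the returned array is height-first:

import numpy as np
from garmentiq.landmark.detection.utils import crop

img = np.zeros((960, 720, 3), dtype=np.uint8)  # fabricated H x W x C image
center = np.array([360.0, 480.0], dtype=np.float32)
scale = np.array([720 / 200.0, 960 / 200.0], dtype=np.float32)

patch = crop(img, center, scale, output_size=(288, 384))
print(patch.shape)  # (384, 288, 3)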
def input_image_transform(
    img_input: Union[str, np.ndarray],
    scale_std: float = 200.0,
    resize_dim: list[int] = [288, 384],
    normalize_mean: list[float] = [0.485, 0.456, 0.406],
    normalize_std: list[float] = [0.229, 0.224, 0.225],
):
    """
    Preprocesses an input image for landmark detection.

    This function takes an image (either path or NumPy array), applies an affine
    transformation (scaling, centering), resizes it, converts it to a PyTorch tensor,
    and normalizes it.

    Args:
        img_input (Union[str, np.ndarray]): Path to the image file or a NumPy array of the image.
        scale_std (float, optional): Standard scale for image transformation. Defaults to 200.0.
        resize_dim (list[int], optional): Target dimensions [width, height] for the transformed image.
            Defaults to [288, 384].
        normalize_mean (list[float], optional): Mean values for image normalization (RGB channels).
            Defaults to [0.485, 0.456, 0.406].
        normalize_std (list[float], optional): Standard deviation values for image normalization (RGB channels).
            Defaults to [0.229, 0.224, 0.225].

    Raises:
        ValueError: If `img_input` is neither a file path nor a NumPy array.

    Returns:
        tuple:
            - input_tensor (torch.Tensor): The preprocessed image as a PyTorch tensor, ready for model input.
            - image_np (numpy.ndarray): The original image as a NumPy array (RGB).
            - center (numpy.ndarray): The center of the original image used for transformation.
            - scale (numpy.ndarray): The scale factor used for transformation.
    """
    if isinstance(img_input, str):
        img = Image.open(img_input).convert("RGB")
    elif isinstance(img_input, np.ndarray):
        img = Image.fromarray(img_input.astype(np.uint8))
    else:
        raise ValueError("img_input must be a file path or a NumPy array.")

    image_np = np.array(img)

    h, w = image_np.shape[:2]
    center = np.array([w / 2, h / 2], dtype=np.float32)
    scale = np.array([w / scale_std, h / scale_std], dtype=np.float32)
    image_size = np.array(resize_dim)
    rotation = 0

    trans = get_affine_transform(center, scale, rotation, image_size)
    warped_image = cv2.warpAffine(
        image_np,
        trans,
        (int(image_size[0]), int(image_size[1])),
        flags=cv2.INTER_LINEAR,
    )

    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(normalize_mean, normalize_std)
    input_tensor = normalize(to_tensor(warped_image)).unsqueeze(0)

    return input_tensor, image_np, center, scale
Preprocesses an input image for landmark detection.
This function takes an image (either path or NumPy array), applies an affine transformation (scaling, centering), resizes it, converts it to a PyTorch tensor, and normalizes it.
Arguments:
- img_input (Union[str, np.ndarray]): Path to the image file or a NumPy array of the image.
- scale_std (float, optional): Standard scale for image transformation. Defaults to 200.0.
- resize_dim (list[int], optional): Target dimensions [width, height] for the transformed image. Defaults to [288, 384].
- normalize_mean (list[float], optional): Mean values for image normalization (RGB channels). Defaults to [0.485, 0.456, 0.406].
- normalize_std (list[float], optional): Standard deviation values for image normalization (RGB channels). Defaults to [0.229, 0.224, 0.225].
Raises:
- ValueError: If img_input is neither a file path nor a NumPy array.
Returns:
tuple:
- input_tensor (torch.Tensor): The preprocessed image as a PyTorch tensor, ready for model input.
- image_np (numpy.ndarray): The original image as a NumPy array (RGB).
- center (numpy.ndarray): The center of the original image used for transformation.
- scale (numpy.ndarray): The scale factor used for transformation.
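A hedged end-to-end sketch of the detection flow: preprocess, run a model, refine, and map back to image space. The image path and model are placeholders, not part of this module:

import numpy as np
from garmentiq.landmark.detection.utils import (
    input_image_transform,
    get_final_preds,
    transform_preds,
)

input_tensor, image_np, center, scale = input_image_transform("garment.jpg")  # hypothetical path

# `model` is a placeholder for a loaded network producing heatmaps of
# shape [1, num_joints, 96, 72] for a 288x384 input.
heatmaps = model(input_tensor).detach().cpu().numpy()

coords, maxvals = get_final_preds(heatmaps, height=96, width=72)
landmarks = transform_preds(coords[0], center, scale, output_size=[72, 96])
print(landmarks)  # landmark (x, y) pairs in original-image pixels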