garmentiq.tailor
import os
from typing import List, Dict, Type, Any, Optional, Union
import torch.nn as nn
import numpy as np
from pathlib import Path
import pandas as pd
from tqdm.auto import tqdm
import textwrap
from PIL import Image, ImageDraw, ImageFont
from . import classification
from . import segmentation
from . import landmark
from . import utils


class tailor:
    """
    The `tailor` class acts as a central agent for the GarmentIQ pipeline,
    orchestrating garment measurement from classification to landmark derivation.

    It integrates functionalities from other modules (classification, segmentation, landmark)
    to provide a smooth end-to-end process for automated garment measurement from images.

    Attributes:
        input_dir (str): Directory containing input images.
        model_dir (str): Directory where models are stored.
        output_dir (str): Directory to save processed outputs.
        class_dict (dict): Dictionary defining garment classes and their properties.
        classes (List[str]): Sorted keys of `class_dict`; the class order passed to the classifier.
        do_derive (bool): Flag to enable landmark derivation.
        do_refine (bool): Flag to enable landmark refinement.
        classification_model_path (str): Path to the classification model.
        classification_model_class (Type[nn.Module]): Class definition for the classification model.
        classification_model_args (Dict): Arguments for the classification model.
        segmentation_model_name (str): Name or path for the segmentation model.
        segmentation_model_args (Dict): Arguments for the segmentation model.
        segmentation_has_bg_color (bool): True when `segmentation_model_args` carries a
            non-None `background_color`, i.e. when `segment` also returns a recolored image.
        landmark_detection_model_path (str): Path to the landmark detection model.
        landmark_detection_model_class (Type[nn.Module]): Class definition for the landmark detection model.
        landmark_detection_model_args (Dict): Arguments for the landmark detection model.
        refinement_args (Optional[Dict]): Arguments for landmark refinement. An empty dict when
            refinement is enabled without arguments, None when refinement is disabled.
        derivation_dict (Optional[Dict]): Dictionary for landmark derivation rules; None when
            derivation is disabled.
    """

    def __init__(
        self,
        input_dir: str,
        model_dir: str,
        output_dir: str,
        class_dict: dict,
        do_derive: bool,
        do_refine: bool,
        classification_model_path: str,
        classification_model_class: Type[nn.Module],
        classification_model_args: Dict,
        segmentation_model_name: str,
        segmentation_model_args: Dict,
        landmark_detection_model_path: str,
        landmark_detection_model_class: Type[nn.Module],
        landmark_detection_model_args: Dict,
        refinement_args: Optional[Dict] = None,
        derivation_dict: Optional[Dict] = None,
    ):
        """
        Initializes the `tailor` agent with paths, model configurations, and processing flags.

        All three models (classification, segmentation, landmark detection) are loaded
        eagerly here, so construction performs model loading.

        Args:
            input_dir (str): Path to the directory containing input images.
            model_dir (str): Path to the directory where all required models are stored.
            output_dir (str): Path to the directory where all processed outputs will be saved.
            class_dict (dict): A dictionary defining the garment classes, their predefined points,
                               index ranges, and instruction JSON file paths.
            do_derive (bool): If True, enables the landmark derivation step.
            do_refine (bool): If True, enables the landmark refinement step.
            classification_model_path (str): The filename or relative path to the classification model.
            classification_model_class (Type[nn.Module]): The Python class of the classification model.
            classification_model_args (Dict): A dictionary of arguments to initialize the classification model.
                               The keys `resize_dim`, `normalize_mean` and `normalize_std` are
                               preprocessing settings and are not forwarded to the model constructor.
            segmentation_model_name (str): The name or path of the pretrained segmentation model.
            segmentation_model_args (Dict): A dictionary of arguments for the segmentation model.
                               Must contain `trust_remote_code` and `high_precision`.
            landmark_detection_model_path (str): The filename or relative path to the landmark detection model.
            landmark_detection_model_class (Type[nn.Module]): The Python class of the landmark detection model.
            landmark_detection_model_args (Dict): A dictionary of arguments for the landmark detection model.
            refinement_args (Optional[Dict]): Optional arguments for the refinement process,
                                              e.g., `window_size`, `ksize`, `sigmaX`. Defaults to None.
            derivation_dict (Optional[Dict]): A dictionary defining derivation rules for non-predefined landmarks.
                                               Required if `do_derive` is True.

        Raises:
            ValueError: If `do_derive` is True but `derivation_dict` is None.
            KeyError: If `segmentation_model_args` lacks `trust_remote_code` or `high_precision`.
        """
        # Directories
        self.input_dir = input_dir
        self.model_dir = model_dir
        self.output_dir = output_dir

        # Classes (sorted so that class indices are stable regardless of dict order)
        self.class_dict = class_dict
        self.classes = sorted(class_dict)

        # Derivation
        self.do_derive = do_derive
        if self.do_derive:
            if derivation_dict is None:
                raise ValueError(
                    "`derivation_dict` must be provided if `do_derive=True`."
                )
            self.derivation_dict = derivation_dict
        else:
            self.derivation_dict = None

        # Refinement setup: fall back to an empty dict (meaning "use the defaults
        # of `refine`") instead of letting None overwrite it.
        self.do_refine = do_refine
        if self.do_refine:
            self.refinement_args = {} if refinement_args is None else refinement_args
        else:
            self.refinement_args = None

        # Classification model setup
        self.classification_model_path = classification_model_path
        self.classification_model_args = classification_model_args
        self.classification_model_class = classification_model_class
        # Preprocessing settings are consumed by `classify`, not by the model constructor.
        preprocessing_keys = ("resize_dim", "normalize_mean", "normalize_std")
        filtered_model_args = {
            key: value
            for key, value in self.classification_model_args.items()
            if key not in preprocessing_keys
        }

        # Load the model using the filtered arguments
        self.classification_model = classification.load_model(
            model_path=f"{self.model_dir}/{self.classification_model_path}",
            model_class=self.classification_model_class,
            model_args=filtered_model_args,
        )

        # Segmentation model setup
        self.segmentation_model_name = segmentation_model_name
        self.segmentation_model_args = segmentation_model_args
        # Same test that `segment` and `measure` use, so the summary matches what runs.
        self.segmentation_has_bg_color = (
            segmentation_model_args.get("background_color") is not None
        )
        self.segmentation_model = segmentation.load_model(
            pretrained_model=self.segmentation_model_name,
            pretrained_model_args={
                "trust_remote_code": segmentation_model_args["trust_remote_code"]
            },
            high_precision=segmentation_model_args["high_precision"],
        )

        # Landmark detection model setup
        self.landmark_detection_model_path = landmark_detection_model_path
        self.landmark_detection_model_class = landmark_detection_model_class
        self.landmark_detection_model_args = landmark_detection_model_args
        self.landmark_detection_model = landmark.detection.load_model(
            model_path=f"{self.model_dir}/{self.landmark_detection_model_path}",
            model_class=self.landmark_detection_model_class,
        )

    def summary(self):
        """
        Prints a summary of the `tailor` agent's configuration, including directory paths,
        defined classes, processing options (refine, derive), and loaded models.
        """
        width = 80
        sep = "=" * width

        print(sep)
        print("TAILOR AGENT SUMMARY".center(width))
        print(sep)

        # Directories
        print("DIRECTORY PATHS".center(width, "-"))
        print(f"{'Input directory:':25} {self.input_dir}")
        print(f"{'Model directory:':25} {self.model_dir}")
        print(f"{'Output directory:':25} {self.output_dir}")
        print()

        # Classes
        print("CLASSES".center(width, "-"))
        print(f"{'Class Index':<11} | Class Name")
        print(f"{'-'*11} | {'-'*66}")
        for i, cls in enumerate(self.classes):
            print(f"{i:<11} | {cls}")
        print()

        # Flags
        print("OPTIONS".center(width, "-"))
        print(f"{'Do refine?:':25} {self.do_refine}")
        print(f"{'Do derive?:':25} {self.do_derive}")
        print()

        # Models
        print("MODELS".center(width, "-"))
        print(
            f"{'Classification Model:':25} {self.classification_model_class.__name__}"
        )
        print(f"{'Segmentation Model:':25} {self.segmentation_model_name}")
        print(f"{' └─ Change BG color?:':25} {self.segmentation_has_bg_color}")
        # The attribute holds a class, so its own `__name__` is the model name;
        # going through `__class__` would print the metaclass ("type").
        print(
            f"{'Landmark Detection Model:':25} {self.landmark_detection_model_class.__name__}"
        )
        print(sep)

    def classify(self, image: str, verbose=False):
        """
        Classifies a single garment image using the configured classification model.

        Args:
            image (str): The filename of the image to classify, located in `self.input_dir`.
            verbose (bool): If True, prints detailed classification output. Defaults to False.

        Returns:
            tuple:
                - label (str): The predicted class label of the garment.
                - probabilities (List[float]): A list of probabilities for each class.
        """
        label, probabilities = classification.predict(
            model=self.classification_model,
            image_path=f"{self.input_dir}/{image}",
            classes=self.classes,
            resize_dim=self.classification_model_args.get("resize_dim"),
            normalize_mean=self.classification_model_args.get("normalize_mean"),
            normalize_std=self.classification_model_args.get("normalize_std"),
            verbose=verbose,
        )
        return label, probabilities

    def segment(self, image: str):
        """
        Segments a single garment image to extract its mask and optionally modifies the background color.

        Note that the length of the returned tuple depends on the configuration:
        two items without a `background_color`, three items with one.

        Args:
            image (str): The filename of the image to segment, located in `self.input_dir`.

        Returns:
            tuple:
                - original_img (np.ndarray): The image array returned by `segmentation.extract`.
                - mask (np.ndarray): The segmentation mask returned by `segmentation.extract`.
                - bg_modified_img (np.ndarray, optional): The image with the background color changed,
                                                         returned only if `background_color` is specified
                                                         (non-None) in `segmentation_model_args`.
        """
        original_img, mask = segmentation.extract(
            model=self.segmentation_model,
            image_path=f"{self.input_dir}/{image}",
            resize_dim=self.segmentation_model_args.get("resize_dim"),
            normalize_mean=self.segmentation_model_args.get("normalize_mean"),
            normalize_std=self.segmentation_model_args.get("normalize_std"),
            high_precision=self.segmentation_model_args.get("high_precision"),
        )

        background_color = self.segmentation_model_args.get("background_color")
        if background_color is None:
            return original_img, mask

        bg_modified_img = segmentation.change_background_color(
            image_np=original_img, mask_np=mask, background_color=background_color
        )
        return original_img, mask, bg_modified_img

    def detect(self, class_name: str, image: Union[str, np.ndarray]):
        """
        Detects predefined landmarks on a garment image based on its classified class.

        Args:
            class_name (str): The classified name of the garment.
            image (Union[str, np.ndarray]): Either a filename located in `self.input_dir`
                (it is joined with `self.input_dir` before detection) or a NumPy array
                of the image, which is passed through unchanged.

        Returns:
            tuple:
                - coords (np.ndarray): Detected landmark coordinates.
                - maxval (np.ndarray): Confidence scores for detected landmarks.
                - detection_dict (dict): A dictionary containing detailed landmark detection data.
        """
        if isinstance(image, str):
            image = f"{self.input_dir}/{image}"

        coords, maxval, detection_dict = landmark.detect(
            class_name=class_name,
            class_dict=self.class_dict,
            image_path=image,
            model=self.landmark_detection_model,
            scale_std=self.landmark_detection_model_args.get("scale_std"),
            resize_dim=self.landmark_detection_model_args.get("resize_dim"),
            normalize_mean=self.landmark_detection_model_args.get("normalize_mean"),
            normalize_std=self.landmark_detection_model_args.get("normalize_std"),
        )
        return coords, maxval, detection_dict

    def derive(
        self,
        class_name: str,
        detection_dict: dict,
        derivation_dict: dict,
        landmark_coords: np.ndarray,
        np_mask: np.ndarray,
    ):
        """
        Derives non-predefined landmark coordinates based on predefined landmarks and a mask.

        Thin wrapper around `landmark.derive`; the rules come from the `derivation_dict`
        argument, not from `self.derivation_dict`.

        Args:
            class_name (str): The name of the garment class.
            detection_dict (dict): The dictionary containing detected landmarks.
            derivation_dict (dict): The dictionary defining derivation rules.
            landmark_coords (np.ndarray): NumPy array of initial landmark coordinates.
            np_mask (np.ndarray): NumPy array of the segmentation mask.

        Returns:
            tuple:
                - derived_coords (dict): A dictionary of the newly derived landmark coordinates.
                - updated_detection_dict (dict): The detection dictionary updated with derived landmarks.
        """
        derived_coords, updated_detection_dict = landmark.derive(
            class_name=class_name,
            detection_dict=detection_dict,
            derivation_dict=derivation_dict,
            landmark_coords=landmark_coords,
            np_mask=np_mask,
        )
        return derived_coords, updated_detection_dict

    def refine(
        self,
        class_name: str,
        detection_np: np.ndarray,
        detection_conf: np.ndarray,
        detection_dict: dict,
        mask: np.ndarray,
        window_size: int = 5,
        ksize: tuple = (11, 11),
        sigmaX: float = 0.0,
    ):
        """
        Refines detected landmark coordinates using a blurred segmentation mask.

        Any non-None `window_size`, `ksize` or `sigmaX` stored in `self.refinement_args`
        takes precedence over the value passed to this call.

        Args:
            class_name (str): The name of the garment class.
            detection_np (np.ndarray): NumPy array of initial landmark predictions.
            detection_conf (np.ndarray): NumPy array of confidence scores for each predicted landmark.
            detection_dict (dict): Dictionary containing landmark data for each class.
            mask (np.ndarray): Grayscale mask image used to guide refinement.
            window_size (int, optional): Size of the window used in the refinement algorithm. Defaults to 5.
            ksize (tuple, optional): Kernel size for Gaussian blur. Must be odd integers. Defaults to (11, 11).
            sigmaX (float, optional): Gaussian kernel standard deviation in the X direction. Defaults to 0.0.

        Returns:
            tuple:
                - refined_detection_np (np.ndarray): Array of the same shape as `detection_np` with refined coordinates.
                - detection_dict (dict): Updated detection dictionary with refined landmark coordinates.
        """
        if self.refinement_args:
            if self.refinement_args.get("window_size") is not None:
                window_size = self.refinement_args["window_size"]
            if self.refinement_args.get("ksize") is not None:
                ksize = self.refinement_args["ksize"]
            if self.refinement_args.get("sigmaX") is not None:
                sigmaX = self.refinement_args["sigmaX"]

        refined_detection_np, refined_detection_dict = landmark.refine(
            class_name=class_name,
            detection_np=detection_np,
            detection_conf=detection_conf,
            detection_dict=detection_dict,
            mask=mask,
            window_size=window_size,
            ksize=ksize,
            sigmaX=sigmaX,
        )

        return refined_detection_np, refined_detection_dict

    def measure(
        self,
        save_segmentation_image: bool = False,
        save_measurement_image: bool = False,
    ):
        """
        Executes the full garment measurement pipeline for all images in the input directory.

        This method processes each image through a multi-stage pipeline that includes garment classification,
        segmentation, landmark detection, optional refinement, and measurement derivation. During classification,
        the system identifies the type of garment (e.g., shirt, dress, pants). Segmentation follows, producing
        binary or instance masks that separate the garment from the background. Landmark detection is then
        performed to locate anatomical or garment-specific keypoints such as shoulders or waist positions. If
        enabled, an optional refinement step applies post-processing or model-based corrections to improve the
        accuracy of detected keypoints. Finally, the system calculates key garment dimensions - such as chest width,
        waist width, and full length - based on the detected landmarks. In addition to this processing pipeline,
        the method also manages data and visual output exports. For each input image, a cleaned JSON file is
        generated containing the predicted garment class, landmark coordinates, and the resulting measurements.
        Optionally, visual outputs such as segmentation masks and images annotated with landmarks and measurements
        can be saved to assist in inspection or debugging.

        Segmentation only runs when a `background_color` is configured or when refinement
        or derivation is enabled. Input images are the files in `self.input_dir` (not
        recursive) matching `*.jpg`, `*.jpeg`, `*.png`, `*.bmp` or `*.tiff`, processed in
        sorted path order. A `metadata.csv` is written to `self.output_dir`.

        Args:
            save_segmentation_image (bool): If True, saves segmentation masks and background-modified images.
                                            Has no effect when segmentation does not run.
                                            Defaults to False.
            save_measurement_image (bool): If True, saves images overlaid with detected landmarks.
                                           Defaults to False.

        Returns:
            tuple:
                - metadata (pd.DataFrame): A DataFrame containing metadata for each processed image, such as:
                    - Original image filename
                    - Paths to any saved segmentation or annotated images
                    - Predicted class and path to the exported measurement JSON
                - outputs (dict): A dictionary mapping image filenames to their intermediate results:
                    - `detection_dict` (always)
                    - `mask` (when segmentation ran)
                    - `bg_modified_image` (when a background color is configured)
                    - `coords` and `maxvals` (when refinement or derivation is enabled)

        Example of exported JSON:
            ```
            {
                "cloth_3.jpg": {
                    "class": "vest dress",
                    "landmarks": {
                        "10": {
                            "conf": 0.7269417643547058,
                            "x": 611.0,
                            "y": 861.0
                        },
                        "16": {
                            "conf": 0.6769524812698364,
                            "x": 1226.0,
                            "y": 838.0
                        },
                        "17": {
                            "conf": 0.7472652196884155,
                            "x": 1213.0,
                            "y": 726.0
                        },
                        "18": {
                            "conf": 0.7360446453094482,
                            "x": 1238.0,
                            "y": 613.0
                        },
                        "2": {
                            "conf": 0.9256571531295776,
                            "x": 703.0,
                            "y": 264.0
                        },
                        "20": {
                            "x": 700.936,
                            "y": 2070.0
                        },
                        "8": {
                            "conf": 0.7129100561141968,
                            "x": 563.0,
                            "y": 613.0
                        },
                        "9": {
                            "conf": 0.8203497529029846,
                            "x": 598.0,
                            "y": 726.0
                        }
                    },
                    "measurements": {
                        "chest": {
                            "distance": 675.0,
                            "landmarks": {
                                "end": "18",
                                "start": "8"
                            }
                        },
                        "full length": {
                            "distance": 1806.0011794281863,
                            "landmarks": {
                                "end": "20",
                                "start": "2"
                            }
                        },
                        "hips": {
                            "distance": 615.4299310238331,
                            "landmarks": {
                                "end": "16",
                                "start": "10"
                            }
                        },
                        "waist": {
                            "distance": 615.0,
                            "landmarks": {
                                "end": "17",
                                "start": "9"
                            }
                        }
                    }
                }
            }
            ```
        """
        # Some helper variables
        use_bg_color = self.segmentation_model_args.get("background_color") is not None
        # Masks are needed for recoloring, refinement and derivation.
        needs_segmentation = use_bg_color or self.do_derive or self.do_refine
        save_masks = save_segmentation_image and needs_segmentation
        outputs = {}

        # Step 1: Create the output directories
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)
        Path(f"{self.output_dir}/measurement_json").mkdir(parents=True, exist_ok=True)

        if save_masks:
            Path(f"{self.output_dir}/mask_image").mkdir(parents=True, exist_ok=True)
            if use_bg_color:
                Path(f"{self.output_dir}/bg_modified_image").mkdir(
                    parents=True, exist_ok=True
                )

        if save_measurement_image:
            Path(f"{self.output_dir}/measurement_image").mkdir(
                parents=True, exist_ok=True
            )

        # Step 2: Collect image filenames from input_dir.
        # Sorted so that row order does not depend on filesystem enumeration order.
        image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff"]
        input_path = Path(self.input_dir)
        image_files = sorted(
            path for ext in image_extensions for path in input_path.glob(ext)
        )

        # Step 3: Determine column structure
        columns = ["filename", "class"]
        if needs_segmentation:
            columns.append("mask_image")
        if use_bg_color:
            columns.append("bg_modified_image")
        columns.extend(["measurement_image", "measurement_json"])

        metadata = pd.DataFrame(columns=columns)
        # Assigning a list to the empty frame gives it a 0..n-1 index, so the
        # `enumerate` positions used below double as row labels for `.at`.
        metadata["filename"] = [img.name for img in image_files]

        # Step 4: Print start message and information
        print(f"Start measuring {len(metadata['filename'])} garment images ...")

        if self.do_derive and self.do_refine:
            message = (
                "There are 5 measurement steps: classification, segmentation, "
                "landmark detection, landmark refinement, and landmark derivation."
            )
        elif self.do_derive:
            message = (
                "There are 4 measurement steps: classification, segmentation, "
                "landmark detection, and landmark derivation."
            )
        elif self.do_refine:
            message = (
                "There are 4 measurement steps: classification, segmentation, "
                "landmark detection, and landmark refinement."
            )
        elif use_bg_color:
            message = (
                "There are 3 measurement steps: classification, segmentation, "
                "and landmark detection."
            )
        else:
            message = (
                "There are 2 measurement steps: classification and landmark detection."
            )

        print(textwrap.fill(message, width=80))

        # Step 5: Classification
        for idx, image in tqdm(
            enumerate(metadata["filename"]), total=len(metadata), desc="Classification"
        ):
            label, _ = self.classify(image=image, verbose=False)
            metadata.at[idx, "class"] = label
            outputs[image] = {}

        # Step 6: Segmentation
        if needs_segmentation:
            for idx, image in tqdm(
                enumerate(metadata["filename"]),
                total=len(metadata),
                desc="Segmentation",
            ):
                if use_bg_color:
                    _, mask, bg_modified_image = self.segment(image=image)
                    outputs[image] = {
                        "mask": mask,
                        "bg_modified_image": bg_modified_image,
                    }
                else:
                    _, mask = self.segment(image=image)
                    outputs[image] = {
                        "mask": mask,
                    }

        # Step 7: Landmark detection
        keep_raw_detection = self.do_derive or self.do_refine
        for idx, image in tqdm(
            enumerate(metadata["filename"]),
            total=len(metadata),
            desc="Landmark detection",
        ):
            label = metadata.at[idx, "class"]
            # Detect on the recolored image when one exists, else on the input file.
            detection_input = (
                outputs[image]["bg_modified_image"] if use_bg_color else image
            )
            coords, maxvals, detection_dict = self.detect(
                class_name=label, image=detection_input
            )
            outputs[image]["detection_dict"] = detection_dict
            if keep_raw_detection:
                outputs[image]["coords"] = coords
                outputs[image]["maxvals"] = maxvals

        # Step 8: Landmark refinement
        if self.do_refine:
            for idx, image in tqdm(
                enumerate(metadata["filename"]),
                total=len(metadata),
                desc="Landmark refinement",
            ):
                updated_coords, updated_detection_dict = self.refine(
                    class_name=metadata.at[idx, "class"],
                    detection_np=outputs[image]["coords"],
                    detection_conf=outputs[image]["maxvals"],
                    detection_dict=outputs[image]["detection_dict"],
                    mask=outputs[image]["mask"],
                )
                outputs[image]["coords"] = updated_coords
                outputs[image]["detection_dict"] = updated_detection_dict

        # Step 9: Landmark derivation
        if self.do_derive:
            for idx, image in tqdm(
                enumerate(metadata["filename"]),
                total=len(metadata),
                desc="Landmark derivation",
            ):
                _, updated_detection_dict = self.derive(
                    class_name=metadata.at[idx, "class"],
                    detection_dict=outputs[image]["detection_dict"],
                    derivation_dict=self.derivation_dict,
                    landmark_coords=outputs[image]["coords"],
                    np_mask=outputs[image]["mask"],
                )
                outputs[image]["detection_dict"] = updated_detection_dict

        # Step 10: Save segmentation image
        if save_masks:
            for idx, image in tqdm(
                enumerate(metadata["filename"]),
                total=len(metadata),
                desc="Save segmentation image",
            ):
                transformed_name = os.path.splitext(image)[0]
                mask_path = f"{self.output_dir}/mask_image/{transformed_name}_mask.png"
                Image.fromarray(outputs[image]["mask"]).save(mask_path)
                metadata.at[idx, "mask_image"] = mask_path
                if use_bg_color:
                    bg_path = f"{self.output_dir}/bg_modified_image/{transformed_name}_bg_modified.png"
                    Image.fromarray(outputs[image]["bg_modified_image"]).save(bg_path)
                    metadata.at[idx, "bg_modified_image"] = bg_path

        # Step 11: Save measurement image
        if save_measurement_image:
            font = ImageFont.load_default()
            radius = 5
            for idx, image in tqdm(
                enumerate(metadata["filename"]),
                total=len(metadata),
                desc="Save measurement image",
            ):
                label = metadata.at[idx, "class"]
                transformed_name = os.path.splitext(image)[0]

                # `convert` returns an independent copy, so the source file handle
                # can be released as soon as the conversion is done.
                with Image.open(f"{self.input_dir}/{image}") as source_image:
                    image_to_save = source_image.convert("RGB")
                draw = ImageDraw.Draw(image_to_save)
                landmarks = outputs[image]["detection_dict"][label]["landmarks"]

                for lm_id, lm_data in landmarks.items():
                    x, y = lm_data["x"], lm_data["y"]
                    draw.ellipse(
                        (x - radius, y - radius, x + radius, y + radius), fill="green"
                    )
                    draw.text((x + 8, y - 8), str(lm_id), fill="green", font=font)

                measurement_image_path = f"{self.output_dir}/measurement_image/{transformed_name}_measurement.png"
                image_to_save.save(measurement_image_path)
                metadata.at[idx, "measurement_image"] = measurement_image_path

        # Step 12: Save measurement json
        for idx, image in tqdm(
            enumerate(metadata["filename"]),
            total=len(metadata),
            desc="Save measurement json",
        ):
            transformed_name = os.path.splitext(image)[0]

            # Clean the detection dictionary
            final_dict = utils.clean_detection_dict(
                class_name=metadata.at[idx, "class"],
                image_name=image,
                detection_dict=outputs[image]["detection_dict"],
            )

            # Export JSON
            json_path = f"{self.output_dir}/measurement_json/{transformed_name}_measurement.json"
            utils.export_dict_to_json(
                data=final_dict,
                filename=json_path,
            )

            metadata.at[idx, "measurement_json"] = json_path

        # Step 13: Save metadata as a CSV
        metadata.to_csv(f"{self.output_dir}/metadata.csv", index=False)

        return metadata, outputs
17class tailor: 18 """ 19 The `tailor` class acts as a central agent for the GarmentIQ pipeline, 20 orchestrating garment measurement from classification to landmark derivation. 21 22 It integrates functionalities from other modules (classification, segmentation, landmark) 23 to provide a smooth end-to-end process for automated garment measurement from images. 24 25 Attributes: 26 input_dir (str): Directory containing input images. 27 model_dir (str): Directory where models are stored. 28 output_dir (str): Directory to save processed outputs. 29 class_dict (dict): Dictionary defining garment classes and their properties. 30 do_derive (bool): Flag to enable landmark derivation. 31 do_refine (bool): Flag to enable landmark refinement. 32 classification_model_path (str): Path to the classification model. 33 classification_model_class (Type[nn.Module]): Class definition for the classification model. 34 classification_model_args (Dict): Arguments for the classification model. 35 segmentation_model_name (str): Name or path for the segmentation model. 36 segmentation_model_args (Dict): Arguments for the segmentation model. 37 landmark_detection_model_path (str): Path to the landmark detection model. 38 landmark_detection_model_class (Type[nn.Module]): Class definition for the landmark detection model. 39 landmark_detection_model_args (Dict): Arguments for the landmark detection model. 40 refinement_args (Optional[Dict]): Arguments for landmark refinement. 41 derivation_dict (Optional[Dict]): Dictionary for landmark derivation rules. 
42 """ 43 44 def __init__( 45 self, 46 input_dir: str, 47 model_dir: str, 48 output_dir: str, 49 class_dict: dict, 50 do_derive: bool, 51 do_refine: bool, 52 classification_model_path: str, 53 classification_model_class: Type[nn.Module], 54 classification_model_args: Dict, 55 segmentation_model_name: str, 56 segmentation_model_args: Dict, 57 landmark_detection_model_path: str, 58 landmark_detection_model_class: Type[nn.Module], 59 landmark_detection_model_args: Dict, 60 refinement_args: Optional[Dict] = None, 61 derivation_dict: Optional[Dict] = None, 62 ): 63 """ 64 Initializes the `tailor` agent with paths, model configurations, and processing flags. 65 66 Args: 67 input_dir (str): Path to the directory containing input images. 68 model_dir (str): Path to the directory where all required models are stored. 69 output_dir (str): Path to the directory where all processed outputs will be saved. 70 class_dict (dict): A dictionary defining the garment classes, their predefined points, 71 index ranges, and instruction JSON file paths. 72 do_derive (bool): If True, enables the landmark derivation step. 73 do_refine (bool): If True, enables the landmark refinement step. 74 classification_model_path (str): The filename or relative path to the classification model. 75 classification_model_class (Type[nn.Module]): The Python class of the classification model. 76 classification_model_args (Dict): A dictionary of arguments to initialize the classification model. 77 segmentation_model_name (str): The name or path of the pretrained segmentation model. 78 segmentation_model_args (Dict): A dictionary of arguments for the segmentation model. 79 landmark_detection_model_path (str): The filename or relative path to the landmark detection model. 80 landmark_detection_model_class (Type[nn.Module]): The Python class of the landmark detection model. 81 landmark_detection_model_args (Dict): A dictionary of arguments for the landmark detection model. 
82 refinement_args (Optional[Dict]): Optional arguments for the refinement process, 83 e.g., `window_size`, `ksize`, `sigmaX`. Defaults to None. 84 derivation_dict (Optional[Dict]): A dictionary defining derivation rules for non-predefined landmarks. 85 Required if `do_derive` is True. 86 87 Raises: 88 ValueError: If `do_derive` is True but `derivation_dict` is None. 89 """ 90 # Directories 91 self.input_dir = input_dir 92 self.model_dir = model_dir 93 self.output_dir = output_dir 94 95 # Classes 96 self.class_dict = class_dict 97 self.classes = sorted(list(class_dict.keys())) 98 99 # Derivation 100 self.do_derive = do_derive 101 if self.do_derive: 102 if derivation_dict is None: 103 raise ValueError( 104 "`derivation_dict` must be provided if `do_derive=True`." 105 ) 106 self.derivation_dict = derivation_dict 107 else: 108 self.derivation_dict = None 109 110 # Refinement setup 111 self.do_refine = do_refine 112 self.do_refine = do_refine 113 if self.do_refine: 114 if refinement_args is None: 115 self.refinement_args = {} 116 self.refinement_args = refinement_args 117 else: 118 self.refinement_args = None 119 120 # Classification model setup 121 self.classification_model_path = classification_model_path 122 self.classification_model_args = classification_model_args 123 self.classification_model_class = classification_model_class 124 filtered_model_args = { 125 k: v 126 for k, v in self.classification_model_args.items() 127 if k not in ("resize_dim", "normalize_mean", "normalize_std") 128 } 129 130 # Load the model using the filtered arguments 131 self.classification_model = classification.load_model( 132 model_path=f"{self.model_dir}/{self.classification_model_path}", 133 model_class=self.classification_model_class, 134 model_args=filtered_model_args, 135 ) 136 137 # Segmentation model setup 138 self.segmentation_model_name = segmentation_model_name 139 self.segmentation_model_args = segmentation_model_args 140 self.segmentation_has_bg_color = "background_color" in 
segmentation_model_args 141 self.segmentation_model = segmentation.load_model( 142 pretrained_model=self.segmentation_model_name, 143 pretrained_model_args={ 144 "trust_remote_code": segmentation_model_args["trust_remote_code"] 145 }, 146 high_precision=segmentation_model_args["high_precision"], 147 ) 148 149 # Landmark detection model setup 150 self.landmark_detection_model_path = landmark_detection_model_path 151 self.landmark_detection_model_class = landmark_detection_model_class 152 self.landmark_detection_model_args = landmark_detection_model_args 153 self.landmark_detection_model = landmark.detection.load_model( 154 model_path=f"{self.model_dir}/{self.landmark_detection_model_path}", 155 model_class=self.landmark_detection_model_class, 156 ) 157 158 def summary(self): 159 """ 160 Prints a summary of the `tailor` agent's configuration, including directory paths, 161 defined classes, processing options (refine, derive), and loaded models. 162 """ 163 width = 80 164 sep = "=" * width 165 166 print(sep) 167 print("TAILOR AGENT SUMMARY".center(width)) 168 print(sep) 169 170 # Directories 171 print("DIRECTORY PATHS".center(width, "-")) 172 print(f"{'Input directory:':25} {self.input_dir}") 173 print(f"{'Model directory:':25} {self.model_dir}") 174 print(f"{'Output directory:':25} {self.output_dir}") 175 print() 176 177 # Classes 178 print("CLASSES".center(width, "-")) 179 print(f"{'Class Index':<11} | Class Name") 180 print(f"{'-'*11} | {'-'*66}") 181 for i, cls in enumerate(self.classes): 182 print(f"{i:<11} | {cls}") 183 print() 184 185 # Flags 186 print("OPTIONS".center(width, "-")) 187 print(f"{'Do refine?:':25} {self.do_refine}") 188 print(f"{'Do derive?:':25} {self.do_derive}") 189 print() 190 191 # Models 192 print("MODELS".center(width, "-")) 193 print( 194 f"{'Classification Model:':25} {self.classification_model_class.__name__}" 195 ) 196 print(f"{'Segmentation Model:':25} {self.segmentation_model_name}") 197 print(f"{' └─ Change BG color?:':25} 
{self.segmentation_has_bg_color}") 198 print( 199 f"{'Landmark Detection Model:':25} {self.landmark_detection_model_class.__class__.__name__}" 200 ) 201 print(sep) 202 203 def classify(self, image: str, verbose=False): 204 """ 205 Classifies a single garment image using the configured classification model. 206 207 Args: 208 image (str): The filename of the image to classify, located in `self.input_dir`. 209 verbose (bool): If True, prints detailed classification output. Defaults to False. 210 211 Returns: 212 tuple: 213 - label (str): The predicted class label of the garment. 214 - probabilities (List[float]): A list of probabilities for each class. 215 """ 216 label, probablities = classification.predict( 217 model=self.classification_model, 218 image_path=f"{self.input_dir}/{image}", 219 classes=self.classes, 220 resize_dim=self.classification_model_args.get("resize_dim"), 221 normalize_mean=self.classification_model_args.get("normalize_mean"), 222 normalize_std=self.classification_model_args.get("normalize_std"), 223 verbose=verbose, 224 ) 225 return label, probablities 226 227 def segment(self, image: str): 228 """ 229 Segments a single garment image to extract its mask and optionally modifies the background color. 230 231 Args: 232 image (str): The filename of the image to segment, located in `self.input_dir`. 233 234 Returns: 235 tuple: 236 - original_img (np.ndarray): The original image with the mask overlaid. 237 - mask (np.ndarray): The binary segmentation mask. 238 - bg_modified_img (np.ndarray, optional): The image with the background color changed, 239 returned only if `background_color` is specified 240 in `segmentation_model_args`. 
241 """ 242 original_img, mask = segmentation.extract( 243 model=self.segmentation_model, 244 image_path=f"{self.input_dir}/{image}", 245 resize_dim=self.segmentation_model_args.get("resize_dim"), 246 normalize_mean=self.segmentation_model_args.get("normalize_mean"), 247 normalize_std=self.segmentation_model_args.get("normalize_std"), 248 high_precision=self.segmentation_model_args.get("high_precision"), 249 ) 250 251 background_color = self.segmentation_model_args.get("background_color") 252 253 if background_color is None: 254 return original_img, mask 255 else: 256 bg_modified_img = segmentation.change_background_color( 257 image_np=original_img, mask_np=mask, background_color=background_color 258 ) 259 return original_img, mask, bg_modified_img 260 261 def detect(self, class_name: str, image: Union[str, np.ndarray]): 262 """ 263 Detects predefined landmarks on a garment image based on its classified class. 264 265 Args: 266 class_name (str): The classified name of the garment. 267 image (Union[str, np.ndarray]): The path to the image file or a NumPy array of the image. 268 269 Returns: 270 tuple: 271 - coords (np.array): Detected landmark coordinates. 272 - maxval (np.array): Confidence scores for detected landmarks. 273 - detection_dict (dict): A dictionary containing detailed landmark detection data. 
274 """ 275 if isinstance(image, str): 276 image = f"{self.input_dir}/{image}" 277 278 coords, maxval, detection_dict = landmark.detect( 279 class_name=class_name, 280 class_dict=self.class_dict, 281 image_path=image, 282 model=self.landmark_detection_model, 283 scale_std=self.landmark_detection_model_args.get("scale_std"), 284 resize_dim=self.landmark_detection_model_args.get("resize_dim"), 285 normalize_mean=self.landmark_detection_model_args.get("normalize_mean"), 286 normalize_std=self.landmark_detection_model_args.get("normalize_std"), 287 ) 288 return coords, maxval, detection_dict 289 290 def derive( 291 self, 292 class_name: str, 293 detection_dict: dict, 294 derivation_dict: dict, 295 landmark_coords: np.array, 296 np_mask: np.array, 297 ): 298 """ 299 Derives non-predefined landmark coordinates based on predefined landmarks and a mask. 300 301 Args: 302 class_name (str): The name of the garment class. 303 detection_dict (dict): The dictionary containing detected landmarks. 304 derivation_dict (dict): The dictionary defining derivation rules. 305 landmark_coords (np.array): NumPy array of initial landmark coordinates. 306 np_mask (np.array): NumPy array of the segmentation mask. 307 308 Returns: 309 tuple: 310 - derived_coords (dict): A dictionary of the newly derived landmark coordinates. 311 - updated_detection_dict (dict): The detection dictionary updated with derived landmarks. 
312 """ 313 derived_coords, updated_detection_dict = landmark.derive( 314 class_name=class_name, 315 detection_dict=detection_dict, 316 derivation_dict=derivation_dict, 317 landmark_coords=landmark_coords, 318 np_mask=np_mask, 319 ) 320 return derived_coords, updated_detection_dict 321 322 def refine( 323 self, 324 class_name: str, 325 detection_np: np.array, 326 detection_conf: np.array, 327 detection_dict: dict, 328 mask: np.array, 329 window_size: int = 5, 330 ksize: tuple = (11, 11), 331 sigmaX: float = 0.0, 332 ): 333 """ 334 Refines detected landmark coordinates using a blurred segmentation mask. 335 336 Args: 337 class_name (str): The name of the garment class. 338 detection_np (np.array): NumPy array of initial landmark predictions. 339 detection_conf (np.array): NumPy array of confidence scores for each predicted landmark. 340 detection_dict (dict): Dictionary containing landmark data for each class. 341 mask (np.array): Grayscale mask image used to guide refinement. 342 window_size (int, optional): Size of the window used in the refinement algorithm. Defaults to 5. 343 ksize (tuple, optional): Kernel size for Gaussian blur. Must be odd integers. Defaults to (11, 11). 344 sigmaX (float, optional): Gaussian kernel standard deviation in the X direction. Defaults to 0.0. 345 346 Returns: 347 tuple: 348 - refined_detection_np (np.array): Array of the same shape as `detection_np` with refined coordinates. 349 - detection_dict (dict): Updated detection dictionary with refined landmark coordinates. 
350 """ 351 if self.refinement_args: 352 if self.refinement_args.get("window_size") is not None: 353 window_size = self.refinement_args["window_size"] 354 if self.refinement_args.get("ksize") is not None: 355 ksize = self.refinement_args["ksize"] 356 if self.refinement_args.get("sigmaX") is not None: 357 sigmaX = self.refinement_args["sigmaX"] 358 359 refined_detection_np, refined_detection_dict = landmark.refine( 360 class_name=class_name, 361 detection_np=detection_np, 362 detection_conf=detection_conf, 363 detection_dict=detection_dict, 364 mask=mask, 365 window_size=window_size, 366 ksize=ksize, 367 sigmaX=sigmaX, 368 ) 369 370 return refined_detection_np, refined_detection_dict 371 372 def measure( 373 self, 374 save_segmentation_image: bool = False, 375 save_measurement_image: bool = False, 376 ): 377 """ 378 Executes the full garment measurement pipeline for all images in the input directory. 379 380 This method processes each image through a multi-stage pipeline that includes garment classification, 381 segmentation, landmark detection, optional refinement, and measurement derivation. During classification, 382 the system identifies the type of garment (e.g., shirt, dress, pants). Segmentation follows, producing 383 binary or instance masks that separate the garment from the background. Landmark detection is then 384 performed to locate anatomical or garment-specific keypoints such as shoulders or waist positions. If 385 enabled, an optional refinement step applies post-processing or model-based corrections to improve the 386 accuracy of detected keypoints. Finally, the system calculates key garment dimensions - such as chest width, 387 waist width, and full length - based on the detected landmarks. In addition to this processing pipeline, 388 the method also manages data and visual output exports. For each input image, a cleaned JSON file is 389 generated containing the predicted garment class, landmark coordinates, and the resulting measurements. 
390 Optionally, visual outputs such as segmentation masks and images annotated with landmarks and measurements 391 can be saved to assist in inspection or debugging. 392 393 Args: 394 save_segmentation_image (bool): If True, saves segmentation masks and background-modified images. 395 Defaults to False. 396 save_measurement_image (bool): If True, saves images overlaid with detected landmarks and measurements. 397 Defaults to False. 398 399 Returns: 400 tuple: 401 - metadata (pd.DataFrame): A DataFrame containing metadata for each processed image, such as: 402 - Original image path 403 - Paths to any saved segmentation or annotated images 404 - Class and measurement results 405 - outputs (dict): A dictionary mapping image filenames to their detailed processing results, including: 406 - Predicted class 407 - Detected landmarks with coordinates and confidence scores 408 - Calculated measurements 409 - File paths to any saved images (if applicable) 410 411 Example of exported JSON: 412 ``` 413 { 414 "cloth_3.jpg": { 415 "class": "vest dress", 416 "landmarks": { 417 "10": { 418 "conf": 0.7269417643547058, 419 "x": 611.0, 420 "y": 861.0 421 }, 422 "16": { 423 "conf": 0.6769524812698364, 424 "x": 1226.0, 425 "y": 838.0 426 }, 427 "17": { 428 "conf": 0.7472652196884155, 429 "x": 1213.0, 430 "y": 726.0 431 }, 432 "18": { 433 "conf": 0.7360446453094482, 434 "x": 1238.0, 435 "y": 613.0 436 }, 437 "2": { 438 "conf": 0.9256571531295776, 439 "x": 703.0, 440 "y": 264.0 441 }, 442 "20": { 443 "x": 700.936, 444 "y": 2070.0 445 }, 446 "8": { 447 "conf": 0.7129100561141968, 448 "x": 563.0, 449 "y": 613.0 450 }, 451 "9": { 452 "conf": 0.8203497529029846, 453 "x": 598.0, 454 "y": 726.0 455 } 456 }, 457 "measurements": { 458 "chest": { 459 "distance": 675.0, 460 "landmarks": { 461 "end": "18", 462 "start": "8" 463 } 464 }, 465 "full length": { 466 "distance": 1806.0011794281863, 467 "landmarks": { 468 "end": "20", 469 "start": "2" 470 } 471 }, 472 "hips": { 473 "distance": 
615.4299310238331, 474 "landmarks": { 475 "end": "16", 476 "start": "10" 477 } 478 }, 479 "waist": { 480 "distance": 615.0, 481 "landmarks": { 482 "end": "17", 483 "start": "9" 484 } 485 } 486 } 487 } 488 } 489 ``` 490 """ 491 # Some helper variables 492 use_bg_color = self.segmentation_model_args.get("background_color") is not None 493 outputs = {} 494 495 # Step 1: Create the output directory 496 Path(self.output_dir).mkdir(parents=True, exist_ok=True) 497 Path(f"{self.output_dir}/measurement_json").mkdir(parents=True, exist_ok=True) 498 499 if save_segmentation_image and ( 500 use_bg_color or self.do_derive or self.do_refine 501 ): 502 Path(f"{self.output_dir}/mask_image").mkdir(parents=True, exist_ok=True) 503 if use_bg_color: 504 Path(f"{self.output_dir}/bg_modified_image").mkdir( 505 parents=True, exist_ok=True 506 ) 507 508 if save_measurement_image: 509 Path(f"{self.output_dir}/measurement_image").mkdir( 510 parents=True, exist_ok=True 511 ) 512 513 # Step 2: Collect image filenames from input_dir 514 image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff"] 515 input_path = Path(self.input_dir) 516 517 image_files = [] 518 for ext in image_extensions: 519 image_files.extend(input_path.glob(ext)) 520 521 # Step 3: Determine column structure 522 columns = [ 523 "filename", 524 "class", 525 "mask_image" if use_bg_color or self.do_derive or self.do_refine else None, 526 "bg_modified_image" if use_bg_color else None, 527 "measurement_image", 528 "measurement_json", 529 ] 530 columns = [col for col in columns if col is not None] 531 532 metadata = pd.DataFrame(columns=columns) 533 metadata["filename"] = [img.name for img in image_files] 534 535 # Step 4: Print start message and information 536 print(f"Start measuring {len(metadata['filename'])} garment images ...") 537 538 if self.do_derive and self.do_refine: 539 message = ( 540 "There are 5 measurement steps: classification, segmentation, " 541 "landmark detection, landmark refinement, and landmark 
derivation." 542 ) 543 elif self.do_derive: 544 message = ( 545 "There are 4 measurement steps: classification, segmentation, " 546 "landmark detection, and landmark derivation." 547 ) 548 elif self.do_refine: 549 message = ( 550 "There are 4 measurement steps: classification, segmentation, " 551 "landmark detection, and landmark refinement." 552 ) 553 elif use_bg_color: 554 message = ( 555 "There are 3 measurement steps: classification, segmentation, " 556 "and landmark detection." 557 ) 558 else: 559 message = ( 560 "There are 2 measurement steps: classification and landmark detection." 561 ) 562 563 print(textwrap.fill(message, width=80)) 564 565 # Step 5: Classification 566 for idx, image in tqdm( 567 enumerate(metadata["filename"]), total=len(metadata), desc="Classification" 568 ): 569 label, _ = self.classify(image=image, verbose=False) 570 metadata.at[idx, "class"] = label 571 outputs[image] = {} 572 573 # Step 6: Segmentation 574 if use_bg_color or (self.do_derive or self.do_refine): 575 for idx, image in tqdm( 576 enumerate(metadata["filename"]), 577 total=len(metadata), 578 desc="Segmentation", 579 ): 580 if use_bg_color: 581 original_img, mask, bg_modified_image = self.segment(image=image) 582 outputs[image] = { 583 "mask": mask, 584 "bg_modified_image": bg_modified_image, 585 } 586 else: 587 original_img, mask = self.segment(image=image) 588 outputs[image] = { 589 "mask": mask, 590 } 591 592 # Step 7: Landmark detection 593 for idx, image in tqdm( 594 enumerate(metadata["filename"]), 595 total=len(metadata), 596 desc="Landmark detection", 597 ): 598 label = metadata.loc[metadata["filename"] == image, "class"].values[0] 599 if use_bg_color: 600 coords, maxvals, detection_dict = self.detect( 601 class_name=label, image=outputs[image]["bg_modified_image"] 602 ) 603 outputs[image]["detection_dict"] = detection_dict 604 if self.do_derive or self.do_refine: 605 outputs[image]["coords"] = coords 606 outputs[image]["maxvals"] = maxvals 607 else: 608 coords, 
maxvals, detection_dict = self.detect( 609 class_name=label, image=image 610 ) 611 outputs[image]["detection_dict"] = detection_dict 612 if self.do_derive or self.do_refine: 613 outputs[image]["coords"] = coords 614 outputs[image]["maxvals"] = maxvals 615 616 # Step 8: Landmark refinement 617 if self.do_refine: 618 for idx, image in tqdm( 619 enumerate(metadata["filename"]), 620 total=len(metadata), 621 desc="Landmark refinement", 622 ): 623 label = metadata.loc[metadata["filename"] == image, "class"].values[0] 624 updated_coords, updated_detection_dict = self.refine( 625 class_name=label, 626 detection_np=outputs[image]["coords"], 627 detection_conf=outputs[image]["maxvals"], 628 detection_dict=outputs[image]["detection_dict"], 629 mask=outputs[image]["mask"], 630 ) 631 outputs[image]["coords"] = updated_coords 632 outputs[image]["detection_dict"] = updated_detection_dict 633 634 # Step 9: Landmark derivation 635 if self.do_derive: 636 for idx, image in tqdm( 637 enumerate(metadata["filename"]), 638 total=len(metadata), 639 desc="Landmark derivation", 640 ): 641 label = metadata.loc[metadata["filename"] == image, "class"].values[0] 642 derived_coords, updated_detection_dict = self.derive( 643 class_name=label, 644 detection_dict=outputs[image]["detection_dict"], 645 derivation_dict=self.derivation_dict, 646 landmark_coords=outputs[image]["coords"], 647 np_mask=outputs[image]["mask"], 648 ) 649 outputs[image]["detection_dict"] = updated_detection_dict 650 651 # Step 10: Save segmentation image 652 if save_segmentation_image and ( 653 use_bg_color or self.do_derive or self.do_refine 654 ): 655 for idx, image in tqdm( 656 enumerate(metadata["filename"]), 657 total=len(metadata), 658 desc="Save segmentation image", 659 ): 660 transformed_name = os.path.splitext(image)[0] 661 Image.fromarray(outputs[image]["mask"]).save( 662 f"{self.output_dir}/mask_image/{transformed_name}_mask.png" 663 ) 664 metadata.at[ 665 idx, "mask_image" 666 ] = 
f"{self.output_dir}/mask_image/{transformed_name}_mask.png" 667 if use_bg_color: 668 Image.fromarray(outputs[image]["bg_modified_image"]).save( 669 f"{self.output_dir}/bg_modified_image/{transformed_name}_bg_modified.png" 670 ) 671 metadata.at[ 672 idx, "bg_modified_image" 673 ] = f"{self.output_dir}/bg_modified_image/{transformed_name}_bg_modified.png" 674 675 # Step 10: Save measurement image 676 if save_measurement_image: 677 for idx, image in tqdm( 678 enumerate(metadata["filename"]), 679 total=len(metadata), 680 desc="Save measurement image", 681 ): 682 label = metadata.loc[metadata["filename"] == image, "class"].values[0] 683 transformed_name = os.path.splitext(image)[0] 684 685 image_to_save = Image.open(f"{self.input_dir}/{image}").convert("RGB") 686 draw = ImageDraw.Draw(image_to_save) 687 font = ImageFont.load_default() 688 landmarks = outputs[image]["detection_dict"][label]["landmarks"] 689 690 for lm_id, lm_data in landmarks.items(): 691 x, y = lm_data["x"], lm_data["y"] 692 radius = 5 693 draw.ellipse( 694 (x - radius, y - radius, x + radius, y + radius), fill="green" 695 ) 696 draw.text((x + 8, y - 8), lm_id, fill="green", font=font) 697 698 image_to_save.save( 699 f"{self.output_dir}/measurement_image/{transformed_name}_measurement.png" 700 ) 701 metadata.at[ 702 idx, "measurement_image" 703 ] = f"{self.output_dir}/measurement_image/{transformed_name}_measurement.png" 704 705 # Step 11: Save measurement json 706 for idx, image in tqdm( 707 enumerate(metadata["filename"]), 708 total=len(metadata), 709 desc="Save measurement json", 710 ): 711 label = metadata.loc[metadata["filename"] == image, "class"].values[0] 712 transformed_name = os.path.splitext(image)[0] 713 714 # Clean the detection dictionary 715 final_dict = utils.clean_detection_dict( 716 class_name=label, 717 image_name=image, 718 detection_dict=outputs[image]["detection_dict"], 719 ) 720 721 # Export JSON 722 utils.export_dict_to_json( 723 data=final_dict, 724 
filename=f"{self.output_dir}/measurement_json/{transformed_name}_measurement.json", 725 ) 726 727 metadata.at[ 728 idx, "measurement_json" 729 ] = f"{self.output_dir}/measurement_json/{transformed_name}_measurement.json" 730 731 # Step 12: Save metadata as a CSV 732 metadata.to_csv(f"{self.output_dir}/metadata.csv", index=False) 733 734 return metadata, outputs
The `tailor` class acts as a central agent for the GarmentIQ pipeline, orchestrating garment measurement from classification to landmark derivation.
It integrates functionalities from other modules (classification, segmentation, landmark) to provide a smooth end-to-end process for automated garment measurement from images.
Attributes:
- input_dir (str): Directory containing input images.
- model_dir (str): Directory where models are stored.
- output_dir (str): Directory to save processed outputs.
- class_dict (dict): Dictionary defining garment classes and their properties.
- do_derive (bool): Flag to enable landmark derivation.
- do_refine (bool): Flag to enable landmark refinement.
- classification_model_path (str): Path to the classification model.
- classification_model_class (Type[nn.Module]): Class definition for the classification model.
- classification_model_args (Dict): Arguments for the classification model.
- segmentation_model_name (str): Name or path for the segmentation model.
- segmentation_model_args (Dict): Arguments for the segmentation model.
- landmark_detection_model_path (str): Path to the landmark detection model.
- landmark_detection_model_class (Type[nn.Module]): Class definition for the landmark detection model.
- landmark_detection_model_args (Dict): Arguments for the landmark detection model.
- refinement_args (Optional[Dict]): Arguments for landmark refinement.
- derivation_dict (Optional[Dict]): Dictionary for landmark derivation rules.
def __init__(
    self,
    input_dir: str,
    model_dir: str,
    output_dir: str,
    class_dict: dict,
    do_derive: bool,
    do_refine: bool,
    classification_model_path: str,
    classification_model_class: Type[nn.Module],
    classification_model_args: Dict,
    segmentation_model_name: str,
    segmentation_model_args: Dict,
    landmark_detection_model_path: str,
    landmark_detection_model_class: Type[nn.Module],
    landmark_detection_model_args: Dict,
    refinement_args: Optional[Dict] = None,
    derivation_dict: Optional[Dict] = None,
):
    """
    Initializes the `tailor` agent with paths, model configurations, and processing flags.

    All three models (classification, segmentation, landmark detection) are loaded
    eagerly here, so constructing the agent performs model loading.

    Args:
        input_dir (str): Path to the directory containing input images.
        model_dir (str): Path to the directory where all required models are stored.
        output_dir (str): Path to the directory where all processed outputs will be saved.
        class_dict (dict): A dictionary defining the garment classes, their predefined points,
                           index ranges, and instruction JSON file paths. Its sorted keys
                           become `self.classes`.
        do_derive (bool): If True, enables the landmark derivation step.
        do_refine (bool): If True, enables the landmark refinement step.
        classification_model_path (str): The filename or relative path (inside `model_dir`)
                                         to the classification model.
        classification_model_class (Type[nn.Module]): The Python class of the classification model.
        classification_model_args (Dict): Arguments for the classification model. The keys
                                          `resize_dim`, `normalize_mean` and `normalize_std`
                                          are preprocessing settings and are not forwarded
                                          to the model loader.
        segmentation_model_name (str): The name or path of the pretrained segmentation model.
        segmentation_model_args (Dict): Arguments for the segmentation model. Must contain
                                        `trust_remote_code` and `high_precision`; may contain
                                        `background_color`.
        landmark_detection_model_path (str): The filename or relative path (inside `model_dir`)
                                             to the landmark detection model.
        landmark_detection_model_class (Type[nn.Module]): The Python class of the landmark
                                                          detection model.
        landmark_detection_model_args (Dict): Arguments for the landmark detection model.
        refinement_args (Optional[Dict]): Optional arguments for the refinement process,
                                          e.g., `window_size`, `ksize`, `sigmaX`. Defaults to None.
        derivation_dict (Optional[Dict]): A dictionary defining derivation rules for
                                          non-predefined landmarks. Required if `do_derive` is True.

    Raises:
        ValueError: If `do_derive` is True but `derivation_dict` is None.
        KeyError: If `segmentation_model_args` lacks `trust_remote_code` or `high_precision`.
    """
    # Directories
    self.input_dir = input_dir
    self.model_dir = model_dir
    self.output_dir = output_dir

    # Classes: the sorted key order defines the class index used by the classifier.
    self.class_dict = class_dict
    self.classes = sorted(class_dict)

    # Derivation: rules are only kept when derivation is enabled.
    self.do_derive = do_derive
    if self.do_derive and derivation_dict is None:
        raise ValueError(
            "`derivation_dict` must be provided if `do_derive=True`."
        )
    self.derivation_dict = derivation_dict if self.do_derive else None

    # Refinement setup: fall back to an empty dict when refinement is enabled but
    # no overrides were supplied (previously the fallback was overwritten by None).
    self.do_refine = do_refine
    if self.do_refine:
        self.refinement_args = {} if refinement_args is None else refinement_args
    else:
        self.refinement_args = None

    # Classification model setup
    self.classification_model_path = classification_model_path
    self.classification_model_args = classification_model_args
    self.classification_model_class = classification_model_class

    # Preprocessing keys are consumed by `classify`, not by the model constructor.
    preprocessing_keys = ("resize_dim", "normalize_mean", "normalize_std")
    filtered_model_args = {
        k: v
        for k, v in self.classification_model_args.items()
        if k not in preprocessing_keys
    }

    # Load the model using the filtered arguments
    self.classification_model = classification.load_model(
        model_path=f"{self.model_dir}/{self.classification_model_path}",
        model_class=self.classification_model_class,
        model_args=filtered_model_args,
    )

    # Segmentation model setup
    self.segmentation_model_name = segmentation_model_name
    self.segmentation_model_args = segmentation_model_args
    # Same test `segment` uses: a key that is present but None does not
    # change the background, so it must not be reported as enabled.
    self.segmentation_has_bg_color = (
        segmentation_model_args.get("background_color") is not None
    )
    self.segmentation_model = segmentation.load_model(
        pretrained_model=self.segmentation_model_name,
        pretrained_model_args={
            "trust_remote_code": segmentation_model_args["trust_remote_code"]
        },
        high_precision=segmentation_model_args["high_precision"],
    )

    # Landmark detection model setup
    self.landmark_detection_model_path = landmark_detection_model_path
    self.landmark_detection_model_class = landmark_detection_model_class
    self.landmark_detection_model_args = landmark_detection_model_args
    self.landmark_detection_model = landmark.detection.load_model(
        model_path=f"{self.model_dir}/{self.landmark_detection_model_path}",
        model_class=self.landmark_detection_model_class,
    )
Initializes the `tailor` agent with paths, model configurations, and processing flags.
Arguments:
- input_dir (str): Path to the directory containing input images.
- model_dir (str): Path to the directory where all required models are stored.
- output_dir (str): Path to the directory where all processed outputs will be saved.
- class_dict (dict): A dictionary defining the garment classes, their predefined points, index ranges, and instruction JSON file paths.
- do_derive (bool): If True, enables the landmark derivation step.
- do_refine (bool): If True, enables the landmark refinement step.
- classification_model_path (str): The filename or relative path to the classification model.
- classification_model_class (Type[nn.Module]): The Python class of the classification model.
- classification_model_args (Dict): A dictionary of arguments to initialize the classification model.
- segmentation_model_name (str): The name or path of the pretrained segmentation model.
- segmentation_model_args (Dict): A dictionary of arguments for the segmentation model.
- landmark_detection_model_path (str): The filename or relative path to the landmark detection model.
- landmark_detection_model_class (Type[nn.Module]): The Python class of the landmark detection model.
- landmark_detection_model_args (Dict): A dictionary of arguments for the landmark detection model.
- refinement_args (Optional[Dict]): Optional arguments for the refinement process, e.g., `window_size`, `ksize`, `sigmaX`. Defaults to None.
- derivation_dict (Optional[Dict]): A dictionary defining derivation rules for non-predefined landmarks. Required if `do_derive` is True.
Raises:
- ValueError: If `do_derive` is True but `derivation_dict` is None.
def summary(self):
    """
    Prints a summary of the `tailor` agent's configuration, including directory paths,
    defined classes, processing options (refine, derive), and loaded models.

    The report is written to standard output as an 80-column table; nothing is returned.
    """
    width = 80
    sep = "=" * width

    print(sep)
    print("TAILOR AGENT SUMMARY".center(width))
    print(sep)

    # Directories
    print("DIRECTORY PATHS".center(width, "-"))
    print(f"{'Input directory:':25} {self.input_dir}")
    print(f"{'Model directory:':25} {self.model_dir}")
    print(f"{'Output directory:':25} {self.output_dir}")
    print()

    # Classes: the index shown is the position in the sorted class list.
    print("CLASSES".center(width, "-"))
    print(f"{'Class Index':<11} | Class Name")
    print(f"{'-'*11} | {'-'*66}")
    for i, cls in enumerate(self.classes):
        print(f"{i:<11} | {cls}")
    print()

    # Flags
    print("OPTIONS".center(width, "-"))
    print(f"{'Do refine?:':25} {self.do_refine}")
    print(f"{'Do derive?:':25} {self.do_derive}")
    print()

    # Models
    print("MODELS".center(width, "-"))
    print(
        f"{'Classification Model:':25} {self.classification_model_class.__name__}"
    )
    print(f"{'Segmentation Model:':25} {self.segmentation_model_name}")
    print(f"{' └─ Change BG color?:':25} {self.segmentation_has_bg_color}")
    # The attribute holds the model *class*, so `__name__` is its name;
    # `__class__.__name__` would report the metaclass ("type") instead.
    print(
        f"{'Landmark Detection Model:':25} {self.landmark_detection_model_class.__name__}"
    )
    print(sep)
Prints a summary of the `tailor` agent's configuration, including directory paths, defined classes, processing options (refine, derive), and loaded models.
def classify(self, image: str, verbose=False):
    """
    Runs the configured classification model on one image from the input directory.

    Args:
        image (str): Filename of the image, resolved relative to `self.input_dir`.
        verbose (bool): Forwarded to `classification.predict`. Defaults to False.

    Returns:
        tuple:
            - label (str): The predicted class label of the garment.
            - probabilities (List[float]): A list of probabilities for each class.
    """
    # Preprocessing settings live alongside the model arguments; absent keys
    # are forwarded as None.
    preprocessing = self.classification_model_args
    prediction = classification.predict(
        model=self.classification_model,
        image_path=f"{self.input_dir}/{image}",
        classes=self.classes,
        resize_dim=preprocessing.get("resize_dim"),
        normalize_mean=preprocessing.get("normalize_mean"),
        normalize_std=preprocessing.get("normalize_std"),
        verbose=verbose,
    )
    label, probabilities = prediction
    return label, probabilities
Classifies a single garment image using the configured classification model.
Arguments:
- image (str): The filename of the image to classify, located in `self.input_dir`.
- verbose (bool): If True, prints detailed classification output. Defaults to False.
Returns:
tuple:
- label (str): The predicted class label of the garment.
- probabilities (List[float]): A list of probabilities for each class.
def segment(self, image: str):
    """
    Extracts the segmentation mask of one image and, when configured, recolors its background.

    Args:
        image (str): Filename of the image, resolved relative to `self.input_dir`.

    Returns:
        tuple:
            - original_img (np.ndarray): The image array returned by `segmentation.extract`.
            - mask (np.ndarray): The segmentation mask returned by `segmentation.extract`.
            - bg_modified_img (np.ndarray, optional): The image with the background color changed;
                                                      present only when `background_color` in
                                                      `segmentation_model_args` is not None.
    """
    seg_args = self.segmentation_model_args
    original_img, mask = segmentation.extract(
        model=self.segmentation_model,
        image_path=f"{self.input_dir}/{image}",
        resize_dim=seg_args.get("resize_dim"),
        normalize_mean=seg_args.get("normalize_mean"),
        normalize_std=seg_args.get("normalize_std"),
        high_precision=seg_args.get("high_precision"),
    )

    background_color = seg_args.get("background_color")

    # Callers rely on the tuple length: three items only when a color is configured.
    if background_color is not None:
        bg_modified_img = segmentation.change_background_color(
            image_np=original_img, mask_np=mask, background_color=background_color
        )
        return original_img, mask, bg_modified_img

    return original_img, mask
Segments a single garment image to extract its mask and optionally modifies the background color.
Arguments:
- image (str): The filename of the image to segment, located in `self.input_dir`.
Returns:
tuple:
- original_img (np.ndarray): The original image with the mask overlaid.
- mask (np.ndarray): The binary segmentation mask.
- bg_modified_img (np.ndarray, optional): The image with the background color changed, returned only if `background_color` is specified in `segmentation_model_args`.
def detect(self, class_name: str, image: Union[str, np.ndarray]):
    """
    Locates the predefined landmarks of a garment of the given class.

    Args:
        class_name (str): The classified name of the garment.
        image (Union[str, np.ndarray]): Either a filename, which is resolved relative to
                                        `self.input_dir`, or an image passed through unchanged
                                        (e.g. a NumPy array).

    Returns:
        tuple:
            - coords (np.array): Detected landmark coordinates.
            - maxval (np.array): Confidence scores for detected landmarks.
            - detection_dict (dict): A dictionary containing detailed landmark detection data.
    """
    # Only string inputs are treated as filenames; anything else is forwarded as-is.
    image_source = f"{self.input_dir}/{image}" if isinstance(image, str) else image

    detector_args = self.landmark_detection_model_args
    points, scores, details = landmark.detect(
        class_name=class_name,
        class_dict=self.class_dict,
        image_path=image_source,
        model=self.landmark_detection_model,
        scale_std=detector_args.get("scale_std"),
        resize_dim=detector_args.get("resize_dim"),
        normalize_mean=detector_args.get("normalize_mean"),
        normalize_std=detector_args.get("normalize_std"),
    )
    return points, scores, details
Detects predefined landmarks on a garment image based on its classified class.
Arguments:
- class_name (str): The classified name of the garment.
- image (Union[str, np.ndarray]): The filename of an image located in self.input_dir, or a NumPy array of the image.
Returns:
tuple: - coords (np.array): Detected landmark coordinates. - maxval (np.array): Confidence scores for detected landmarks. - detection_dict (dict): A dictionary containing detailed landmark detection data.
def derive(
    self,
    class_name: str,
    detection_dict: dict,
    derivation_dict: dict,
    landmark_coords: np.array,
    np_mask: np.array,
):
    """
    Derive additional (non-predefined) landmarks from detected ones and a mask.

    This is a direct delegation to `landmark.derive`; no instance state is read.

    Args:
        class_name (str): The name of the garment class.
        detection_dict (dict): The dictionary containing detected landmarks.
        derivation_dict (dict): The dictionary defining derivation rules.
        landmark_coords (np.array): NumPy array of initial landmark coordinates.
        np_mask (np.array): NumPy array of the segmentation mask.

    Returns:
        tuple:
            - derived_coords (dict): The newly derived landmark coordinates.
            - updated_detection_dict (dict): The detection dictionary updated
              with the derived landmarks.
    """
    derivation_inputs = {
        "class_name": class_name,
        "detection_dict": detection_dict,
        "derivation_dict": derivation_dict,
        "landmark_coords": landmark_coords,
        "np_mask": np_mask,
    }
    derived_coords, updated_detection_dict = landmark.derive(**derivation_inputs)
    return derived_coords, updated_detection_dict
Derives non-predefined landmark coordinates based on predefined landmarks and a mask.
Arguments:
- class_name (str): The name of the garment class.
- detection_dict (dict): The dictionary containing detected landmarks.
- derivation_dict (dict): The dictionary defining derivation rules.
- landmark_coords (np.array): NumPy array of initial landmark coordinates.
- np_mask (np.array): NumPy array of the segmentation mask.
Returns:
tuple: - derived_coords (dict): A dictionary of the newly derived landmark coordinates. - updated_detection_dict (dict): The detection dictionary updated with derived landmarks.
def refine(
    self,
    class_name: str,
    detection_np: np.array,
    detection_conf: np.array,
    detection_dict: dict,
    mask: np.array,
    window_size: int = 5,
    ksize: tuple = (11, 11),
    sigmaX: float = 0.0,
):
    """
    Refine detected landmark coordinates with the help of a segmentation mask.

    Any of `window_size`, `ksize` and `sigmaX` that has a non-None entry in
    `self.refinement_args` is replaced by that entry, so the instance-level
    configuration takes precedence over the values passed to this call.

    Args:
        class_name (str): The name of the garment class.
        detection_np (np.array): NumPy array of initial landmark predictions.
        detection_conf (np.array): Confidence scores for each predicted landmark.
        detection_dict (dict): Dictionary containing landmark data for each class.
        mask (np.array): Mask image used to guide refinement.
        window_size (int, optional): Window size for the refinement algorithm.
            Defaults to 5.
        ksize (tuple, optional): Kernel size for the Gaussian blur.
            Defaults to (11, 11).
        sigmaX (float, optional): Gaussian kernel standard deviation in the
            X direction. Defaults to 0.0.

    Returns:
        tuple:
            - refined_detection_np: Refined landmark coordinates.
            - refined_detection_dict (dict): Detection dictionary updated with
              the refined coordinates.
    """
    tuning = {"window_size": window_size, "ksize": ksize, "sigmaX": sigmaX}

    configured = self.refinement_args
    if configured:
        for option in tuning:
            configured_value = configured.get(option)
            # Absent or explicit-None entries leave the call argument in effect.
            if configured_value is not None:
                tuning[option] = configured_value

    refined_detection_np, refined_detection_dict = landmark.refine(
        class_name=class_name,
        detection_np=detection_np,
        detection_conf=detection_conf,
        detection_dict=detection_dict,
        mask=mask,
        **tuning,
    )
    return refined_detection_np, refined_detection_dict
Refines detected landmark coordinates using a blurred segmentation mask.
Arguments:
- class_name (str): The name of the garment class.
- detection_np (np.array): NumPy array of initial landmark predictions.
- detection_conf (np.array): NumPy array of confidence scores for each predicted landmark.
- detection_dict (dict): Dictionary containing landmark data for each class.
- mask (np.array): Grayscale mask image used to guide refinement.
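- window_size (int, optional): Size of the window used in the refinement algorithm. Defaults to 5. Overridden by a non-None "window_size" entry in self.refinement_args.
- ksize (tuple, optional): Kernel size for Gaussian blur. Must be odd integers. Defaults to (11, 11). Overridden by a non-None "ksize" entry in self.refinement_args.
- sigmaX (float, optional): Gaussian kernel standard deviation in the X direction. Defaults to 0.0. Overridden by a non-None "sigmaX" entry in self.refinement_args.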
Returns:
tuple: - refined_detection_np (np.array): Array of the same shape as
detection_np
with refined coordinates. - detection_dict (dict): Updated detection dictionary with refined landmark coordinates.
def measure(
    self,
    save_segmentation_image: bool = False,
    save_measurement_image: bool = False,
):
    """
    Executes the full garment measurement pipeline for all images in the input directory.

    Every image found directly inside `self.input_dir` (extensions jpg, jpeg,
    png, bmp, tiff) is pushed through the stages below, one stage at a time
    for the whole batch:

    1. Classification via `self.classify`.
    2. Segmentation via `self.segment` - only when a background color is
       configured or refinement/derivation is enabled.
    3. Landmark detection via `self.detect`, on the background-modified image
       when a background color is configured, otherwise on the original file.
    4. Optional landmark refinement via `self.refine` (`self.do_refine`).
    5. Optional landmark derivation via `self.derive` (`self.do_derive`).

    Afterwards the detection dictionary of each image is cleaned with
    `utils.clean_detection_dict` and exported as a JSON file, optional images
    are written, and the metadata table is saved to `metadata.csv` in
    `self.output_dir`.

    NOTE: output files are named after the input filename without its
    extension, so two inputs that differ only by extension (e.g. `a.jpg` and
    `a.png`) write to the same output files.

    Args:
        save_segmentation_image (bool): If True, saves segmentation masks and
            (when a background color is configured) background-modified images.
            Has no effect when segmentation is not run. Defaults to False.
        save_measurement_image (bool): If True, saves a copy of each input
            image with the landmarks drawn and labelled on it. Defaults to False.

    Returns:
        tuple:
            - metadata (pd.DataFrame): One row per image with the columns
              `filename`, `class`, `measurement_image`, `measurement_json`,
              plus `mask_image` when segmentation runs and `bg_modified_image`
              when a background color is configured. Image-path columns are
              only filled when the corresponding images are saved.
            - outputs (dict): Maps each image filename to its intermediate
              results: `detection_dict` always, `mask` when segmentation runs,
              `bg_modified_image` when a background color is configured, and
              `coords` / `maxvals` when refinement or derivation is enabled.

    Example of exported JSON:
    ```
    {
        "cloth_3.jpg": {
            "class": "vest dress",
            "landmarks": {
                "10": {
                    "conf": 0.7269417643547058,
                    "x": 611.0,
                    "y": 861.0
                },
                "16": {
                    "conf": 0.6769524812698364,
                    "x": 1226.0,
                    "y": 838.0
                },
                "17": {
                    "conf": 0.7472652196884155,
                    "x": 1213.0,
                    "y": 726.0
                },
                "18": {
                    "conf": 0.7360446453094482,
                    "x": 1238.0,
                    "y": 613.0
                },
                "2": {
                    "conf": 0.9256571531295776,
                    "x": 703.0,
                    "y": 264.0
                },
                "20": {
                    "x": 700.936,
                    "y": 2070.0
                },
                "8": {
                    "conf": 0.7129100561141968,
                    "x": 563.0,
                    "y": 613.0
                },
                "9": {
                    "conf": 0.8203497529029846,
                    "x": 598.0,
                    "y": 726.0
                }
            },
            "measurements": {
                "chest": {
                    "distance": 675.0,
                    "landmarks": {
                        "end": "18",
                        "start": "8"
                    }
                },
                "full length": {
                    "distance": 1806.0011794281863,
                    "landmarks": {
                        "end": "20",
                        "start": "2"
                    }
                },
                "hips": {
                    "distance": 615.4299310238331,
                    "landmarks": {
                        "end": "16",
                        "start": "10"
                    }
                },
                "waist": {
                    "distance": 615.0,
                    "landmarks": {
                        "end": "17",
                        "start": "9"
                    }
                }
            }
        }
    }
    ```
    """
    # Some helper variables
    out_dir = self.output_dir
    use_bg_color = self.segmentation_model_args.get("background_color") is not None
    # Segmentation is needed for recoloring and as input to refine/derive.
    run_segmentation = bool(use_bg_color or self.do_derive or self.do_refine)
    # Raw coordinates/confidences are only consumed by refine/derive.
    keep_coords = bool(self.do_derive or self.do_refine)
    outputs = {}

    # Step 1: Create the output directories
    Path(out_dir).mkdir(parents=True, exist_ok=True)
    Path(f"{out_dir}/measurement_json").mkdir(parents=True, exist_ok=True)

    if save_segmentation_image and run_segmentation:
        Path(f"{out_dir}/mask_image").mkdir(parents=True, exist_ok=True)
        if use_bg_color:
            Path(f"{out_dir}/bg_modified_image").mkdir(parents=True, exist_ok=True)

    if save_measurement_image:
        Path(f"{out_dir}/measurement_image").mkdir(parents=True, exist_ok=True)

    # Step 2: Collect image filenames from input_dir
    image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff"]
    input_path = Path(self.input_dir)

    image_files = []
    for ext in image_extensions:
        image_files.extend(input_path.glob(ext))

    # Step 3: Determine column structure
    columns = ["filename", "class"]
    if run_segmentation:
        columns.append("mask_image")
    if use_bg_color:
        columns.append("bg_modified_image")
    columns += ["measurement_image", "measurement_json"]

    metadata = pd.DataFrame(columns=columns)
    # Assigning the list gives the frame a 0..n-1 index, which is what the
    # enumerate() positions below address through `metadata.at`.
    metadata["filename"] = [img.name for img in image_files]

    # Step 4: Print start message and information
    print(f"Start measuring {len(metadata['filename'])} garment images ...")

    if self.do_derive and self.do_refine:
        message = (
            "There are 5 measurement steps: classification, segmentation, "
            "landmark detection, landmark refinement, and landmark derivation."
        )
    elif self.do_derive:
        message = (
            "There are 4 measurement steps: classification, segmentation, "
            "landmark detection, and landmark derivation."
        )
    elif self.do_refine:
        message = (
            "There are 4 measurement steps: classification, segmentation, "
            "landmark detection, and landmark refinement."
        )
    elif use_bg_color:
        message = (
            "There are 3 measurement steps: classification, segmentation, "
            "and landmark detection."
        )
    else:
        message = (
            "There are 2 measurement steps: classification and landmark detection."
        )

    print(textwrap.fill(message, width=80))

    # Step 5: Classification
    for idx, image in tqdm(
        enumerate(metadata["filename"]), total=len(metadata), desc="Classification"
    ):
        label, _ = self.classify(image=image, verbose=False)
        metadata.at[idx, "class"] = label
        outputs[image] = {}

    # Step 6: Segmentation
    if run_segmentation:
        for idx, image in tqdm(
            enumerate(metadata["filename"]),
            total=len(metadata),
            desc="Segmentation",
        ):
            if use_bg_color:
                original_img, mask, bg_modified_image = self.segment(image=image)
                outputs[image] = {
                    "mask": mask,
                    "bg_modified_image": bg_modified_image,
                }
            else:
                original_img, mask = self.segment(image=image)
                outputs[image] = {
                    "mask": mask,
                }

    # Step 7: Landmark detection
    for idx, image in tqdm(
        enumerate(metadata["filename"]),
        total=len(metadata),
        desc="Landmark detection",
    ):
        # Direct positional lookup instead of scanning the filename column.
        label = metadata.at[idx, "class"]
        # Detect on the recolored array when available, else on the file.
        detect_input = outputs[image]["bg_modified_image"] if use_bg_color else image
        coords, maxvals, detection_dict = self.detect(
            class_name=label, image=detect_input
        )
        outputs[image]["detection_dict"] = detection_dict
        if keep_coords:
            outputs[image]["coords"] = coords
            outputs[image]["maxvals"] = maxvals

    # Step 8: Landmark refinement
    if self.do_refine:
        for idx, image in tqdm(
            enumerate(metadata["filename"]),
            total=len(metadata),
            desc="Landmark refinement",
        ):
            label = metadata.at[idx, "class"]
            updated_coords, updated_detection_dict = self.refine(
                class_name=label,
                detection_np=outputs[image]["coords"],
                detection_conf=outputs[image]["maxvals"],
                detection_dict=outputs[image]["detection_dict"],
                mask=outputs[image]["mask"],
            )
            outputs[image]["coords"] = updated_coords
            outputs[image]["detection_dict"] = updated_detection_dict

    # Step 9: Landmark derivation (runs on refined coordinates if refinement ran)
    if self.do_derive:
        for idx, image in tqdm(
            enumerate(metadata["filename"]),
            total=len(metadata),
            desc="Landmark derivation",
        ):
            label = metadata.at[idx, "class"]
            derived_coords, updated_detection_dict = self.derive(
                class_name=label,
                detection_dict=outputs[image]["detection_dict"],
                derivation_dict=self.derivation_dict,
                landmark_coords=outputs[image]["coords"],
                np_mask=outputs[image]["mask"],
            )
            outputs[image]["detection_dict"] = updated_detection_dict

    # Step 10: Save segmentation image
    if save_segmentation_image and run_segmentation:
        for idx, image in tqdm(
            enumerate(metadata["filename"]),
            total=len(metadata),
            desc="Save segmentation image",
        ):
            transformed_name = os.path.splitext(image)[0]

            mask_path = f"{out_dir}/mask_image/{transformed_name}_mask.png"
            Image.fromarray(outputs[image]["mask"]).save(mask_path)
            metadata.at[idx, "mask_image"] = mask_path

            if use_bg_color:
                bg_path = (
                    f"{out_dir}/bg_modified_image/{transformed_name}_bg_modified.png"
                )
                Image.fromarray(outputs[image]["bg_modified_image"]).save(bg_path)
                metadata.at[idx, "bg_modified_image"] = bg_path

    # Step 11: Save measurement image
    if save_measurement_image:
        # Loop-invariant drawing settings.
        font = ImageFont.load_default()
        radius = 5

        for idx, image in tqdm(
            enumerate(metadata["filename"]),
            total=len(metadata),
            desc="Save measurement image",
        ):
            label = metadata.at[idx, "class"]
            transformed_name = os.path.splitext(image)[0]

            # convert() returns an independent image, so the source file
            # handle can be released as soon as the block exits.
            with Image.open(f"{self.input_dir}/{image}") as source_image:
                image_to_save = source_image.convert("RGB")

            draw = ImageDraw.Draw(image_to_save)
            landmarks = outputs[image]["detection_dict"][label]["landmarks"]

            for lm_id, lm_data in landmarks.items():
                x, y = lm_data["x"], lm_data["y"]
                draw.ellipse(
                    (x - radius, y - radius, x + radius, y + radius), fill="green"
                )
                draw.text((x + 8, y - 8), lm_id, fill="green", font=font)

            measurement_image_path = (
                f"{out_dir}/measurement_image/{transformed_name}_measurement.png"
            )
            image_to_save.save(measurement_image_path)
            metadata.at[idx, "measurement_image"] = measurement_image_path

    # Step 12: Save measurement json
    for idx, image in tqdm(
        enumerate(metadata["filename"]),
        total=len(metadata),
        desc="Save measurement json",
    ):
        label = metadata.at[idx, "class"]
        transformed_name = os.path.splitext(image)[0]

        # Clean the detection dictionary
        final_dict = utils.clean_detection_dict(
            class_name=label,
            image_name=image,
            detection_dict=outputs[image]["detection_dict"],
        )

        # Export JSON
        json_path = f"{out_dir}/measurement_json/{transformed_name}_measurement.json"
        utils.export_dict_to_json(data=final_dict, filename=json_path)
        metadata.at[idx, "measurement_json"] = json_path

    # Step 13: Save metadata as a CSV
    metadata.to_csv(f"{out_dir}/metadata.csv", index=False)

    return metadata, outputs
Executes the full garment measurement pipeline for all images in the input directory.
This method processes each image through a multi-stage pipeline that includes garment classification, segmentation, landmark detection, optional refinement, and measurement derivation. During classification, the system identifies the type of garment (e.g., shirt, dress, pants). Segmentation follows, producing binary or instance masks that separate the garment from the background. Landmark detection is then performed to locate anatomical or garment-specific keypoints such as shoulders or waist positions. If enabled, an optional refinement step applies post-processing or model-based corrections to improve the accuracy of detected keypoints. Finally, the system calculates key garment dimensions - such as chest width, waist width, and full length - based on the detected landmarks. In addition to this processing pipeline, the method also manages data and visual output exports. For each input image, a cleaned JSON file is generated containing the predicted garment class, landmark coordinates, and the resulting measurements. Optionally, visual outputs such as segmentation masks and images annotated with landmarks and measurements can be saved to assist in inspection or debugging.
Arguments:
- save_segmentation_image (bool): If True, saves segmentation masks and background-modified images. Defaults to False.
- save_measurement_image (bool): If True, saves images overlaid with detected landmarks and measurements. Defaults to False.
Returns:
tuple: - metadata (pd.DataFrame): A DataFrame with one row per processed image and the columns filename, class, measurement_image and measurement_json, plus mask_image (when segmentation runs) and bg_modified_image (when a background color is configured); the image-path columns are filled only when the corresponding images are saved. - outputs (dict): A dictionary mapping image filenames to their intermediate processing results: detection_dict always, mask when segmentation runs, bg_modified_image when a background color is configured, and coords and maxvals when refinement or derivation is enabled.
Example of exported JSON:
{ "cloth_3.jpg": { "class": "vest dress", "landmarks": { "10": { "conf": 0.7269417643547058, "x": 611.0, "y": 861.0 }, "16": { "conf": 0.6769524812698364, "x": 1226.0, "y": 838.0 }, "17": { "conf": 0.7472652196884155, "x": 1213.0, "y": 726.0 }, "18": { "conf": 0.7360446453094482, "x": 1238.0, "y": 613.0 }, "2": { "conf": 0.9256571531295776, "x": 703.0, "y": 264.0 }, "20": { "x": 700.936, "y": 2070.0 }, "8": { "conf": 0.7129100561141968, "x": 563.0, "y": 613.0 }, "9": { "conf": 0.8203497529029846, "x": 598.0, "y": 726.0 } }, "measurements": { "chest": { "distance": 675.0, "landmarks": { "end": "18", "start": "8" } }, "full length": { "distance": 1806.0011794281863, "landmarks": { "end": "20", "start": "2" } }, "hips": { "distance": 615.4299310238331, "landmarks": { "end": "16", "start": "10" } }, "waist": { "distance": 615.0, "landmarks": { "end": "17", "start": "9" } } } } }