garmentiq.tailor

View Source

  1import os
  2from typing import List, Dict, Type, Any, Optional, Union
  3import torch.nn as nn
  4import numpy as np
  5from pathlib import Path
  6import pandas as pd
  7from tqdm.auto import tqdm
  8import textwrap
  9from PIL import Image, ImageDraw, ImageFont
 10from . import classification
 11from . import segmentation
 12from . import landmark
 13from . import utils
 14
 15
 16class tailor:
 17    """
 18    The `tailor` class acts as a central agent for the GarmentIQ pipeline,
 19    orchestrating garment measurement from classification to landmark derivation.
 20
 21    It integrates functionalities from other modules (classification, segmentation, landmark)
 22    to provide a smooth end-to-end process for automated garment measurement from images.
 23
 24    Attributes:
 25        input_dir (str): Directory containing input images.
 26        model_dir (str): Directory where models are stored.
 27        output_dir (str): Directory to save processed outputs.
 28        class_dict (dict): Dictionary defining garment classes and their properties.
 29        do_derive (bool): Flag to enable landmark derivation.
 30        do_refine (bool): Flag to enable landmark refinement.
 31        classification_model_path (str): Path to the classification model.
 32        classification_model_class (Type[nn.Module]): Class definition for the classification model.
 33        classification_model_args (Dict): Arguments for the classification model.
 34        segmentation_model_name (str): Name or path for the segmentation model.
 35        segmentation_model_args (Dict): Arguments for the segmentation model.
 36        landmark_detection_model_path (str): Path to the landmark detection model.
 37        landmark_detection_model_class (Type[nn.Module]): Class definition for the landmark detection model.
 38        landmark_detection_model_args (Dict): Arguments for the landmark detection model.
 39        refinement_args (Optional[Dict]): Arguments for landmark refinement.
 40        derivation_dict (Optional[Dict]): Dictionary for landmark derivation rules.
 41    """
 42
 43    def __init__(
 44        self,
 45        input_dir: str,
 46        model_dir: str,
 47        output_dir: str,
 48        class_dict: dict,
 49        do_derive: bool,
 50        do_refine: bool,
 51        classification_model_path: str,
 52        classification_model_class: Type[nn.Module],
 53        classification_model_args: Dict,
 54        segmentation_model_name: str,
 55        segmentation_model_args: Dict,
 56        landmark_detection_model_path: str,
 57        landmark_detection_model_class: Type[nn.Module],
 58        landmark_detection_model_args: Dict,
 59        refinement_args: Optional[Dict] = None,
 60        derivation_dict: Optional[Dict] = None,
 61    ):
 62        """
 63        Initializes the `tailor` agent with paths, model configurations, and processing flags.
 64
 65        Args:
 66            input_dir (str): Path to the directory containing input images.
 67            model_dir (str): Path to the directory where all required models are stored.
 68            output_dir (str): Path to the directory where all processed outputs will be saved.
 69            class_dict (dict): A dictionary defining the garment classes, their predefined points,
 70                                index ranges, and instruction JSON file paths.
 71            do_derive (bool): If True, enables the landmark derivation step.
 72            do_refine (bool): If True, enables the landmark refinement step.
 73            classification_model_path (str): The filename or relative path to the classification model.
 74            classification_model_class (Type[nn.Module]): The Python class of the classification model.
 75            classification_model_args (Dict): A dictionary of arguments to initialize the classification model.
 76            segmentation_model_name (str): The name or path of the pretrained segmentation model.
 77            segmentation_model_args (Dict): A dictionary of arguments for the segmentation model.
 78            landmark_detection_model_path (str): The filename or relative path to the landmark detection model.
 79            landmark_detection_model_class (Type[nn.Module]): The Python class of the landmark detection model.
 80            landmark_detection_model_args (Dict): A dictionary of arguments for the landmark detection model.
 81            refinement_args (Optional[Dict]): Optional arguments for the refinement process,
 82                                              e.g., `window_size`, `ksize`, `sigmaX`. Defaults to None.
 83            derivation_dict (Optional[Dict]): A dictionary defining derivation rules for non-predefined landmarks.
 84                                               Required if `do_derive` is True.
 85
 86        Raises:
 87            ValueError: If `do_derive` is True but `derivation_dict` is None.
 88        """
 89        # Directories
 90        self.input_dir = input_dir
 91        self.model_dir = model_dir
 92        self.output_dir = output_dir
 93
 94        # Classes
 95        self.class_dict = class_dict
 96        self.classes = sorted(list(class_dict.keys()))
 97
 98        # Derivation
 99        self.do_derive = do_derive
100        if self.do_derive:
101            if derivation_dict is None:
102                raise ValueError(
103                    "`derivation_dict` must be provided if `do_derive=True`."
104                )
105            self.derivation_dict = derivation_dict
106        else:
107            self.derivation_dict = None
108
109        # Refinement setup
110        self.do_refine = do_refine
111        self.do_refine = do_refine
112        if self.do_refine:
113            if refinement_args is None:
114                self.refinement_args = {}
115            self.refinement_args = refinement_args
116        else:
117            self.refinement_args = None
118
119        # Classification model setup
120        self.classification_model_path = classification_model_path
121        self.classification_model_args = classification_model_args
122        self.classification_model_class = classification_model_class
123        filtered_model_args = {
124            k: v
125            for k, v in self.classification_model_args.items()
126            if k not in ("resize_dim", "normalize_mean", "normalize_std")
127        }
128
129        # Load the model using the filtered arguments
130        self.classification_model = classification.load_model(
131            model_path=f"{self.model_dir}/{self.classification_model_path}",
132            model_class=self.classification_model_class,
133            model_args=filtered_model_args,
134        )
135
136        # Segmentation model setup
137        self.segmentation_model_name = segmentation_model_name
138        self.segmentation_model_args = segmentation_model_args
139        self.segmentation_has_bg_color = "background_color" in segmentation_model_args
140        self.segmentation_model = segmentation.load_model(
141            pretrained_model=self.segmentation_model_name,
142            pretrained_model_args={
143                "trust_remote_code": segmentation_model_args["trust_remote_code"]
144            },
145            high_precision=segmentation_model_args["high_precision"],
146        )
147
148        # Landmark detection model setup
149        self.landmark_detection_model_path = landmark_detection_model_path
150        self.landmark_detection_model_class = landmark_detection_model_class
151        self.landmark_detection_model_args = landmark_detection_model_args
152        self.landmark_detection_model = landmark.detection.load_model(
153            model_path=f"{self.model_dir}/{self.landmark_detection_model_path}",
154            model_class=self.landmark_detection_model_class,
155        )
156
157    def summary(self):
158        """
159        Prints a summary of the `tailor` agent's configuration, including directory paths,
160        defined classes, processing options (refine, derive), and loaded models.
161        """
162        width = 80
163        sep = "=" * width
164
165        print(sep)
166        print("TAILOR AGENT SUMMARY".center(width))
167        print(sep)
168
169        # Directories
170        print("DIRECTORY PATHS".center(width, "-"))
171        print(f"{'Input directory:':25} {self.input_dir}")
172        print(f"{'Model directory:':25} {self.model_dir}")
173        print(f"{'Output directory:':25} {self.output_dir}")
174        print()
175
176        # Classes
177        print("CLASSES".center(width, "-"))
178        print(f"{'Class Index':<11} | Class Name")
179        print(f"{'-'*11} | {'-'*66}")
180        for i, cls in enumerate(self.classes):
181            print(f"{i:<11} | {cls}")
182        print()
183
184        # Flags
185        print("OPTIONS".center(width, "-"))
186        print(f"{'Do refine?:':25} {self.do_refine}")
187        print(f"{'Do derive?:':25} {self.do_derive}")
188        print()
189
190        # Models
191        print("MODELS".center(width, "-"))
192        print(
193            f"{'Classification Model:':25} {self.classification_model_class.__name__}"
194        )
195        print(f"{'Segmentation Model:':25} {self.segmentation_model_name}")
196        print(f"{'  └─ Change BG color?:':25} {self.segmentation_has_bg_color}")
197        print(
198            f"{'Landmark Detection Model:':25} {self.landmark_detection_model_class.__class__.__name__}"
199        )
200        print(sep)
201
202    def classify(self, image: str, verbose=False):
203        """
204        Classifies a single garment image using the configured classification model.
205
206        Args:
207            image (str): The filename of the image to classify, located in `self.input_dir`.
208            verbose (bool): If True, prints detailed classification output. Defaults to False.
209
210        Returns:
211            tuple:
212                - label (str): The predicted class label of the garment.
213                - probabilities (List[float]): A list of probabilities for each class.
214        """
215        label, probablities = classification.predict(
216            model=self.classification_model,
217            image_path=f"{self.input_dir}/{image}",
218            classes=self.classes,
219            resize_dim=self.classification_model_args.get("resize_dim"),
220            normalize_mean=self.classification_model_args.get("normalize_mean"),
221            normalize_std=self.classification_model_args.get("normalize_std"),
222            verbose=verbose,
223        )
224        return label, probablities
225
226    def segment(self, image: str):
227        """
228        Segments a single garment image to extract its mask and optionally modifies the background color.
229
230        Args:
231            image (str): The filename of the image to segment, located in `self.input_dir`.
232
233        Returns:
234            tuple:
235                - original_img (np.ndarray): The original image with the mask overlaid.
236                - mask (np.ndarray): The binary segmentation mask.
237                - bg_modified_img (np.ndarray, optional): The image with the background color changed,
238                                                         returned only if `background_color` is specified
239                                                         in `segmentation_model_args`.
240        """
241        original_img, mask = segmentation.extract(
242            model=self.segmentation_model,
243            image_path=f"{self.input_dir}/{image}",
244            resize_dim=self.segmentation_model_args.get("resize_dim"),
245            normalize_mean=self.segmentation_model_args.get("normalize_mean"),
246            normalize_std=self.segmentation_model_args.get("normalize_std"),
247            high_precision=self.segmentation_model_args.get("high_precision"),
248        )
249
250        background_color = self.segmentation_model_args.get("background_color")
251
252        if background_color is None:
253            return original_img, mask
254        else:
255            bg_modified_img = segmentation.change_background_color(
256                image_np=original_img, mask_np=mask, background_color=background_color
257            )
258            return original_img, mask, bg_modified_img
259
260    def detect(self, class_name: str, image: Union[str, np.ndarray]):
261        """
262        Detects predefined landmarks on a garment image based on its classified class.
263
264        Args:
265            class_name (str): The classified name of the garment.
266            image (Union[str, np.ndarray]): The path to the image file or a NumPy array of the image.
267
268        Returns:
269            tuple:
270                - coords (np.array): Detected landmark coordinates.
271                - maxval (np.array): Confidence scores for detected landmarks.
272                - detection_dict (dict): A dictionary containing detailed landmark detection data.
273        """
274        if isinstance(image, str):
275            image = f"{self.input_dir}/{image}"
276
277        coords, maxval, detection_dict = landmark.detect(
278            class_name=class_name,
279            class_dict=self.class_dict,
280            image_path=image,
281            model=self.landmark_detection_model,
282            scale_std=self.landmark_detection_model_args.get("scale_std"),
283            resize_dim=self.landmark_detection_model_args.get("resize_dim"),
284            normalize_mean=self.landmark_detection_model_args.get("normalize_mean"),
285            normalize_std=self.landmark_detection_model_args.get("normalize_std"),
286        )
287        return coords, maxval, detection_dict
288
289    def derive(
290        self,
291        class_name: str,
292        detection_dict: dict,
293        derivation_dict: dict,
294        landmark_coords: np.array,
295        np_mask: np.array,
296    ):
297        """
298        Derives non-predefined landmark coordinates based on predefined landmarks and a mask.
299
300        Args:
301            class_name (str): The name of the garment class.
302            detection_dict (dict): The dictionary containing detected landmarks.
303            derivation_dict (dict): The dictionary defining derivation rules.
304            landmark_coords (np.array): NumPy array of initial landmark coordinates.
305            np_mask (np.array): NumPy array of the segmentation mask.
306
307        Returns:
308            tuple:
309                - derived_coords (dict): A dictionary of the newly derived landmark coordinates.
310                - updated_detection_dict (dict): The detection dictionary updated with derived landmarks.
311        """
312        derived_coords, updated_detection_dict = landmark.derive(
313            class_name=class_name,
314            detection_dict=detection_dict,
315            derivation_dict=derivation_dict,
316            landmark_coords=landmark_coords,
317            np_mask=np_mask,
318        )
319        return derived_coords, updated_detection_dict
320
321    def refine(
322        self,
323        class_name: str,
324        detection_np: np.array,
325        detection_conf: np.array,
326        detection_dict: dict,
327        mask: np.array,
328        window_size: int = 5,
329        ksize: tuple = (11, 11),
330        sigmaX: float = 0.0,
331    ):
332        """
333        Refines detected landmark coordinates using a blurred segmentation mask.
334
335        Args:
336            class_name (str): The name of the garment class.
337            detection_np (np.array): NumPy array of initial landmark predictions.
338            detection_conf (np.array): NumPy array of confidence scores for each predicted landmark.
339            detection_dict (dict): Dictionary containing landmark data for each class.
340            mask (np.array): Grayscale mask image used to guide refinement.
341            window_size (int, optional): Size of the window used in the refinement algorithm. Defaults to 5.
342            ksize (tuple, optional): Kernel size for Gaussian blur. Must be odd integers. Defaults to (11, 11).
343            sigmaX (float, optional): Gaussian kernel standard deviation in the X direction. Defaults to 0.0.
344
345        Returns:
346            tuple:
347                - refined_detection_np (np.array): Array of the same shape as `detection_np` with refined coordinates.
348                - detection_dict (dict): Updated detection dictionary with refined landmark coordinates.
349        """
350        if self.refinement_args:
351            if self.refinement_args.get("window_size") is not None:
352                window_size = self.refinement_args["window_size"]
353            if self.refinement_args.get("ksize") is not None:
354                ksize = self.refinement_args["ksize"]
355            if self.refinement_args.get("sigmaX") is not None:
356                sigmaX = self.refinement_args["sigmaX"]
357
358        refined_detection_np, refined_detection_dict = landmark.refine(
359            class_name=class_name,
360            detection_np=detection_np,
361            detection_conf=detection_conf,
362            detection_dict=detection_dict,
363            mask=mask,
364            window_size=window_size,
365            ksize=ksize,
366            sigmaX=sigmaX,
367        )
368
369        return refined_detection_np, refined_detection_dict
370
371    def measure(
372        self,
373        save_segmentation_image: bool = False,
374        save_measurement_image: bool = False,
375    ):
376        """
377        Executes the full garment measurement pipeline for all images in the input directory.
378    
379        This method processes each image through a multi-stage pipeline that includes garment classification, 
380        segmentation, landmark detection, optional refinement, and measurement derivation. During classification, 
381        the system identifies the type of garment (e.g., shirt, dress, pants). Segmentation follows, producing 
382        binary or instance masks that separate the garment from the background. Landmark detection is then 
383        performed to locate anatomical or garment-specific keypoints such as shoulders or waist positions. If 
384        enabled, an optional refinement step applies post-processing or model-based corrections to improve the 
385        accuracy of detected keypoints. Finally, the system calculates key garment dimensions - such as chest width, 
386        waist width, and full length - based on the detected landmarks. In addition to this processing pipeline, 
387        the method also manages data and visual output exports. For each input image, a cleaned JSON file is 
388        generated containing the predicted garment class, landmark coordinates, and the resulting measurements. 
389        Optionally, visual outputs such as segmentation masks and images annotated with landmarks and measurements 
390        can be saved to assist in inspection or debugging.
391    
392        Args:
393            save_segmentation_image (bool): If True, saves segmentation masks and background-modified images.
394                                            Defaults to False.
395            save_measurement_image (bool): If True, saves images overlaid with detected landmarks and measurements.
396                                           Defaults to False.
397    
398        Returns:
399            tuple:
400                - metadata (pd.DataFrame): A DataFrame containing metadata for each processed image, such as:
401                    - Original image path
402                    - Paths to any saved segmentation or annotated images
403                    - Class and measurement results
404                - outputs (dict): A dictionary mapping image filenames to their detailed processing results, including:
405                    - Predicted class
406                    - Detected landmarks with coordinates and confidence scores
407                    - Calculated measurements
408                    - File paths to any saved images (if applicable)
409    
410        Example of exported JSON:
411            ```
412            {
413                "cloth_3.jpg": {
414                    "class": "vest dress",
415                    "landmarks": {
416                        "10": {
417                            "conf": 0.7269417643547058,
418                            "x": 611.0,
419                            "y": 861.0
420                        },
421                        "16": {
422                            "conf": 0.6769524812698364,
423                            "x": 1226.0,
424                            "y": 838.0
425                        },
426                        "17": {
427                            "conf": 0.7472652196884155,
428                            "x": 1213.0,
429                            "y": 726.0
430                        },
431                        "18": {
432                            "conf": 0.7360446453094482,
433                            "x": 1238.0,
434                            "y": 613.0
435                        },
436                        "2": {
437                            "conf": 0.9256571531295776,
438                            "x": 703.0,
439                            "y": 264.0
440                        },
441                        "20": {
442                            "x": 700.936,
443                            "y": 2070.0
444                        },
445                        "8": {
446                            "conf": 0.7129100561141968,
447                            "x": 563.0,
448                            "y": 613.0
449                        },
450                        "9": {
451                            "conf": 0.8203497529029846,
452                            "x": 598.0,
453                            "y": 726.0
454                        }
455                    },
456                    "measurements": {
457                        "chest": {
458                            "distance": 675.0,
459                            "landmarks": {
460                                "end": "18",
461                                "start": "8"
462                            }
463                        },
464                        "full length": {
465                            "distance": 1806.0011794281863,
466                            "landmarks": {
467                                "end": "20",
468                                "start": "2"
469                            }
470                        },
471                        "hips": {
472                            "distance": 615.4299310238331,
473                            "landmarks": {
474                                "end": "16",
475                                "start": "10"
476                            }
477                        },
478                        "waist": {
479                            "distance": 615.0,
480                            "landmarks": {
481                                "end": "17",
482                                "start": "9"
483                            }
484                        }
485                    }
486                }
487            }
488            ```
489        """
490        # Some helper variables
491        use_bg_color = self.segmentation_model_args.get("background_color") is not None
492        outputs = {}
493
494        # Step 1: Create the output directory
495        Path(self.output_dir).mkdir(parents=True, exist_ok=True)
496        Path(f"{self.output_dir}/measurement_json").mkdir(parents=True, exist_ok=True)
497
498        if save_segmentation_image and (
499            use_bg_color or self.do_derive or self.do_refine
500        ):
501            Path(f"{self.output_dir}/mask_image").mkdir(parents=True, exist_ok=True)
502            if use_bg_color:
503                Path(f"{self.output_dir}/bg_modified_image").mkdir(
504                    parents=True, exist_ok=True
505                )
506
507        if save_measurement_image:
508            Path(f"{self.output_dir}/measurement_image").mkdir(
509                parents=True, exist_ok=True
510            )
511
512        # Step 2: Collect image filenames from input_dir
513        image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff"]
514        input_path = Path(self.input_dir)
515
516        image_files = []
517        for ext in image_extensions:
518            image_files.extend(input_path.glob(ext))
519
520        # Step 3: Determine column structure
521        columns = [
522            "filename",
523            "class",
524            "mask_image" if use_bg_color or self.do_derive or self.do_refine else None,
525            "bg_modified_image" if use_bg_color else None,
526            "measurement_image",
527            "measurement_json",
528        ]
529        columns = [col for col in columns if col is not None]
530
531        metadata = pd.DataFrame(columns=columns)
532        metadata["filename"] = [img.name for img in image_files]
533
534        # Step 4: Print start message and information
535        print(f"Start measuring {len(metadata['filename'])} garment images ...")
536
537        if self.do_derive and self.do_refine:
538            message = (
539                "There are 5 measurement steps: classification, segmentation, "
540                "landmark detection, landmark refinement, and landmark derivation."
541            )
542        elif self.do_derive:
543            message = (
544                "There are 4 measurement steps: classification, segmentation, "
545                "landmark detection, and landmark derivation."
546            )
547        elif self.do_refine:
548            message = (
549                "There are 4 measurement steps: classification, segmentation, "
550                "landmark detection, and landmark refinement."
551            )
552        elif use_bg_color:
553            message = (
554                "There are 3 measurement steps: classification, segmentation, "
555                "and landmark detection."
556            )
557        else:
558            message = (
559                "There are 2 measurement steps: classification and landmark detection."
560            )
561
562        print(textwrap.fill(message, width=80))
563
564        # Step 5: Classification
565        for idx, image in tqdm(
566            enumerate(metadata["filename"]), total=len(metadata), desc="Classification"
567        ):
568            label, _ = self.classify(image=image, verbose=False)
569            metadata.at[idx, "class"] = label
570            outputs[image] = {}
571
572        # Step 6: Segmentation
573        if use_bg_color or (self.do_derive or self.do_refine):
574            for idx, image in tqdm(
575                enumerate(metadata["filename"]),
576                total=len(metadata),
577                desc="Segmentation",
578            ):
579                if use_bg_color:
580                    original_img, mask, bg_modified_image = self.segment(image=image)
581                    outputs[image] = {
582                        "mask": mask,
583                        "bg_modified_image": bg_modified_image,
584                    }
585                else:
586                    original_img, mask = self.segment(image=image)
587                    outputs[image] = {
588                        "mask": mask,
589                    }
590
591        # Step 7: Landmark detection
592        for idx, image in tqdm(
593            enumerate(metadata["filename"]),
594            total=len(metadata),
595            desc="Landmark detection",
596        ):
597            label = metadata.loc[metadata["filename"] == image, "class"].values[0]
598            if use_bg_color:
599                coords, maxvals, detection_dict = self.detect(
600                    class_name=label, image=outputs[image]["bg_modified_image"]
601                )
602                outputs[image]["detection_dict"] = detection_dict
603                if self.do_derive or self.do_refine:
604                    outputs[image]["coords"] = coords
605                    outputs[image]["maxvals"] = maxvals
606            else:
607                coords, maxvals, detection_dict = self.detect(
608                    class_name=label, image=image
609                )
610                outputs[image]["detection_dict"] = detection_dict
611                if self.do_derive or self.do_refine:
612                    outputs[image]["coords"] = coords
613                    outputs[image]["maxvals"] = maxvals
614
615        # Step 8: Landmark refinement
616        if self.do_refine:
617            for idx, image in tqdm(
618                enumerate(metadata["filename"]),
619                total=len(metadata),
620                desc="Landmark refinement",
621            ):
622                label = metadata.loc[metadata["filename"] == image, "class"].values[0]
623                updated_coords, updated_detection_dict = self.refine(
624                    class_name=label,
625                    detection_np=outputs[image]["coords"],
626                    detection_conf=outputs[image]["maxvals"],
627                    detection_dict=outputs[image]["detection_dict"],
628                    mask=outputs[image]["mask"],
629                )
630                outputs[image]["coords"] = updated_coords
631                outputs[image]["detection_dict"] = updated_detection_dict
632
633        # Step 9: Landmark derivation
634        if self.do_derive:
635            for idx, image in tqdm(
636                enumerate(metadata["filename"]),
637                total=len(metadata),
638                desc="Landmark derivation",
639            ):
640                label = metadata.loc[metadata["filename"] == image, "class"].values[0]
641                derived_coords, updated_detection_dict = self.derive(
642                    class_name=label,
643                    detection_dict=outputs[image]["detection_dict"],
644                    derivation_dict=self.derivation_dict,
645                    landmark_coords=outputs[image]["coords"],
646                    np_mask=outputs[image]["mask"],
647                )
648                outputs[image]["detection_dict"] = updated_detection_dict
649
650        # Step 10: Save segmentation image
651        if save_segmentation_image and (
652            use_bg_color or self.do_derive or self.do_refine
653        ):
654            for idx, image in tqdm(
655                enumerate(metadata["filename"]),
656                total=len(metadata),
657                desc="Save segmentation image",
658            ):
659                transformed_name = os.path.splitext(image)[0]
660                Image.fromarray(outputs[image]["mask"]).save(
661                    f"{self.output_dir}/mask_image/{transformed_name}_mask.png"
662                )
663                metadata.at[
664                    idx, "mask_image"
665                ] = f"{self.output_dir}/mask_image/{transformed_name}_mask.png"
666                if use_bg_color:
667                    Image.fromarray(outputs[image]["bg_modified_image"]).save(
668                        f"{self.output_dir}/bg_modified_image/{transformed_name}_bg_modified.png"
669                    )
670                    metadata.at[
671                        idx, "bg_modified_image"
672                    ] = f"{self.output_dir}/bg_modified_image/{transformed_name}_bg_modified.png"
673
674        # Step 10: Save measurement image
675        if save_measurement_image:
676            for idx, image in tqdm(
677                enumerate(metadata["filename"]),
678                total=len(metadata),
679                desc="Save measurement image",
680            ):
681                label = metadata.loc[metadata["filename"] == image, "class"].values[0]
682                transformed_name = os.path.splitext(image)[0]
683
684                image_to_save = Image.open(f"{self.input_dir}/{image}").convert("RGB")
685                draw = ImageDraw.Draw(image_to_save)
686                font = ImageFont.load_default()
687                landmarks = outputs[image]["detection_dict"][label]["landmarks"]
688
689                for lm_id, lm_data in landmarks.items():
690                    x, y = lm_data["x"], lm_data["y"]
691                    radius = 5
692                    draw.ellipse(
693                        (x - radius, y - radius, x + radius, y + radius), fill="green"
694                    )
695                    draw.text((x + 8, y - 8), lm_id, fill="green", font=font)
696
697                image_to_save.save(
698                    f"{self.output_dir}/measurement_image/{transformed_name}_measurement.png"
699                )
700                metadata.at[
701                    idx, "measurement_image"
702                ] = f"{self.output_dir}/measurement_image/{transformed_name}_measurement.png"
703
704        # Step 11: Save measurement json
705        for idx, image in tqdm(
706            enumerate(metadata["filename"]),
707            total=len(metadata),
708            desc="Save measurement json",
709        ):
710            label = metadata.loc[metadata["filename"] == image, "class"].values[0]
711            transformed_name = os.path.splitext(image)[0]
712
713            # Clean the detection dictionary
714            final_dict = utils.clean_detection_dict(
715                class_name=label,
716                image_name=image,
717                detection_dict=outputs[image]["detection_dict"],
718            )
719
720            # Export JSON
721            utils.export_dict_to_json(
722                data=final_dict,
723                filename=f"{self.output_dir}/measurement_json/{transformed_name}_measurement.json",
724            )
725
726            metadata.at[
727                idx, "measurement_json"
728            ] = f"{self.output_dir}/measurement_json/{transformed_name}_measurement.json"
729
730        # Step 12: Save metadata as a CSV
731        metadata.to_csv(f"{self.output_dir}/metadata.csv", index=False)
732
733        return metadata, outputs

class tailor: View Source

 17class tailor:
 18    """
 19    The `tailor` class acts as a central agent for the GarmentIQ pipeline,
 20    orchestrating garment measurement from classification to landmark derivation.
 21
 22    It integrates functionalities from other modules (classification, segmentation, landmark)
 23    to provide a smooth end-to-end process for automated garment measurement from images.
 24
 25    Attributes:
 26        input_dir (str): Directory containing input images.
 27        model_dir (str): Directory where models are stored.
 28        output_dir (str): Directory to save processed outputs.
 29        class_dict (dict): Dictionary defining garment classes and their properties.
 30        do_derive (bool): Flag to enable landmark derivation.
 31        do_refine (bool): Flag to enable landmark refinement.
 32        classification_model_path (str): Path to the classification model.
 33        classification_model_class (Type[nn.Module]): Class definition for the classification model.
 34        classification_model_args (Dict): Arguments for the classification model.
 35        segmentation_model_name (str): Name or path for the segmentation model.
 36        segmentation_model_args (Dict): Arguments for the segmentation model.
 37        landmark_detection_model_path (str): Path to the landmark detection model.
 38        landmark_detection_model_class (Type[nn.Module]): Class definition for the landmark detection model.
 39        landmark_detection_model_args (Dict): Arguments for the landmark detection model.
 40        refinement_args (Optional[Dict]): Arguments for landmark refinement.
 41        derivation_dict (Optional[Dict]): Dictionary for landmark derivation rules.
 42    """
 43
 44    def __init__(
 45        self,
 46        input_dir: str,
 47        model_dir: str,
 48        output_dir: str,
 49        class_dict: dict,
 50        do_derive: bool,
 51        do_refine: bool,
 52        classification_model_path: str,
 53        classification_model_class: Type[nn.Module],
 54        classification_model_args: Dict,
 55        segmentation_model_name: str,
 56        segmentation_model_args: Dict,
 57        landmark_detection_model_path: str,
 58        landmark_detection_model_class: Type[nn.Module],
 59        landmark_detection_model_args: Dict,
 60        refinement_args: Optional[Dict] = None,
 61        derivation_dict: Optional[Dict] = None,
 62    ):
 63        """
 64        Initializes the `tailor` agent with paths, model configurations, and processing flags.
 65
 66        Args:
 67            input_dir (str): Path to the directory containing input images.
 68            model_dir (str): Path to the directory where all required models are stored.
 69            output_dir (str): Path to the directory where all processed outputs will be saved.
 70            class_dict (dict): A dictionary defining the garment classes, their predefined points,
 71                                index ranges, and instruction JSON file paths.
 72            do_derive (bool): If True, enables the landmark derivation step.
 73            do_refine (bool): If True, enables the landmark refinement step.
 74            classification_model_path (str): The filename or relative path to the classification model.
 75            classification_model_class (Type[nn.Module]): The Python class of the classification model.
 76            classification_model_args (Dict): A dictionary of arguments to initialize the classification model.
 77            segmentation_model_name (str): The name or path of the pretrained segmentation model.
 78            segmentation_model_args (Dict): A dictionary of arguments for the segmentation model.
 79            landmark_detection_model_path (str): The filename or relative path to the landmark detection model.
 80            landmark_detection_model_class (Type[nn.Module]): The Python class of the landmark detection model.
 81            landmark_detection_model_args (Dict): A dictionary of arguments for the landmark detection model.
 82            refinement_args (Optional[Dict]): Optional arguments for the refinement process,
 83                                              e.g., `window_size`, `ksize`, `sigmaX`. Defaults to None.
 84            derivation_dict (Optional[Dict]): A dictionary defining derivation rules for non-predefined landmarks.
 85                                               Required if `do_derive` is True.
 86
 87        Raises:
 88            ValueError: If `do_derive` is True but `derivation_dict` is None.
 89        """
 90        # Directories
 91        self.input_dir = input_dir
 92        self.model_dir = model_dir
 93        self.output_dir = output_dir
 94
 95        # Classes
 96        self.class_dict = class_dict
 97        self.classes = sorted(list(class_dict.keys()))
 98
 99        # Derivation
100        self.do_derive = do_derive
101        if self.do_derive:
102            if derivation_dict is None:
103                raise ValueError(
104                    "`derivation_dict` must be provided if `do_derive=True`."
105                )
106            self.derivation_dict = derivation_dict
107        else:
108            self.derivation_dict = None
109
110        # Refinement setup
111        self.do_refine = do_refine
112        self.do_refine = do_refine
113        if self.do_refine:
114            if refinement_args is None:
115                self.refinement_args = {}
116            self.refinement_args = refinement_args
117        else:
118            self.refinement_args = None
119
120        # Classification model setup
121        self.classification_model_path = classification_model_path
122        self.classification_model_args = classification_model_args
123        self.classification_model_class = classification_model_class
124        filtered_model_args = {
125            k: v
126            for k, v in self.classification_model_args.items()
127            if k not in ("resize_dim", "normalize_mean", "normalize_std")
128        }
129
130        # Load the model using the filtered arguments
131        self.classification_model = classification.load_model(
132            model_path=f"{self.model_dir}/{self.classification_model_path}",
133            model_class=self.classification_model_class,
134            model_args=filtered_model_args,
135        )
136
137        # Segmentation model setup
138        self.segmentation_model_name = segmentation_model_name
139        self.segmentation_model_args = segmentation_model_args
140        self.segmentation_has_bg_color = "background_color" in segmentation_model_args
141        self.segmentation_model = segmentation.load_model(
142            pretrained_model=self.segmentation_model_name,
143            pretrained_model_args={
144                "trust_remote_code": segmentation_model_args["trust_remote_code"]
145            },
146            high_precision=segmentation_model_args["high_precision"],
147        )
148
149        # Landmark detection model setup
150        self.landmark_detection_model_path = landmark_detection_model_path
151        self.landmark_detection_model_class = landmark_detection_model_class
152        self.landmark_detection_model_args = landmark_detection_model_args
153        self.landmark_detection_model = landmark.detection.load_model(
154            model_path=f"{self.model_dir}/{self.landmark_detection_model_path}",
155            model_class=self.landmark_detection_model_class,
156        )
157
158    def summary(self):
159        """
160        Prints a summary of the `tailor` agent's configuration, including directory paths,
161        defined classes, processing options (refine, derive), and loaded models.
162        """
163        width = 80
164        sep = "=" * width
165
166        print(sep)
167        print("TAILOR AGENT SUMMARY".center(width))
168        print(sep)
169
170        # Directories
171        print("DIRECTORY PATHS".center(width, "-"))
172        print(f"{'Input directory:':25} {self.input_dir}")
173        print(f"{'Model directory:':25} {self.model_dir}")
174        print(f"{'Output directory:':25} {self.output_dir}")
175        print()
176
177        # Classes
178        print("CLASSES".center(width, "-"))
179        print(f"{'Class Index':<11} | Class Name")
180        print(f"{'-'*11} | {'-'*66}")
181        for i, cls in enumerate(self.classes):
182            print(f"{i:<11} | {cls}")
183        print()
184
185        # Flags
186        print("OPTIONS".center(width, "-"))
187        print(f"{'Do refine?:':25} {self.do_refine}")
188        print(f"{'Do derive?:':25} {self.do_derive}")
189        print()
190
191        # Models
192        print("MODELS".center(width, "-"))
193        print(
194            f"{'Classification Model:':25} {self.classification_model_class.__name__}"
195        )
196        print(f"{'Segmentation Model:':25} {self.segmentation_model_name}")
197        print(f"{'  └─ Change BG color?:':25} {self.segmentation_has_bg_color}")
198        print(
199            f"{'Landmark Detection Model:':25} {self.landmark_detection_model_class.__class__.__name__}"
200        )
201        print(sep)
202
203    def classify(self, image: str, verbose=False):
204        """
205        Classifies a single garment image using the configured classification model.
206
207        Args:
208            image (str): The filename of the image to classify, located in `self.input_dir`.
209            verbose (bool): If True, prints detailed classification output. Defaults to False.
210
211        Returns:
212            tuple:
213                - label (str): The predicted class label of the garment.
214                - probabilities (List[float]): A list of probabilities for each class.
215        """
216        label, probablities = classification.predict(
217            model=self.classification_model,
218            image_path=f"{self.input_dir}/{image}",
219            classes=self.classes,
220            resize_dim=self.classification_model_args.get("resize_dim"),
221            normalize_mean=self.classification_model_args.get("normalize_mean"),
222            normalize_std=self.classification_model_args.get("normalize_std"),
223            verbose=verbose,
224        )
225        return label, probablities
226
227    def segment(self, image: str):
228        """
229        Segments a single garment image to extract its mask and optionally modifies the background color.
230
231        Args:
232            image (str): The filename of the image to segment, located in `self.input_dir`.
233
234        Returns:
235            tuple:
236                - original_img (np.ndarray): The original image with the mask overlaid.
237                - mask (np.ndarray): The binary segmentation mask.
238                - bg_modified_img (np.ndarray, optional): The image with the background color changed,
239                                                         returned only if `background_color` is specified
240                                                         in `segmentation_model_args`.
241        """
242        original_img, mask = segmentation.extract(
243            model=self.segmentation_model,
244            image_path=f"{self.input_dir}/{image}",
245            resize_dim=self.segmentation_model_args.get("resize_dim"),
246            normalize_mean=self.segmentation_model_args.get("normalize_mean"),
247            normalize_std=self.segmentation_model_args.get("normalize_std"),
248            high_precision=self.segmentation_model_args.get("high_precision"),
249        )
250
251        background_color = self.segmentation_model_args.get("background_color")
252
253        if background_color is None:
254            return original_img, mask
255        else:
256            bg_modified_img = segmentation.change_background_color(
257                image_np=original_img, mask_np=mask, background_color=background_color
258            )
259            return original_img, mask, bg_modified_img
260
261    def detect(self, class_name: str, image: Union[str, np.ndarray]):
262        """
263        Detects predefined landmarks on a garment image based on its classified class.
264
265        Args:
266            class_name (str): The classified name of the garment.
267            image (Union[str, np.ndarray]): The path to the image file or a NumPy array of the image.
268
269        Returns:
270            tuple:
271                - coords (np.array): Detected landmark coordinates.
272                - maxval (np.array): Confidence scores for detected landmarks.
273                - detection_dict (dict): A dictionary containing detailed landmark detection data.
274        """
275        if isinstance(image, str):
276            image = f"{self.input_dir}/{image}"
277
278        coords, maxval, detection_dict = landmark.detect(
279            class_name=class_name,
280            class_dict=self.class_dict,
281            image_path=image,
282            model=self.landmark_detection_model,
283            scale_std=self.landmark_detection_model_args.get("scale_std"),
284            resize_dim=self.landmark_detection_model_args.get("resize_dim"),
285            normalize_mean=self.landmark_detection_model_args.get("normalize_mean"),
286            normalize_std=self.landmark_detection_model_args.get("normalize_std"),
287        )
288        return coords, maxval, detection_dict
289
290    def derive(
291        self,
292        class_name: str,
293        detection_dict: dict,
294        derivation_dict: dict,
295        landmark_coords: np.array,
296        np_mask: np.array,
297    ):
298        """
299        Derives non-predefined landmark coordinates based on predefined landmarks and a mask.
300
301        Args:
302            class_name (str): The name of the garment class.
303            detection_dict (dict): The dictionary containing detected landmarks.
304            derivation_dict (dict): The dictionary defining derivation rules.
305            landmark_coords (np.array): NumPy array of initial landmark coordinates.
306            np_mask (np.array): NumPy array of the segmentation mask.
307
308        Returns:
309            tuple:
310                - derived_coords (dict): A dictionary of the newly derived landmark coordinates.
311                - updated_detection_dict (dict): The detection dictionary updated with derived landmarks.
312        """
313        derived_coords, updated_detection_dict = landmark.derive(
314            class_name=class_name,
315            detection_dict=detection_dict,
316            derivation_dict=derivation_dict,
317            landmark_coords=landmark_coords,
318            np_mask=np_mask,
319        )
320        return derived_coords, updated_detection_dict
321
322    def refine(
323        self,
324        class_name: str,
325        detection_np: np.array,
326        detection_conf: np.array,
327        detection_dict: dict,
328        mask: np.array,
329        window_size: int = 5,
330        ksize: tuple = (11, 11),
331        sigmaX: float = 0.0,
332    ):
333        """
334        Refines detected landmark coordinates using a blurred segmentation mask.
335
336        Args:
337            class_name (str): The name of the garment class.
338            detection_np (np.array): NumPy array of initial landmark predictions.
339            detection_conf (np.array): NumPy array of confidence scores for each predicted landmark.
340            detection_dict (dict): Dictionary containing landmark data for each class.
341            mask (np.array): Grayscale mask image used to guide refinement.
342            window_size (int, optional): Size of the window used in the refinement algorithm. Defaults to 5.
343            ksize (tuple, optional): Kernel size for Gaussian blur. Must be odd integers. Defaults to (11, 11).
344            sigmaX (float, optional): Gaussian kernel standard deviation in the X direction. Defaults to 0.0.
345
346        Returns:
347            tuple:
348                - refined_detection_np (np.array): Array of the same shape as `detection_np` with refined coordinates.
349                - detection_dict (dict): Updated detection dictionary with refined landmark coordinates.
350        """
351        if self.refinement_args:
352            if self.refinement_args.get("window_size") is not None:
353                window_size = self.refinement_args["window_size"]
354            if self.refinement_args.get("ksize") is not None:
355                ksize = self.refinement_args["ksize"]
356            if self.refinement_args.get("sigmaX") is not None:
357                sigmaX = self.refinement_args["sigmaX"]
358
359        refined_detection_np, refined_detection_dict = landmark.refine(
360            class_name=class_name,
361            detection_np=detection_np,
362            detection_conf=detection_conf,
363            detection_dict=detection_dict,
364            mask=mask,
365            window_size=window_size,
366            ksize=ksize,
367            sigmaX=sigmaX,
368        )
369
370        return refined_detection_np, refined_detection_dict
371
372    def measure(
373        self,
374        save_segmentation_image: bool = False,
375        save_measurement_image: bool = False,
376    ):
377        """
378        Executes the full garment measurement pipeline for all images in the input directory.
379    
380        This method processes each image through a multi-stage pipeline that includes garment classification, 
381        segmentation, landmark detection, optional refinement, and measurement derivation. During classification, 
382        the system identifies the type of garment (e.g., shirt, dress, pants). Segmentation follows, producing 
383        binary or instance masks that separate the garment from the background. Landmark detection is then 
384        performed to locate anatomical or garment-specific keypoints such as shoulders or waist positions. If 
385        enabled, an optional refinement step applies post-processing or model-based corrections to improve the 
386        accuracy of detected keypoints. Finally, the system calculates key garment dimensions - such as chest width, 
387        waist width, and full length - based on the detected landmarks. In addition to this processing pipeline, 
388        the method also manages data and visual output exports. For each input image, a cleaned JSON file is 
389        generated containing the predicted garment class, landmark coordinates, and the resulting measurements. 
390        Optionally, visual outputs such as segmentation masks and images annotated with landmarks and measurements 
391        can be saved to assist in inspection or debugging.
392    
393        Args:
394            save_segmentation_image (bool): If True, saves segmentation masks and background-modified images.
395                                            Defaults to False.
396            save_measurement_image (bool): If True, saves images overlaid with detected landmarks and measurements.
397                                           Defaults to False.
398    
399        Returns:
400            tuple:
401                - metadata (pd.DataFrame): A DataFrame containing metadata for each processed image, such as:
402                    - Original image path
403                    - Paths to any saved segmentation or annotated images
404                    - Class and measurement results
405                - outputs (dict): A dictionary mapping image filenames to their detailed processing results, including:
406                    - Predicted class
407                    - Detected landmarks with coordinates and confidence scores
408                    - Calculated measurements
409                    - File paths to any saved images (if applicable)
410    
411        Example of exported JSON:
412            ```
413            {
414                "cloth_3.jpg": {
415                    "class": "vest dress",
416                    "landmarks": {
417                        "10": {
418                            "conf": 0.7269417643547058,
419                            "x": 611.0,
420                            "y": 861.0
421                        },
422                        "16": {
423                            "conf": 0.6769524812698364,
424                            "x": 1226.0,
425                            "y": 838.0
426                        },
427                        "17": {
428                            "conf": 0.7472652196884155,
429                            "x": 1213.0,
430                            "y": 726.0
431                        },
432                        "18": {
433                            "conf": 0.7360446453094482,
434                            "x": 1238.0,
435                            "y": 613.0
436                        },
437                        "2": {
438                            "conf": 0.9256571531295776,
439                            "x": 703.0,
440                            "y": 264.0
441                        },
442                        "20": {
443                            "x": 700.936,
444                            "y": 2070.0
445                        },
446                        "8": {
447                            "conf": 0.7129100561141968,
448                            "x": 563.0,
449                            "y": 613.0
450                        },
451                        "9": {
452                            "conf": 0.8203497529029846,
453                            "x": 598.0,
454                            "y": 726.0
455                        }
456                    },
457                    "measurements": {
458                        "chest": {
459                            "distance": 675.0,
460                            "landmarks": {
461                                "end": "18",
462                                "start": "8"
463                            }
464                        },
465                        "full length": {
466                            "distance": 1806.0011794281863,
467                            "landmarks": {
468                                "end": "20",
469                                "start": "2"
470                            }
471                        },
472                        "hips": {
473                            "distance": 615.4299310238331,
474                            "landmarks": {
475                                "end": "16",
476                                "start": "10"
477                            }
478                        },
479                        "waist": {
480                            "distance": 615.0,
481                            "landmarks": {
482                                "end": "17",
483                                "start": "9"
484                            }
485                        }
486                    }
487                }
488            }
489            ```
490        """
491        # Some helper variables
492        use_bg_color = self.segmentation_model_args.get("background_color") is not None
493        outputs = {}
494
495        # Step 1: Create the output directory
496        Path(self.output_dir).mkdir(parents=True, exist_ok=True)
497        Path(f"{self.output_dir}/measurement_json").mkdir(parents=True, exist_ok=True)
498
499        if save_segmentation_image and (
500            use_bg_color or self.do_derive or self.do_refine
501        ):
502            Path(f"{self.output_dir}/mask_image").mkdir(parents=True, exist_ok=True)
503            if use_bg_color:
504                Path(f"{self.output_dir}/bg_modified_image").mkdir(
505                    parents=True, exist_ok=True
506                )
507
508        if save_measurement_image:
509            Path(f"{self.output_dir}/measurement_image").mkdir(
510                parents=True, exist_ok=True
511            )
512
513        # Step 2: Collect image filenames from input_dir
514        image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff"]
515        input_path = Path(self.input_dir)
516
517        image_files = []
518        for ext in image_extensions:
519            image_files.extend(input_path.glob(ext))
520
521        # Step 3: Determine column structure
522        columns = [
523            "filename",
524            "class",
525            "mask_image" if use_bg_color or self.do_derive or self.do_refine else None,
526            "bg_modified_image" if use_bg_color else None,
527            "measurement_image",
528            "measurement_json",
529        ]
530        columns = [col for col in columns if col is not None]
531
532        metadata = pd.DataFrame(columns=columns)
533        metadata["filename"] = [img.name for img in image_files]
534
535        # Step 4: Print start message and information
536        print(f"Start measuring {len(metadata['filename'])} garment images ...")
537
538        if self.do_derive and self.do_refine:
539            message = (
540                "There are 5 measurement steps: classification, segmentation, "
541                "landmark detection, landmark refinement, and landmark derivation."
542            )
543        elif self.do_derive:
544            message = (
545                "There are 4 measurement steps: classification, segmentation, "
546                "landmark detection, and landmark derivation."
547            )
548        elif self.do_refine:
549            message = (
550                "There are 4 measurement steps: classification, segmentation, "
551                "landmark detection, and landmark refinement."
552            )
553        elif use_bg_color:
554            message = (
555                "There are 3 measurement steps: classification, segmentation, "
556                "and landmark detection."
557            )
558        else:
559            message = (
560                "There are 2 measurement steps: classification and landmark detection."
561            )
562
563        print(textwrap.fill(message, width=80))
564
565        # Step 5: Classification
566        for idx, image in tqdm(
567            enumerate(metadata["filename"]), total=len(metadata), desc="Classification"
568        ):
569            label, _ = self.classify(image=image, verbose=False)
570            metadata.at[idx, "class"] = label
571            outputs[image] = {}
572
573        # Step 6: Segmentation
574        if use_bg_color or (self.do_derive or self.do_refine):
575            for idx, image in tqdm(
576                enumerate(metadata["filename"]),
577                total=len(metadata),
578                desc="Segmentation",
579            ):
580                if use_bg_color:
581                    original_img, mask, bg_modified_image = self.segment(image=image)
582                    outputs[image] = {
583                        "mask": mask,
584                        "bg_modified_image": bg_modified_image,
585                    }
586                else:
587                    original_img, mask = self.segment(image=image)
588                    outputs[image] = {
589                        "mask": mask,
590                    }
591
592        # Step 7: Landmark detection
593        for idx, image in tqdm(
594            enumerate(metadata["filename"]),
595            total=len(metadata),
596            desc="Landmark detection",
597        ):
598            label = metadata.loc[metadata["filename"] == image, "class"].values[0]
599            if use_bg_color:
600                coords, maxvals, detection_dict = self.detect(
601                    class_name=label, image=outputs[image]["bg_modified_image"]
602                )
603                outputs[image]["detection_dict"] = detection_dict
604                if self.do_derive or self.do_refine:
605                    outputs[image]["coords"] = coords
606                    outputs[image]["maxvals"] = maxvals
607            else:
608                coords, maxvals, detection_dict = self.detect(
609                    class_name=label, image=image
610                )
611                outputs[image]["detection_dict"] = detection_dict
612                if self.do_derive or self.do_refine:
613                    outputs[image]["coords"] = coords
614                    outputs[image]["maxvals"] = maxvals
615
616        # Step 8: Landmark refinement
617        if self.do_refine:
618            for idx, image in tqdm(
619                enumerate(metadata["filename"]),
620                total=len(metadata),
621                desc="Landmark refinement",
622            ):
623                label = metadata.loc[metadata["filename"] == image, "class"].values[0]
624                updated_coords, updated_detection_dict = self.refine(
625                    class_name=label,
626                    detection_np=outputs[image]["coords"],
627                    detection_conf=outputs[image]["maxvals"],
628                    detection_dict=outputs[image]["detection_dict"],
629                    mask=outputs[image]["mask"],
630                )
631                outputs[image]["coords"] = updated_coords
632                outputs[image]["detection_dict"] = updated_detection_dict
633
634        # Step 9: Landmark derivation
635        if self.do_derive:
636            for idx, image in tqdm(
637                enumerate(metadata["filename"]),
638                total=len(metadata),
639                desc="Landmark derivation",
640            ):
641                label = metadata.loc[metadata["filename"] == image, "class"].values[0]
642                derived_coords, updated_detection_dict = self.derive(
643                    class_name=label,
644                    detection_dict=outputs[image]["detection_dict"],
645                    derivation_dict=self.derivation_dict,
646                    landmark_coords=outputs[image]["coords"],
647                    np_mask=outputs[image]["mask"],
648                )
649                outputs[image]["detection_dict"] = updated_detection_dict
650
651        # Step 10: Save segmentation image
652        if save_segmentation_image and (
653            use_bg_color or self.do_derive or self.do_refine
654        ):
655            for idx, image in tqdm(
656                enumerate(metadata["filename"]),
657                total=len(metadata),
658                desc="Save segmentation image",
659            ):
660                transformed_name = os.path.splitext(image)[0]
661                Image.fromarray(outputs[image]["mask"]).save(
662                    f"{self.output_dir}/mask_image/{transformed_name}_mask.png"
663                )
664                metadata.at[
665                    idx, "mask_image"
666                ] = f"{self.output_dir}/mask_image/{transformed_name}_mask.png"
667                if use_bg_color:
668                    Image.fromarray(outputs[image]["bg_modified_image"]).save(
669                        f"{self.output_dir}/bg_modified_image/{transformed_name}_bg_modified.png"
670                    )
671                    metadata.at[
672                        idx, "bg_modified_image"
673                    ] = f"{self.output_dir}/bg_modified_image/{transformed_name}_bg_modified.png"
674
675        # Step 10: Save measurement image
676        if save_measurement_image:
677            for idx, image in tqdm(
678                enumerate(metadata["filename"]),
679                total=len(metadata),
680                desc="Save measurement image",
681            ):
682                label = metadata.loc[metadata["filename"] == image, "class"].values[0]
683                transformed_name = os.path.splitext(image)[0]
684
685                image_to_save = Image.open(f"{self.input_dir}/{image}").convert("RGB")
686                draw = ImageDraw.Draw(image_to_save)
687                font = ImageFont.load_default()
688                landmarks = outputs[image]["detection_dict"][label]["landmarks"]
689
690                for lm_id, lm_data in landmarks.items():
691                    x, y = lm_data["x"], lm_data["y"]
692                    radius = 5
693                    draw.ellipse(
694                        (x - radius, y - radius, x + radius, y + radius), fill="green"
695                    )
696                    draw.text((x + 8, y - 8), lm_id, fill="green", font=font)
697
698                image_to_save.save(
699                    f"{self.output_dir}/measurement_image/{transformed_name}_measurement.png"
700                )
701                metadata.at[
702                    idx, "measurement_image"
703                ] = f"{self.output_dir}/measurement_image/{transformed_name}_measurement.png"
704
705        # Step 11: Save measurement json
706        for idx, image in tqdm(
707            enumerate(metadata["filename"]),
708            total=len(metadata),
709            desc="Save measurement json",
710        ):
711            label = metadata.loc[metadata["filename"] == image, "class"].values[0]
712            transformed_name = os.path.splitext(image)[0]
713
714            # Clean the detection dictionary
715            final_dict = utils.clean_detection_dict(
716                class_name=label,
717                image_name=image,
718                detection_dict=outputs[image]["detection_dict"],
719            )
720
721            # Export JSON
722            utils.export_dict_to_json(
723                data=final_dict,
724                filename=f"{self.output_dir}/measurement_json/{transformed_name}_measurement.json",
725            )
726
727            metadata.at[
728                idx, "measurement_json"
729            ] = f"{self.output_dir}/measurement_json/{transformed_name}_measurement.json"
730
731        # Step 12: Save metadata as a CSV
732        metadata.to_csv(f"{self.output_dir}/metadata.csv", index=False)
733
734        return metadata, outputs

The tailor class acts as a central agent for the GarmentIQ pipeline, orchestrating garment measurement from classification to landmark derivation.

It integrates functionalities from other modules (classification, segmentation, landmark) to provide a smooth end-to-end process for automated garment measurement from images.

Attributes:

input_dir (str): Directory containing input images.
model_dir (str): Directory where models are stored.
output_dir (str): Directory to save processed outputs.
class_dict (dict): Dictionary defining garment classes and their properties.
do_derive (bool): Flag to enable landmark derivation.
do_refine (bool): Flag to enable landmark refinement.
classification_model_path (str): Path to the classification model.
classification_model_class (Type[nn.Module]): Class definition for the classification model.
classification_model_args (Dict): Arguments for the classification model.
segmentation_model_name (str): Name or path for the segmentation model.
segmentation_model_args (Dict): Arguments for the segmentation model.
landmark_detection_model_path (str): Path to the landmark detection model.
landmark_detection_model_class (Type[nn.Module]): Class definition for the landmark detection model.
landmark_detection_model_args (Dict): Arguments for the landmark detection model.
refinement_args (Optional[Dict]): Arguments for landmark refinement.
derivation_dict (Optional[Dict]): Dictionary for landmark derivation rules.

tailor( input_dir: str, model_dir: str, output_dir: str, class_dict: dict, do_derive: bool, do_refine: bool, classification_model_path: str, classification_model_class: Type[torch.nn.modules.module.Module], classification_model_args: Dict, segmentation_model_name: str, segmentation_model_args: Dict, landmark_detection_model_path: str, landmark_detection_model_class: Type[torch.nn.modules.module.Module], landmark_detection_model_args: Dict, refinement_args: Optional[Dict] = None, derivation_dict: Optional[Dict] = None) View Source

 44    def __init__(
 45        self,
 46        input_dir: str,
 47        model_dir: str,
 48        output_dir: str,
 49        class_dict: dict,
 50        do_derive: bool,
 51        do_refine: bool,
 52        classification_model_path: str,
 53        classification_model_class: Type[nn.Module],
 54        classification_model_args: Dict,
 55        segmentation_model_name: str,
 56        segmentation_model_args: Dict,
 57        landmark_detection_model_path: str,
 58        landmark_detection_model_class: Type[nn.Module],
 59        landmark_detection_model_args: Dict,
 60        refinement_args: Optional[Dict] = None,
 61        derivation_dict: Optional[Dict] = None,
 62    ):
 63        """
 64        Initializes the `tailor` agent with paths, model configurations, and processing flags.
 65
 66        Args:
 67            input_dir (str): Path to the directory containing input images.
 68            model_dir (str): Path to the directory where all required models are stored.
 69            output_dir (str): Path to the directory where all processed outputs will be saved.
 70            class_dict (dict): A dictionary defining the garment classes, their predefined points,
 71                                index ranges, and instruction JSON file paths.
 72            do_derive (bool): If True, enables the landmark derivation step.
 73            do_refine (bool): If True, enables the landmark refinement step.
 74            classification_model_path (str): The filename or relative path to the classification model.
 75            classification_model_class (Type[nn.Module]): The Python class of the classification model.
 76            classification_model_args (Dict): A dictionary of arguments to initialize the classification model.
 77            segmentation_model_name (str): The name or path of the pretrained segmentation model.
 78            segmentation_model_args (Dict): A dictionary of arguments for the segmentation model.
 79            landmark_detection_model_path (str): The filename or relative path to the landmark detection model.
 80            landmark_detection_model_class (Type[nn.Module]): The Python class of the landmark detection model.
 81            landmark_detection_model_args (Dict): A dictionary of arguments for the landmark detection model.
 82            refinement_args (Optional[Dict]): Optional arguments for the refinement process,
 83                                              e.g., `window_size`, `ksize`, `sigmaX`. Defaults to None.
 84            derivation_dict (Optional[Dict]): A dictionary defining derivation rules for non-predefined landmarks.
 85                                               Required if `do_derive` is True.
 86
 87        Raises:
 88            ValueError: If `do_derive` is True but `derivation_dict` is None.
 89        """
 90        # Directories
 91        self.input_dir = input_dir
 92        self.model_dir = model_dir
 93        self.output_dir = output_dir
 94
 95        # Classes
 96        self.class_dict = class_dict
 97        self.classes = sorted(list(class_dict.keys()))
 98
 99        # Derivation
100        self.do_derive = do_derive
101        if self.do_derive:
102            if derivation_dict is None:
103                raise ValueError(
104                    "`derivation_dict` must be provided if `do_derive=True`."
105                )
106            self.derivation_dict = derivation_dict
107        else:
108            self.derivation_dict = None
109
110        # Refinement setup
111        self.do_refine = do_refine
112        self.do_refine = do_refine
113        if self.do_refine:
114            if refinement_args is None:
115                self.refinement_args = {}
116            self.refinement_args = refinement_args
117        else:
118            self.refinement_args = None
119
120        # Classification model setup
121        self.classification_model_path = classification_model_path
122        self.classification_model_args = classification_model_args
123        self.classification_model_class = classification_model_class
124        filtered_model_args = {
125            k: v
126            for k, v in self.classification_model_args.items()
127            if k not in ("resize_dim", "normalize_mean", "normalize_std")
128        }
129
130        # Load the model using the filtered arguments
131        self.classification_model = classification.load_model(
132            model_path=f"{self.model_dir}/{self.classification_model_path}",
133            model_class=self.classification_model_class,
134            model_args=filtered_model_args,
135        )
136
137        # Segmentation model setup
138        self.segmentation_model_name = segmentation_model_name
139        self.segmentation_model_args = segmentation_model_args
140        self.segmentation_has_bg_color = "background_color" in segmentation_model_args
141        self.segmentation_model = segmentation.load_model(
142            pretrained_model=self.segmentation_model_name,
143            pretrained_model_args={
144                "trust_remote_code": segmentation_model_args["trust_remote_code"]
145            },
146            high_precision=segmentation_model_args["high_precision"],
147        )
148
149        # Landmark detection model setup
150        self.landmark_detection_model_path = landmark_detection_model_path
151        self.landmark_detection_model_class = landmark_detection_model_class
152        self.landmark_detection_model_args = landmark_detection_model_args
153        self.landmark_detection_model = landmark.detection.load_model(
154            model_path=f"{self.model_dir}/{self.landmark_detection_model_path}",
155            model_class=self.landmark_detection_model_class,
156        )

Initializes the tailor agent with paths, model configurations, and processing flags.

Arguments:

input_dir (str): Path to the directory containing input images.
model_dir (str): Path to the directory where all required models are stored.
output_dir (str): Path to the directory where all processed outputs will be saved.
class_dict (dict): A dictionary defining the garment classes, their predefined points, index ranges, and instruction JSON file paths.
do_derive (bool): If True, enables the landmark derivation step.
do_refine (bool): If True, enables the landmark refinement step.
classification_model_path (str): The filename or relative path to the classification model.
classification_model_class (Type[nn.Module]): The Python class of the classification model.
classification_model_args (Dict): A dictionary of arguments to initialize the classification model.
segmentation_model_name (str): The name or path of the pretrained segmentation model.
segmentation_model_args (Dict): A dictionary of arguments for the segmentation model.
landmark_detection_model_path (str): The filename or relative path to the landmark detection model.
landmark_detection_model_class (Type[nn.Module]): The Python class of the landmark detection model.
landmark_detection_model_args (Dict): A dictionary of arguments for the landmark detection model.
refinement_args (Optional[Dict]): Optional arguments for the refinement process, e.g., window_size, ksize, sigmaX. Defaults to None.
derivation_dict (Optional[Dict]): A dictionary defining derivation rules for non-predefined landmarks. Required if do_derive is True.

Raises:

ValueError: If do_derive is True but derivation_dict is None.

input_dir

model_dir

output_dir

class_dict

classes

do_derive

do_refine

classification_model_path

classification_model_args

classification_model_class

classification_model

segmentation_model_name

segmentation_model_args

segmentation_has_bg_color

segmentation_model

landmark_detection_model_path

landmark_detection_model_class

landmark_detection_model_args

landmark_detection_model

def summary(self): View Source

158    def summary(self):
159        """
160        Prints a summary of the `tailor` agent's configuration, including directory paths,
161        defined classes, processing options (refine, derive), and loaded models.
162        """
163        width = 80
164        sep = "=" * width
165
166        print(sep)
167        print("TAILOR AGENT SUMMARY".center(width))
168        print(sep)
169
170        # Directories
171        print("DIRECTORY PATHS".center(width, "-"))
172        print(f"{'Input directory:':25} {self.input_dir}")
173        print(f"{'Model directory:':25} {self.model_dir}")
174        print(f"{'Output directory:':25} {self.output_dir}")
175        print()
176
177        # Classes
178        print("CLASSES".center(width, "-"))
179        print(f"{'Class Index':<11} | Class Name")
180        print(f"{'-'*11} | {'-'*66}")
181        for i, cls in enumerate(self.classes):
182            print(f"{i:<11} | {cls}")
183        print()
184
185        # Flags
186        print("OPTIONS".center(width, "-"))
187        print(f"{'Do refine?:':25} {self.do_refine}")
188        print(f"{'Do derive?:':25} {self.do_derive}")
189        print()
190
191        # Models
192        print("MODELS".center(width, "-"))
193        print(
194            f"{'Classification Model:':25} {self.classification_model_class.__name__}"
195        )
196        print(f"{'Segmentation Model:':25} {self.segmentation_model_name}")
197        print(f"{'  └─ Change BG color?:':25} {self.segmentation_has_bg_color}")
198        print(
199            f"{'Landmark Detection Model:':25} {self.landmark_detection_model_class.__class__.__name__}"
200        )
201        print(sep)

Prints a summary of the tailor agent's configuration, including directory paths, defined classes, processing options (refine, derive), and loaded models.

def classify(self, image: str, verbose=False): View Source

203    def classify(self, image: str, verbose=False):
204        """
205        Classifies a single garment image using the configured classification model.
206
207        Args:
208            image (str): The filename of the image to classify, located in `self.input_dir`.
209            verbose (bool): If True, prints detailed classification output. Defaults to False.
210
211        Returns:
212            tuple:
213                - label (str): The predicted class label of the garment.
214                - probabilities (List[float]): A list of probabilities for each class.
215        """
216        label, probablities = classification.predict(
217            model=self.classification_model,
218            image_path=f"{self.input_dir}/{image}",
219            classes=self.classes,
220            resize_dim=self.classification_model_args.get("resize_dim"),
221            normalize_mean=self.classification_model_args.get("normalize_mean"),
222            normalize_std=self.classification_model_args.get("normalize_std"),
223            verbose=verbose,
224        )
225        return label, probablities

Classifies a single garment image using the configured classification model.

Arguments:

image (str): The filename of the image to classify, located in self.input_dir.
verbose (bool): If True, prints detailed classification output. Defaults to False.

Returns:

tuple: - label (str): The predicted class label of the garment. - probabilities (List[float]): A list of probabilities for each class.

def segment(self, image: str): View Source

227    def segment(self, image: str):
228        """
229        Segments a single garment image to extract its mask and optionally modifies the background color.
230
231        Args:
232            image (str): The filename of the image to segment, located in `self.input_dir`.
233
234        Returns:
235            tuple:
236                - original_img (np.ndarray): The original image with the mask overlaid.
237                - mask (np.ndarray): The binary segmentation mask.
238                - bg_modified_img (np.ndarray, optional): The image with the background color changed,
239                                                         returned only if `background_color` is specified
240                                                         in `segmentation_model_args`.
241        """
242        original_img, mask = segmentation.extract(
243            model=self.segmentation_model,
244            image_path=f"{self.input_dir}/{image}",
245            resize_dim=self.segmentation_model_args.get("resize_dim"),
246            normalize_mean=self.segmentation_model_args.get("normalize_mean"),
247            normalize_std=self.segmentation_model_args.get("normalize_std"),
248            high_precision=self.segmentation_model_args.get("high_precision"),
249        )
250
251        background_color = self.segmentation_model_args.get("background_color")
252
253        if background_color is None:
254            return original_img, mask
255        else:
256            bg_modified_img = segmentation.change_background_color(
257                image_np=original_img, mask_np=mask, background_color=background_color
258            )
259            return original_img, mask, bg_modified_img

Segments a single garment image to extract its mask and optionally modifies the background color.

Arguments:

image (str): The filename of the image to segment, located in self.input_dir.

Returns:

tuple: - original_img (np.ndarray): The original image with the mask overlaid. - mask (np.ndarray): The binary segmentation mask. - bg_modified_img (np.ndarray, optional): The image with the background color changed, returned only if background_color is specified in segmentation_model_args.

def detect(self, class_name: str, image: Union[str, numpy.ndarray]): View Source

261    def detect(self, class_name: str, image: Union[str, np.ndarray]):
262        """
263        Detects predefined landmarks on a garment image based on its classified class.
264
265        Args:
266            class_name (str): The classified name of the garment.
267            image (Union[str, np.ndarray]): The path to the image file or a NumPy array of the image.
268
269        Returns:
270            tuple:
271                - coords (np.array): Detected landmark coordinates.
272                - maxval (np.array): Confidence scores for detected landmarks.
273                - detection_dict (dict): A dictionary containing detailed landmark detection data.
274        """
275        if isinstance(image, str):
276            image = f"{self.input_dir}/{image}"
277
278        coords, maxval, detection_dict = landmark.detect(
279            class_name=class_name,
280            class_dict=self.class_dict,
281            image_path=image,
282            model=self.landmark_detection_model,
283            scale_std=self.landmark_detection_model_args.get("scale_std"),
284            resize_dim=self.landmark_detection_model_args.get("resize_dim"),
285            normalize_mean=self.landmark_detection_model_args.get("normalize_mean"),
286            normalize_std=self.landmark_detection_model_args.get("normalize_std"),
287        )
288        return coords, maxval, detection_dict

Detects predefined landmarks on a garment image based on its classified class.

Arguments:

class_name (str): The classified name of the garment.
image (Union[str, np.ndarray]): The path to the image file or a NumPy array of the image.

Returns:

tuple: - coords (np.array): Detected landmark coordinates. - maxval (np.array): Confidence scores for detected landmarks. - detection_dict (dict): A dictionary containing detailed landmark detection data.

def derive( self, class_name: str, detection_dict: dict, derivation_dict: dict, landmark_coords: <built-in function array>, np_mask: <built-in function array>): View Source

290    def derive(
291        self,
292        class_name: str,
293        detection_dict: dict,
294        derivation_dict: dict,
295        landmark_coords: np.array,
296        np_mask: np.array,
297    ):
298        """
299        Derives non-predefined landmark coordinates based on predefined landmarks and a mask.
300
301        Args:
302            class_name (str): The name of the garment class.
303            detection_dict (dict): The dictionary containing detected landmarks.
304            derivation_dict (dict): The dictionary defining derivation rules.
305            landmark_coords (np.array): NumPy array of initial landmark coordinates.
306            np_mask (np.array): NumPy array of the segmentation mask.
307
308        Returns:
309            tuple:
310                - derived_coords (dict): A dictionary of the newly derived landmark coordinates.
311                - updated_detection_dict (dict): The detection dictionary updated with derived landmarks.
312        """
313        derived_coords, updated_detection_dict = landmark.derive(
314            class_name=class_name,
315            detection_dict=detection_dict,
316            derivation_dict=derivation_dict,
317            landmark_coords=landmark_coords,
318            np_mask=np_mask,
319        )
320        return derived_coords, updated_detection_dict

Derives non-predefined landmark coordinates based on predefined landmarks and a mask.

Arguments:

class_name (str): The name of the garment class.
detection_dict (dict): The dictionary containing detected landmarks.
derivation_dict (dict): The dictionary defining derivation rules.
landmark_coords (np.array): NumPy array of initial landmark coordinates.
np_mask (np.array): NumPy array of the segmentation mask.

Returns:

tuple: - derived_coords (dict): A dictionary of the newly derived landmark coordinates. - updated_detection_dict (dict): The detection dictionary updated with derived landmarks.

def refine( self, class_name: str, detection_np: <built-in function array>, detection_conf: <built-in function array>, detection_dict: dict, mask: <built-in function array>, window_size: int = 5, ksize: tuple = (11, 11), sigmaX: float = 0.0): View Source

322    def refine(
323        self,
324        class_name: str,
325        detection_np: np.array,
326        detection_conf: np.array,
327        detection_dict: dict,
328        mask: np.array,
329        window_size: int = 5,
330        ksize: tuple = (11, 11),
331        sigmaX: float = 0.0,
332    ):
333        """
334        Refines detected landmark coordinates using a blurred segmentation mask.
335
336        Args:
337            class_name (str): The name of the garment class.
338            detection_np (np.array): NumPy array of initial landmark predictions.
339            detection_conf (np.array): NumPy array of confidence scores for each predicted landmark.
340            detection_dict (dict): Dictionary containing landmark data for each class.
341            mask (np.array): Grayscale mask image used to guide refinement.
342            window_size (int, optional): Size of the window used in the refinement algorithm. Defaults to 5.
343            ksize (tuple, optional): Kernel size for Gaussian blur. Must be odd integers. Defaults to (11, 11).
344            sigmaX (float, optional): Gaussian kernel standard deviation in the X direction. Defaults to 0.0.
345
346        Returns:
347            tuple:
348                - refined_detection_np (np.array): Array of the same shape as `detection_np` with refined coordinates.
349                - detection_dict (dict): Updated detection dictionary with refined landmark coordinates.
350        """
351        if self.refinement_args:
352            if self.refinement_args.get("window_size") is not None:
353                window_size = self.refinement_args["window_size"]
354            if self.refinement_args.get("ksize") is not None:
355                ksize = self.refinement_args["ksize"]
356            if self.refinement_args.get("sigmaX") is not None:
357                sigmaX = self.refinement_args["sigmaX"]
358
359        refined_detection_np, refined_detection_dict = landmark.refine(
360            class_name=class_name,
361            detection_np=detection_np,
362            detection_conf=detection_conf,
363            detection_dict=detection_dict,
364            mask=mask,
365            window_size=window_size,
366            ksize=ksize,
367            sigmaX=sigmaX,
368        )
369
370        return refined_detection_np, refined_detection_dict

Refines detected landmark coordinates using a blurred segmentation mask.

Arguments:

class_name (str): The name of the garment class.
detection_np (np.array): NumPy array of initial landmark predictions.
detection_conf (np.array): NumPy array of confidence scores for each predicted landmark.
detection_dict (dict): Dictionary containing landmark data for each class.
mask (np.array): Grayscale mask image used to guide refinement.
window_size (int, optional): Size of the window used in the refinement algorithm. Defaults to 5.
ksize (tuple, optional): Kernel size for Gaussian blur. Must be odd integers. Defaults to (11, 11).
sigmaX (float, optional): Gaussian kernel standard deviation in the X direction. Defaults to 0.0.

Returns:

tuple: - refined_detection_np (np.array): Array of the same shape as detection_np with refined coordinates. - detection_dict (dict): Updated detection dictionary with refined landmark coordinates.

def measure( self, save_segmentation_image: bool = False, save_measurement_image: bool = False): View Source

372    def measure(
373        self,
374        save_segmentation_image: bool = False,
375        save_measurement_image: bool = False,
376    ):
377        """
378        Executes the full garment measurement pipeline for all images in the input directory.
379    
380        This method processes each image through a multi-stage pipeline that includes garment classification, 
381        segmentation, landmark detection, optional refinement, and measurement derivation. During classification, 
382        the system identifies the type of garment (e.g., shirt, dress, pants). Segmentation follows, producing 
383        binary or instance masks that separate the garment from the background. Landmark detection is then 
384        performed to locate anatomical or garment-specific keypoints such as shoulders or waist positions. If 
385        enabled, an optional refinement step applies post-processing or model-based corrections to improve the 
386        accuracy of detected keypoints. Finally, the system calculates key garment dimensions - such as chest width, 
387        waist width, and full length - based on the detected landmarks. In addition to this processing pipeline, 
388        the method also manages data and visual output exports. For each input image, a cleaned JSON file is 
389        generated containing the predicted garment class, landmark coordinates, and the resulting measurements. 
390        Optionally, visual outputs such as segmentation masks and images annotated with landmarks and measurements 
391        can be saved to assist in inspection or debugging.
392    
393        Args:
394            save_segmentation_image (bool): If True, saves segmentation masks and background-modified images.
395                                            Defaults to False.
396            save_measurement_image (bool): If True, saves images overlaid with detected landmarks and measurements.
397                                           Defaults to False.
398    
399        Returns:
400            tuple:
401                - metadata (pd.DataFrame): A DataFrame containing metadata for each processed image, such as:
402                    - Original image path
403                    - Paths to any saved segmentation or annotated images
404                    - Class and measurement results
405                - outputs (dict): A dictionary mapping image filenames to their detailed processing results, including:
406                    - Predicted class
407                    - Detected landmarks with coordinates and confidence scores
408                    - Calculated measurements
409                    - File paths to any saved images (if applicable)
410    
411        Example of exported JSON:
412            ```
413            {
414                "cloth_3.jpg": {
415                    "class": "vest dress",
416                    "landmarks": {
417                        "10": {
418                            "conf": 0.7269417643547058,
419                            "x": 611.0,
420                            "y": 861.0
421                        },
422                        "16": {
423                            "conf": 0.6769524812698364,
424                            "x": 1226.0,
425                            "y": 838.0
426                        },
427                        "17": {
428                            "conf": 0.7472652196884155,
429                            "x": 1213.0,
430                            "y": 726.0
431                        },
432                        "18": {
433                            "conf": 0.7360446453094482,
434                            "x": 1238.0,
435                            "y": 613.0
436                        },
437                        "2": {
438                            "conf": 0.9256571531295776,
439                            "x": 703.0,
440                            "y": 264.0
441                        },
442                        "20": {
443                            "x": 700.936,
444                            "y": 2070.0
445                        },
446                        "8": {
447                            "conf": 0.7129100561141968,
448                            "x": 563.0,
449                            "y": 613.0
450                        },
451                        "9": {
452                            "conf": 0.8203497529029846,
453                            "x": 598.0,
454                            "y": 726.0
455                        }
456                    },
457                    "measurements": {
458                        "chest": {
459                            "distance": 675.0,
460                            "landmarks": {
461                                "end": "18",
462                                "start": "8"
463                            }
464                        },
465                        "full length": {
466                            "distance": 1806.0011794281863,
467                            "landmarks": {
468                                "end": "20",
469                                "start": "2"
470                            }
471                        },
472                        "hips": {
473                            "distance": 615.4299310238331,
474                            "landmarks": {
475                                "end": "16",
476                                "start": "10"
477                            }
478                        },
479                        "waist": {
480                            "distance": 615.0,
481                            "landmarks": {
482                                "end": "17",
483                                "start": "9"
484                            }
485                        }
486                    }
487                }
488            }
489            ```
490        """
491        # Some helper variables
492        use_bg_color = self.segmentation_model_args.get("background_color") is not None
493        outputs = {}
494
495        # Step 1: Create the output directory
496        Path(self.output_dir).mkdir(parents=True, exist_ok=True)
497        Path(f"{self.output_dir}/measurement_json").mkdir(parents=True, exist_ok=True)
498
499        if save_segmentation_image and (
500            use_bg_color or self.do_derive or self.do_refine
501        ):
502            Path(f"{self.output_dir}/mask_image").mkdir(parents=True, exist_ok=True)
503            if use_bg_color:
504                Path(f"{self.output_dir}/bg_modified_image").mkdir(
505                    parents=True, exist_ok=True
506                )
507
508        if save_measurement_image:
509            Path(f"{self.output_dir}/measurement_image").mkdir(
510                parents=True, exist_ok=True
511            )
512
513        # Step 2: Collect image filenames from input_dir
514        image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp", "*.tiff"]
515        input_path = Path(self.input_dir)
516
517        image_files = []
518        for ext in image_extensions:
519            image_files.extend(input_path.glob(ext))
520
521        # Step 3: Determine column structure
522        columns = [
523            "filename",
524            "class",
525            "mask_image" if use_bg_color or self.do_derive or self.do_refine else None,
526            "bg_modified_image" if use_bg_color else None,
527            "measurement_image",
528            "measurement_json",
529        ]
530        columns = [col for col in columns if col is not None]
531
532        metadata = pd.DataFrame(columns=columns)
533        metadata["filename"] = [img.name for img in image_files]
534
535        # Step 4: Print start message and information
536        print(f"Start measuring {len(metadata['filename'])} garment images ...")
537
538        if self.do_derive and self.do_refine:
539            message = (
540                "There are 5 measurement steps: classification, segmentation, "
541                "landmark detection, landmark refinement, and landmark derivation."
542            )
543        elif self.do_derive:
544            message = (
545                "There are 4 measurement steps: classification, segmentation, "
546                "landmark detection, and landmark derivation."
547            )
548        elif self.do_refine:
549            message = (
550                "There are 4 measurement steps: classification, segmentation, "
551                "landmark detection, and landmark refinement."
552            )
553        elif use_bg_color:
554            message = (
555                "There are 3 measurement steps: classification, segmentation, "
556                "and landmark detection."
557            )
558        else:
559            message = (
560                "There are 2 measurement steps: classification and landmark detection."
561            )
562
563        print(textwrap.fill(message, width=80))
564
565        # Step 5: Classification
566        for idx, image in tqdm(
567            enumerate(metadata["filename"]), total=len(metadata), desc="Classification"
568        ):
569            label, _ = self.classify(image=image, verbose=False)
570            metadata.at[idx, "class"] = label
571            outputs[image] = {}
572
573        # Step 6: Segmentation
574        if use_bg_color or (self.do_derive or self.do_refine):
575            for idx, image in tqdm(
576                enumerate(metadata["filename"]),
577                total=len(metadata),
578                desc="Segmentation",
579            ):
580                if use_bg_color:
581                    original_img, mask, bg_modified_image = self.segment(image=image)
582                    outputs[image] = {
583                        "mask": mask,
584                        "bg_modified_image": bg_modified_image,
585                    }
586                else:
587                    original_img, mask = self.segment(image=image)
588                    outputs[image] = {
589                        "mask": mask,
590                    }
591
592        # Step 7: Landmark detection
593        for idx, image in tqdm(
594            enumerate(metadata["filename"]),
595            total=len(metadata),
596            desc="Landmark detection",
597        ):
598            label = metadata.loc[metadata["filename"] == image, "class"].values[0]
599            if use_bg_color:
600                coords, maxvals, detection_dict = self.detect(
601                    class_name=label, image=outputs[image]["bg_modified_image"]
602                )
603                outputs[image]["detection_dict"] = detection_dict
604                if self.do_derive or self.do_refine:
605                    outputs[image]["coords"] = coords
606                    outputs[image]["maxvals"] = maxvals
607            else:
608                coords, maxvals, detection_dict = self.detect(
609                    class_name=label, image=image
610                )
611                outputs[image]["detection_dict"] = detection_dict
612                if self.do_derive or self.do_refine:
613                    outputs[image]["coords"] = coords
614                    outputs[image]["maxvals"] = maxvals
615
616        # Step 8: Landmark refinement
617        if self.do_refine:
618            for idx, image in tqdm(
619                enumerate(metadata["filename"]),
620                total=len(metadata),
621                desc="Landmark refinement",
622            ):
623                label = metadata.loc[metadata["filename"] == image, "class"].values[0]
624                updated_coords, updated_detection_dict = self.refine(
625                    class_name=label,
626                    detection_np=outputs[image]["coords"],
627                    detection_conf=outputs[image]["maxvals"],
628                    detection_dict=outputs[image]["detection_dict"],
629                    mask=outputs[image]["mask"],
630                )
631                outputs[image]["coords"] = updated_coords
632                outputs[image]["detection_dict"] = updated_detection_dict
633
634        # Step 9: Landmark derivation
635        if self.do_derive:
636            for idx, image in tqdm(
637                enumerate(metadata["filename"]),
638                total=len(metadata),
639                desc="Landmark derivation",
640            ):
641                label = metadata.loc[metadata["filename"] == image, "class"].values[0]
642                derived_coords, updated_detection_dict = self.derive(
643                    class_name=label,
644                    detection_dict=outputs[image]["detection_dict"],
645                    derivation_dict=self.derivation_dict,
646                    landmark_coords=outputs[image]["coords"],
647                    np_mask=outputs[image]["mask"],
648                )
649                outputs[image]["detection_dict"] = updated_detection_dict
650
651        # Step 10: Save segmentation image
652        if save_segmentation_image and (
653            use_bg_color or self.do_derive or self.do_refine
654        ):
655            for idx, image in tqdm(
656                enumerate(metadata["filename"]),
657                total=len(metadata),
658                desc="Save segmentation image",
659            ):
660                transformed_name = os.path.splitext(image)[0]
661                Image.fromarray(outputs[image]["mask"]).save(
662                    f"{self.output_dir}/mask_image/{transformed_name}_mask.png"
663                )
664                metadata.at[
665                    idx, "mask_image"
666                ] = f"{self.output_dir}/mask_image/{transformed_name}_mask.png"
667                if use_bg_color:
668                    Image.fromarray(outputs[image]["bg_modified_image"]).save(
669                        f"{self.output_dir}/bg_modified_image/{transformed_name}_bg_modified.png"
670                    )
671                    metadata.at[
672                        idx, "bg_modified_image"
673                    ] = f"{self.output_dir}/bg_modified_image/{transformed_name}_bg_modified.png"
674
675        # Step 10: Save measurement image
676        if save_measurement_image:
677            for idx, image in tqdm(
678                enumerate(metadata["filename"]),
679                total=len(metadata),
680                desc="Save measurement image",
681            ):
682                label = metadata.loc[metadata["filename"] == image, "class"].values[0]
683                transformed_name = os.path.splitext(image)[0]
684
685                image_to_save = Image.open(f"{self.input_dir}/{image}").convert("RGB")
686                draw = ImageDraw.Draw(image_to_save)
687                font = ImageFont.load_default()
688                landmarks = outputs[image]["detection_dict"][label]["landmarks"]
689
690                for lm_id, lm_data in landmarks.items():
691                    x, y = lm_data["x"], lm_data["y"]
692                    radius = 5
693                    draw.ellipse(
694                        (x - radius, y - radius, x + radius, y + radius), fill="green"
695                    )
696                    draw.text((x + 8, y - 8), lm_id, fill="green", font=font)
697
698                image_to_save.save(
699                    f"{self.output_dir}/measurement_image/{transformed_name}_measurement.png"
700                )
701                metadata.at[
702                    idx, "measurement_image"
703                ] = f"{self.output_dir}/measurement_image/{transformed_name}_measurement.png"
704
705        # Step 11: Save measurement json
706        for idx, image in tqdm(
707            enumerate(metadata["filename"]),
708            total=len(metadata),
709            desc="Save measurement json",
710        ):
711            label = metadata.loc[metadata["filename"] == image, "class"].values[0]
712            transformed_name = os.path.splitext(image)[0]
713
714            # Clean the detection dictionary
715            final_dict = utils.clean_detection_dict(
716                class_name=label,
717                image_name=image,
718                detection_dict=outputs[image]["detection_dict"],
719            )
720
721            # Export JSON
722            utils.export_dict_to_json(
723                data=final_dict,
724                filename=f"{self.output_dir}/measurement_json/{transformed_name}_measurement.json",
725            )
726
727            metadata.at[
728                idx, "measurement_json"
729            ] = f"{self.output_dir}/measurement_json/{transformed_name}_measurement.json"
730
731        # Step 12: Save metadata as a CSV
732        metadata.to_csv(f"{self.output_dir}/metadata.csv", index=False)
733
734        return metadata, outputs

Executes the full garment measurement pipeline for all images in the input directory.

This method processes each image through a multi-stage pipeline: garment classification, segmentation, landmark detection, optional landmark refinement, and optional landmark derivation. During classification, the system identifies the type of garment (e.g., shirt, dress, pants). Segmentation follows, producing binary or instance masks that separate the garment from the background; it is run only when it is needed, that is, when a background color is set in the segmentation model arguments or when refinement or derivation is enabled. Landmark detection is then performed to locate anatomical or garment-specific keypoints such as shoulders or waist positions. If refinement is enabled (`do_refine`), a refinement step applies post-processing or model-based corrections to improve the accuracy of detected keypoints. If derivation is enabled (`do_derive`), a landmark derivation step is applied after refinement. Finally, the system calculates key garment dimensions - such as chest width, waist width, and full length - based on the resulting landmarks. In addition to this processing pipeline, the method also manages data and visual output exports. For each input image, a cleaned JSON file is generated containing the predicted garment class, landmark coordinates, and the resulting measurements, and a `metadata.csv` file summarizing all processed images is written to the output directory. Optionally, visual outputs such as segmentation masks and images annotated with landmarks and measurements can be saved to assist in inspection or debugging.

Arguments:

save_segmentation_image (bool): If True, saves segmentation masks and background-modified images. Defaults to False.
save_measurement_image (bool): If True, saves images overlaid with detected landmarks and measurements. Defaults to False.

Returns:

tuple: A pair `(metadata, outputs)`:

- metadata (pd.DataFrame): A DataFrame containing metadata for each processed image, such as:
    - Original image filename
    - Paths to any saved segmentation or annotated images
    - Class and measurement results
- outputs (dict): A dictionary mapping image filenames to their detailed processing results, including:
    - Predicted class
    - Detected landmarks with coordinates and confidence scores
    - Calculated measurements
    - File paths to any saved images (if applicable)

Example of exported JSON:

{
    "cloth_3.jpg": {
        "class": "vest dress",
        "landmarks": {
            "10": {
                "conf": 0.7269417643547058,
                "x": 611.0,
                "y": 861.0
            },
            "16": {
                "conf": 0.6769524812698364,
                "x": 1226.0,
                "y": 838.0
            },
            "17": {
                "conf": 0.7472652196884155,
                "x": 1213.0,
                "y": 726.0
            },
            "18": {
                "conf": 0.7360446453094482,
                "x": 1238.0,
                "y": 613.0
            },
            "2": {
                "conf": 0.9256571531295776,
                "x": 703.0,
                "y": 264.0
            },
            "20": {
                "x": 700.936,
                "y": 2070.0
            },
            "8": {
                "conf": 0.7129100561141968,
                "x": 563.0,
                "y": 613.0
            },
            "9": {
                "conf": 0.8203497529029846,
                "x": 598.0,
                "y": 726.0
            }
        },
        "measurements": {
            "chest": {
                "distance": 675.0,
                "landmarks": {
                    "end": "18",
                    "start": "8"
                }
            },
            "full length": {
                "distance": 1806.0011794281863,
                "landmarks": {
                    "end": "20",
                    "start": "2"
                }
            },
            "hips": {
                "distance": 615.4299310238331,
                "landmarks": {
                    "end": "16",
                    "start": "10"
                }
            },
            "waist": {
                "distance": 615.0,
                "landmarks": {
                    "end": "17",
                    "start": "9"
                }
            }
        }
    }
}