garmentiq.classification.model_definition

  1import torch.nn as nn
  2import timm
  3
  4
  5class CNN3(nn.Module):
  6    """
  7    CNN3 is a convolutional neural network designed for image classification with moderate depth.
  8
  9    It consists of three convolutional blocks followed by fully connected layers.
 10
 11    Args:
 12        num_classes (int): Number of output classes for classification.
 13
 14    Attributes:
 15        features (nn.Sequential): Convolutional feature extractor.
 16        classifier (nn.Sequential): Fully connected classifier.
 17    """
 18
 19    def __init__(self, num_classes):
 20        """
 21        Initializes the CNN3 model architecture.
 22
 23        Args:
 24            num_classes (int): Number of target classes.
 25        """
 26        super(CNN3, self).__init__()
 27        self.features = nn.Sequential(
 28            # Block 1
 29            nn.Conv2d(3, 64, kernel_size=3, padding=1),
 30            nn.BatchNorm2d(64),
 31            nn.ReLU(inplace=True),
 32            nn.Conv2d(64, 64, kernel_size=3, padding=1),
 33            nn.BatchNorm2d(64),
 34            nn.ReLU(inplace=True),
 35            nn.MaxPool2d(2, 2),
 36            nn.Dropout(0.25),
 37            # Block 2
 38            nn.Conv2d(64, 128, kernel_size=3, padding=1),
 39            nn.BatchNorm2d(128),
 40            nn.ReLU(inplace=True),
 41            nn.Conv2d(128, 128, kernel_size=3, padding=1),
 42            nn.BatchNorm2d(128),
 43            nn.ReLU(inplace=True),
 44            nn.MaxPool2d(2, 2),
 45            nn.Dropout(0.25),
 46            # Block 3
 47            nn.Conv2d(128, 256, kernel_size=3, padding=1),
 48            nn.BatchNorm2d(256),
 49            nn.ReLU(inplace=True),
 50            nn.Conv2d(256, 256, kernel_size=3, padding=1),
 51            nn.BatchNorm2d(256),
 52            nn.ReLU(inplace=True),
 53            nn.AdaptiveAvgPool2d((4, 6)),
 54        )
 55
 56        self.classifier = nn.Sequential(
 57            nn.Linear(256 * 4 * 6, 512),
 58            nn.BatchNorm1d(512),
 59            nn.ReLU(inplace=True),
 60            nn.Dropout(0.5),
 61            nn.Linear(512, 256),
 62            nn.BatchNorm1d(256),
 63            nn.ReLU(inplace=True),
 64            nn.Dropout(0.3),
 65            nn.Linear(256, num_classes),
 66        )
 67
 68    def forward(self, x):
 69        """
 70        Defines the forward pass of the CNN3 model.
 71
 72        Args:
 73            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
 74
 75        Returns:
 76            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
 77        """
 78        x = self.features(x)
 79        x = x.view(x.size(0), -1)
 80        x = self.classifier(x)
 81        return x
 82
 83
 84class CNN4(nn.Module):
 85    """
 86    CNN4 is a deeper convolutional neural network with four convolutional blocks.
 87
 88    This architecture is designed for improved feature extraction by adding an extra conv block
 89    and adaptive pooling before classification layers.
 90
 91    Args:
 92        num_classes (int): Number of output classes for classification.
 93
 94    Attributes:
 95        features (nn.Sequential): Convolutional feature extractor with four blocks.
 96        classifier (nn.Sequential): Fully connected classifier.
 97    """
 98
 99    def __init__(self, num_classes):
100        """
101        Initializes the CNN4 model architecture.
102
103        Args:
104            num_classes (int): Number of target classes.
105        """
106        super(CNN4, self).__init__()
107        self.features = nn.Sequential(
108            # Block 1
109            nn.Conv2d(3, 64, kernel_size=3, padding=1),
110            nn.BatchNorm2d(64),
111            nn.ReLU(inplace=True),
112            nn.Conv2d(64, 64, kernel_size=3, padding=1),
113            nn.BatchNorm2d(64),
114            nn.ReLU(inplace=True),
115            nn.MaxPool2d(2, 2),
116            nn.Dropout(0.25),
117            # Block 2
118            nn.Conv2d(64, 128, kernel_size=3, padding=1),
119            nn.BatchNorm2d(128),
120            nn.ReLU(inplace=True),
121            nn.Conv2d(128, 128, kernel_size=3, padding=1),
122            nn.BatchNorm2d(128),
123            nn.ReLU(inplace=True),
124            nn.MaxPool2d(2, 2),
125            nn.Dropout(0.25),
126            # Block 3
127            nn.Conv2d(128, 256, kernel_size=3, padding=1),
128            nn.BatchNorm2d(256),
129            nn.ReLU(inplace=True),
130            nn.Conv2d(256, 256, kernel_size=3, padding=1),
131            nn.BatchNorm2d(256),
132            nn.ReLU(inplace=True),
133            nn.MaxPool2d(2, 2),
134            nn.Dropout(0.25),
135            # Block 4 (Additional block for more depth)
136            nn.Conv2d(256, 512, kernel_size=3, padding=1),
137            nn.BatchNorm2d(512),
138            nn.ReLU(inplace=True),
139            nn.Conv2d(512, 512, kernel_size=3, padding=1),
140            nn.BatchNorm2d(512),
141            nn.ReLU(inplace=True),
142            # Replace MaxPool with adaptive pooling to fix feature map size
143            nn.AdaptiveAvgPool2d((4, 6)),
144        )
145
146        self.classifier = nn.Sequential(
147            nn.Linear(512 * 4 * 6, 1024),
148            nn.BatchNorm1d(1024),
149            nn.ReLU(inplace=True),
150            nn.Dropout(0.5),
151            nn.Linear(1024, 512),
152            nn.BatchNorm1d(512),
153            nn.ReLU(inplace=True),
154            nn.Dropout(0.3),
155            nn.Linear(512, num_classes),
156        )
157
158    def forward(self, x):
159        """
160        Defines the forward pass of the CNN4 model.
161
162        Args:
163            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
164
165        Returns:
166            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
167        """
168        x = self.features(x)
169        x = x.view(x.size(0), -1)
170        x = self.classifier(x)
171        return x
172
173
class tinyViT(nn.Module):
    """
    tinyViT is a vision transformer model based on the DeiT tiny architecture.

    The backbone is created through timm from "deit_tiny_patch16_224" and its
    classification head is replaced with a linear layer matching the number of
    classes.

    Args:
        num_classes (int): Number of output classes.
        img_size (int): Input image size (assumed square).
        patch_size (int): Size of the image patches used by the transformer.
        pretrained (bool, optional): Whether timm should load pretrained
            weights for the backbone. Defaults to True, which was previously
            the hard-coded behaviour. Pass False when the weights are loaded
            from a checkpoint afterwards, to avoid fetching pretrained weights.

    Attributes:
        backbone (nn.Module): The underlying vision transformer model with modified head.
    """

    def __init__(self, num_classes, img_size, patch_size, pretrained=True):
        """
        Initializes the tinyViT model with a DeiT tiny backbone.

        Args:
            num_classes (int): Number of target classes.
            img_size (int): Input image size (height and width).
            patch_size (int): Patch size for the transformer.
            pretrained (bool, optional): Load pretrained backbone weights.
                Defaults to True.
        """
        super().__init__()
        self.backbone = timm.create_model(
            "deit_tiny_patch16_224",
            pretrained=pretrained,
            img_size=img_size,
            patch_size=patch_size,
        )
        # Swap the original head for one sized to this task, keeping the
        # backbone's feature width.
        head_width = self.backbone.head.in_features
        self.backbone.head = nn.Linear(head_width, num_classes)

    def forward(self, x):
        """
        Defines the forward pass of the tinyViT model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        return self.backbone(x)
class CNN3(torch.nn.modules.module.Module):
 6class CNN3(nn.Module):
 7    """
 8    CNN3 is a convolutional neural network designed for image classification with moderate depth.
 9
10    It consists of three convolutional blocks followed by fully connected layers.
11
12    Args:
13        num_classes (int): Number of output classes for classification.
14
15    Attributes:
16        features (nn.Sequential): Convolutional feature extractor.
17        classifier (nn.Sequential): Fully connected classifier.
18    """
19
20    def __init__(self, num_classes):
21        """
22        Initializes the CNN3 model architecture.
23
24        Args:
25            num_classes (int): Number of target classes.
26        """
27        super(CNN3, self).__init__()
28        self.features = nn.Sequential(
29            # Block 1
30            nn.Conv2d(3, 64, kernel_size=3, padding=1),
31            nn.BatchNorm2d(64),
32            nn.ReLU(inplace=True),
33            nn.Conv2d(64, 64, kernel_size=3, padding=1),
34            nn.BatchNorm2d(64),
35            nn.ReLU(inplace=True),
36            nn.MaxPool2d(2, 2),
37            nn.Dropout(0.25),
38            # Block 2
39            nn.Conv2d(64, 128, kernel_size=3, padding=1),
40            nn.BatchNorm2d(128),
41            nn.ReLU(inplace=True),
42            nn.Conv2d(128, 128, kernel_size=3, padding=1),
43            nn.BatchNorm2d(128),
44            nn.ReLU(inplace=True),
45            nn.MaxPool2d(2, 2),
46            nn.Dropout(0.25),
47            # Block 3
48            nn.Conv2d(128, 256, kernel_size=3, padding=1),
49            nn.BatchNorm2d(256),
50            nn.ReLU(inplace=True),
51            nn.Conv2d(256, 256, kernel_size=3, padding=1),
52            nn.BatchNorm2d(256),
53            nn.ReLU(inplace=True),
54            nn.AdaptiveAvgPool2d((4, 6)),
55        )
56
57        self.classifier = nn.Sequential(
58            nn.Linear(256 * 4 * 6, 512),
59            nn.BatchNorm1d(512),
60            nn.ReLU(inplace=True),
61            nn.Dropout(0.5),
62            nn.Linear(512, 256),
63            nn.BatchNorm1d(256),
64            nn.ReLU(inplace=True),
65            nn.Dropout(0.3),
66            nn.Linear(256, num_classes),
67        )
68
69    def forward(self, x):
70        """
71        Defines the forward pass of the CNN3 model.
72
73        Args:
74            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
75
76        Returns:
77            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
78        """
79        x = self.features(x)
80        x = x.view(x.size(0), -1)
81        x = self.classifier(x)
82        return x

CNN3 is a convolutional neural network designed for image classification with moderate depth.

It consists of three convolutional blocks followed by fully connected layers.

Arguments:
  • num_classes (int): Number of output classes for classification.
Attributes:
  • features (nn.Sequential): Convolutional feature extractor.
  • classifier (nn.Sequential): Fully connected classifier.
CNN3(num_classes)
20    def __init__(self, num_classes):
21        """
22        Initializes the CNN3 model architecture.
23
24        Args:
25            num_classes (int): Number of target classes.
26        """
27        super(CNN3, self).__init__()
28        self.features = nn.Sequential(
29            # Block 1
30            nn.Conv2d(3, 64, kernel_size=3, padding=1),
31            nn.BatchNorm2d(64),
32            nn.ReLU(inplace=True),
33            nn.Conv2d(64, 64, kernel_size=3, padding=1),
34            nn.BatchNorm2d(64),
35            nn.ReLU(inplace=True),
36            nn.MaxPool2d(2, 2),
37            nn.Dropout(0.25),
38            # Block 2
39            nn.Conv2d(64, 128, kernel_size=3, padding=1),
40            nn.BatchNorm2d(128),
41            nn.ReLU(inplace=True),
42            nn.Conv2d(128, 128, kernel_size=3, padding=1),
43            nn.BatchNorm2d(128),
44            nn.ReLU(inplace=True),
45            nn.MaxPool2d(2, 2),
46            nn.Dropout(0.25),
47            # Block 3
48            nn.Conv2d(128, 256, kernel_size=3, padding=1),
49            nn.BatchNorm2d(256),
50            nn.ReLU(inplace=True),
51            nn.Conv2d(256, 256, kernel_size=3, padding=1),
52            nn.BatchNorm2d(256),
53            nn.ReLU(inplace=True),
54            nn.AdaptiveAvgPool2d((4, 6)),
55        )
56
57        self.classifier = nn.Sequential(
58            nn.Linear(256 * 4 * 6, 512),
59            nn.BatchNorm1d(512),
60            nn.ReLU(inplace=True),
61            nn.Dropout(0.5),
62            nn.Linear(512, 256),
63            nn.BatchNorm1d(256),
64            nn.ReLU(inplace=True),
65            nn.Dropout(0.3),
66            nn.Linear(256, num_classes),
67        )

Initializes the CNN3 model architecture.

Arguments:
  • num_classes (int): Number of target classes.
features
classifier
def forward(self, x):
69    def forward(self, x):
70        """
71        Defines the forward pass of the CNN3 model.
72
73        Args:
74            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
75
76        Returns:
77            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
78        """
79        x = self.features(x)
80        x = x.view(x.size(0), -1)
81        x = self.classifier(x)
82        return x

Defines the forward pass of the CNN3 model.

Arguments:
  • x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
Returns:
  • torch.Tensor: Output logits tensor of shape (batch_size, num_classes).

class CNN4(torch.nn.modules.module.Module):
 85class CNN4(nn.Module):
 86    """
 87    CNN4 is a deeper convolutional neural network with four convolutional blocks.
 88
 89    This architecture is designed for improved feature extraction by adding an extra conv block
 90    and adaptive pooling before classification layers.
 91
 92    Args:
 93        num_classes (int): Number of output classes for classification.
 94
 95    Attributes:
 96        features (nn.Sequential): Convolutional feature extractor with four blocks.
 97        classifier (nn.Sequential): Fully connected classifier.
 98    """
 99
100    def __init__(self, num_classes):
101        """
102        Initializes the CNN4 model architecture.
103
104        Args:
105            num_classes (int): Number of target classes.
106        """
107        super(CNN4, self).__init__()
108        self.features = nn.Sequential(
109            # Block 1
110            nn.Conv2d(3, 64, kernel_size=3, padding=1),
111            nn.BatchNorm2d(64),
112            nn.ReLU(inplace=True),
113            nn.Conv2d(64, 64, kernel_size=3, padding=1),
114            nn.BatchNorm2d(64),
115            nn.ReLU(inplace=True),
116            nn.MaxPool2d(2, 2),
117            nn.Dropout(0.25),
118            # Block 2
119            nn.Conv2d(64, 128, kernel_size=3, padding=1),
120            nn.BatchNorm2d(128),
121            nn.ReLU(inplace=True),
122            nn.Conv2d(128, 128, kernel_size=3, padding=1),
123            nn.BatchNorm2d(128),
124            nn.ReLU(inplace=True),
125            nn.MaxPool2d(2, 2),
126            nn.Dropout(0.25),
127            # Block 3
128            nn.Conv2d(128, 256, kernel_size=3, padding=1),
129            nn.BatchNorm2d(256),
130            nn.ReLU(inplace=True),
131            nn.Conv2d(256, 256, kernel_size=3, padding=1),
132            nn.BatchNorm2d(256),
133            nn.ReLU(inplace=True),
134            nn.MaxPool2d(2, 2),
135            nn.Dropout(0.25),
136            # Block 4 (Additional block for more depth)
137            nn.Conv2d(256, 512, kernel_size=3, padding=1),
138            nn.BatchNorm2d(512),
139            nn.ReLU(inplace=True),
140            nn.Conv2d(512, 512, kernel_size=3, padding=1),
141            nn.BatchNorm2d(512),
142            nn.ReLU(inplace=True),
143            # Replace MaxPool with adaptive pooling to fix feature map size
144            nn.AdaptiveAvgPool2d((4, 6)),
145        )
146
147        self.classifier = nn.Sequential(
148            nn.Linear(512 * 4 * 6, 1024),
149            nn.BatchNorm1d(1024),
150            nn.ReLU(inplace=True),
151            nn.Dropout(0.5),
152            nn.Linear(1024, 512),
153            nn.BatchNorm1d(512),
154            nn.ReLU(inplace=True),
155            nn.Dropout(0.3),
156            nn.Linear(512, num_classes),
157        )
158
159    def forward(self, x):
160        """
161        Defines the forward pass of the CNN4 model.
162
163        Args:
164            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
165
166        Returns:
167            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
168        """
169        x = self.features(x)
170        x = x.view(x.size(0), -1)
171        x = self.classifier(x)
172        return x

CNN4 is a deeper convolutional neural network with four convolutional blocks.

This architecture is designed for improved feature extraction by adding an extra conv block and adaptive pooling before classification layers.

Arguments:
  • num_classes (int): Number of output classes for classification.
Attributes:
  • features (nn.Sequential): Convolutional feature extractor with four blocks.
  • classifier (nn.Sequential): Fully connected classifier.
CNN4(num_classes)
100    def __init__(self, num_classes):
101        """
102        Initializes the CNN4 model architecture.
103
104        Args:
105            num_classes (int): Number of target classes.
106        """
107        super(CNN4, self).__init__()
108        self.features = nn.Sequential(
109            # Block 1
110            nn.Conv2d(3, 64, kernel_size=3, padding=1),
111            nn.BatchNorm2d(64),
112            nn.ReLU(inplace=True),
113            nn.Conv2d(64, 64, kernel_size=3, padding=1),
114            nn.BatchNorm2d(64),
115            nn.ReLU(inplace=True),
116            nn.MaxPool2d(2, 2),
117            nn.Dropout(0.25),
118            # Block 2
119            nn.Conv2d(64, 128, kernel_size=3, padding=1),
120            nn.BatchNorm2d(128),
121            nn.ReLU(inplace=True),
122            nn.Conv2d(128, 128, kernel_size=3, padding=1),
123            nn.BatchNorm2d(128),
124            nn.ReLU(inplace=True),
125            nn.MaxPool2d(2, 2),
126            nn.Dropout(0.25),
127            # Block 3
128            nn.Conv2d(128, 256, kernel_size=3, padding=1),
129            nn.BatchNorm2d(256),
130            nn.ReLU(inplace=True),
131            nn.Conv2d(256, 256, kernel_size=3, padding=1),
132            nn.BatchNorm2d(256),
133            nn.ReLU(inplace=True),
134            nn.MaxPool2d(2, 2),
135            nn.Dropout(0.25),
136            # Block 4 (Additional block for more depth)
137            nn.Conv2d(256, 512, kernel_size=3, padding=1),
138            nn.BatchNorm2d(512),
139            nn.ReLU(inplace=True),
140            nn.Conv2d(512, 512, kernel_size=3, padding=1),
141            nn.BatchNorm2d(512),
142            nn.ReLU(inplace=True),
143            # Replace MaxPool with adaptive pooling to fix feature map size
144            nn.AdaptiveAvgPool2d((4, 6)),
145        )
146
147        self.classifier = nn.Sequential(
148            nn.Linear(512 * 4 * 6, 1024),
149            nn.BatchNorm1d(1024),
150            nn.ReLU(inplace=True),
151            nn.Dropout(0.5),
152            nn.Linear(1024, 512),
153            nn.BatchNorm1d(512),
154            nn.ReLU(inplace=True),
155            nn.Dropout(0.3),
156            nn.Linear(512, num_classes),
157        )

Initializes the CNN4 model architecture.

Arguments:
  • num_classes (int): Number of target classes.
features
classifier
def forward(self, x):
159    def forward(self, x):
160        """
161        Defines the forward pass of the CNN4 model.
162
163        Args:
164            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
165
166        Returns:
167            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
168        """
169        x = self.features(x)
170        x = x.view(x.size(0), -1)
171        x = self.classifier(x)
172        return x

Defines the forward pass of the CNN4 model.

Arguments:
  • x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
Returns:
  • torch.Tensor: Output logits tensor of shape (batch_size, num_classes).

class tinyViT(torch.nn.modules.module.Module):
class tinyViT(nn.Module):
    """
    tinyViT is a vision transformer model based on the DeiT tiny architecture.

    The backbone is created through timm from "deit_tiny_patch16_224" and its
    classification head is replaced with a linear layer matching the number of
    classes.

    Args:
        num_classes (int): Number of output classes.
        img_size (int): Input image size (assumed square).
        patch_size (int): Size of the image patches used by the transformer.
        pretrained (bool, optional): Whether timm should load pretrained
            weights for the backbone. Defaults to True, which was previously
            the hard-coded behaviour. Pass False when the weights are loaded
            from a checkpoint afterwards, to avoid fetching pretrained weights.

    Attributes:
        backbone (nn.Module): The underlying vision transformer model with modified head.
    """

    def __init__(self, num_classes, img_size, patch_size, pretrained=True):
        """
        Initializes the tinyViT model with a DeiT tiny backbone.

        Args:
            num_classes (int): Number of target classes.
            img_size (int): Input image size (height and width).
            patch_size (int): Patch size for the transformer.
            pretrained (bool, optional): Load pretrained backbone weights.
                Defaults to True.
        """
        super().__init__()
        self.backbone = timm.create_model(
            "deit_tiny_patch16_224",
            pretrained=pretrained,
            img_size=img_size,
            patch_size=patch_size,
        )
        # Swap the original head for one sized to this task, keeping the
        # backbone's feature width.
        head_width = self.backbone.head.in_features
        self.backbone.head = nn.Linear(head_width, num_classes)

    def forward(self, x):
        """
        Defines the forward pass of the tinyViT model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        return self.backbone(x)

tinyViT is a vision transformer model based on the DeiT tiny architecture, pretrained on ImageNet.

It replaces the original classification head with a linear layer matching the number of classes.

Arguments:
  • num_classes (int): Number of output classes.
  • img_size (int): Input image size (assumed square).
  • patch_size (int): Size of the image patches used by the transformer.
Attributes:
  • backbone (nn.Module): The underlying vision transformer model with modified head.
tinyViT(num_classes, img_size, patch_size)
190    def __init__(self, num_classes, img_size, patch_size):
191        """
192        Initializes the tinyViT model with a pretrained DeiT backbone.
193
194        Args:
195            num_classes (int): Number of target classes.
196            img_size (int): Input image size (height and width).
197            patch_size (int): Patch size for the transformer.
198        """
199        super(tinyViT, self).__init__()
200        self.backbone = timm.create_model(
201            "deit_tiny_patch16_224",
202            pretrained=True,
203            img_size=img_size,
204            patch_size=patch_size,
205        )
206        in_features = self.backbone.head.in_features
207        self.backbone.head = nn.Linear(in_features, num_classes)

Initializes the tinyViT model with a pretrained DeiT backbone.

Arguments:
  • num_classes (int): Number of target classes.
  • img_size (int): Input image size (height and width).
  • patch_size (int): Patch size for the transformer.
backbone
def forward(self, x):
209    def forward(self, x):
210        """
211        Defines the forward pass of the tinyViT model.
212
213        Args:
214            x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).
215
216        Returns:
217            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
218        """
219        return self.backbone(x)

Defines the forward pass of the tinyViT model.

Arguments:
  • x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).
Returns:
  • torch.Tensor: Output logits tensor of shape (batch_size, num_classes).