garmentiq.classification.model_definition
"""Model definitions for garment image classification: CNN3, CNN4 and tinyViT."""

import torch.nn as nn
import timm


def _double_conv(c_in, c_out):
    """Return two 3x3 conv -> batch-norm -> ReLU units mapping c_in to c_out channels."""
    return [
        nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
        nn.BatchNorm2d(c_out),
        nn.ReLU(inplace=True),
        nn.Conv2d(c_out, c_out, kernel_size=3, padding=1),
        nn.BatchNorm2d(c_out),
        nn.ReLU(inplace=True),
    ]


def _build_features(widths):
    """Build the flat convolutional stack for the given (in, out) channel pairs.

    Every block but the last ends with a 2x2 max-pool and Dropout(0.25); the
    last block ends with an adaptive average pool to a fixed 4x6 map.  The
    result is a single flat ``nn.Sequential`` so layer indices (and therefore
    ``state_dict`` keys) stay ``features.0``, ``features.1``, ...
    """
    layers = []
    last = len(widths) - 1
    for index, (c_in, c_out) in enumerate(widths):
        layers += _double_conv(c_in, c_out)
        if index < last:
            layers += [nn.MaxPool2d(2, 2), nn.Dropout(0.25)]
        else:
            # Adaptive pooling instead of max-pooling fixes the feature map size.
            layers.append(nn.AdaptiveAvgPool2d((4, 6)))
    return nn.Sequential(*layers)


def _build_classifier(in_features, hidden, num_classes):
    """Build the fully connected head: two hidden layers plus the output layer."""
    first, second = hidden
    return nn.Sequential(
        nn.Linear(in_features, first),
        nn.BatchNorm1d(first),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(first, second),
        nn.BatchNorm1d(second),
        nn.ReLU(inplace=True),
        nn.Dropout(0.3),
        nn.Linear(second, num_classes),
    )


class CNN3(nn.Module):
    """
    Convolutional image classifier with three convolutional blocks.

    The feature extractor ends with an adaptive average pool to a 4x6 map, so
    the classifier always receives 256 * 4 * 6 features per sample.

    Args:
        num_classes (int): Number of output classes for classification.

    Attributes:
        features (nn.Sequential): Convolutional feature extractor.
        classifier (nn.Sequential): Fully connected classifier.
    """

    def __init__(self, num_classes: int) -> None:
        """
        Initializes the CNN3 model architecture.

        Args:
            num_classes (int): Number of target classes.
        """
        super().__init__()
        self.features = _build_features(((3, 64), (64, 128), (128, 256)))
        self.classifier = _build_classifier(256 * 4 * 6, (512, 256), num_classes)

    def forward(self, x):
        """
        Defines the forward pass of the CNN3 model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        x = self.features(x)
        # flatten() copies when needed, so it also works on non-contiguous
        # feature maps (e.g. channels_last), where view() would raise.
        x = x.flatten(1)
        return self.classifier(x)


class CNN4(nn.Module):
    """
    Deeper convolutional image classifier with four convolutional blocks.

    Compared with CNN3 it adds a fourth block (256 -> 512 channels); the
    adaptive average pool at the end again yields a fixed 4x6 feature map, so
    the classifier receives 512 * 4 * 6 features per sample.

    Args:
        num_classes (int): Number of output classes for classification.

    Attributes:
        features (nn.Sequential): Convolutional feature extractor with four blocks.
        classifier (nn.Sequential): Fully connected classifier.
    """

    def __init__(self, num_classes: int) -> None:
        """
        Initializes the CNN4 model architecture.

        Args:
            num_classes (int): Number of target classes.
        """
        super().__init__()
        self.features = _build_features(
            ((3, 64), (64, 128), (128, 256), (256, 512))
        )
        self.classifier = _build_classifier(512 * 4 * 6, (1024, 512), num_classes)

    def forward(self, x):
        """
        Defines the forward pass of the CNN4 model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        x = self.features(x)
        # flatten() copies when needed, so it also works on non-contiguous
        # feature maps (e.g. channels_last), where view() would raise.
        x = x.flatten(1)
        return self.classifier(x)


class tinyViT(nn.Module):
    """
    Vision transformer classifier built on timm's ``deit_tiny_patch16_224`` model.

    The original classification head is replaced with a linear layer matching
    the number of classes.  Pretrained weights are requested only when
    ``pretrained`` is ``True``.

    Args:
        num_classes (int): Number of output classes.
        img_size (int): Input image size (assumed square).
        patch_size (int): Size of the image patches used by the transformer.
        pretrained (bool): Whether to use the pretrained model, defaults to `False`.

    Attributes:
        backbone (nn.Module): The underlying vision transformer model with modified head.
    """

    def __init__(
        self,
        num_classes: int,
        img_size: int,
        patch_size: int,
        pretrained: bool = False,
    ):
        """
        Initializes the tinyViT model with a DeiT tiny backbone.

        Args:
            num_classes (int): Number of target classes.
            img_size (int): Input image size (height and width).
            patch_size (int): Patch size for the transformer.
            pretrained (bool): Whether to use the pretrained model, defaults to `False`.
        """
        super().__init__()
        self.backbone = timm.create_model(
            "deit_tiny_patch16_224",
            pretrained=pretrained,
            img_size=img_size,
            patch_size=patch_size,
        )
        # Swap the stock head for one sized to this task's class count.
        in_features = self.backbone.head.in_features
        self.backbone.head = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """
        Defines the forward pass of the tinyViT model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        return self.backbone(x)
class CNN3(nn.Module):
    """
    Convolutional image classifier with three convolutional blocks.

    Each block is two 3x3 conv -> batch-norm -> ReLU units.  Blocks 1 and 2 end
    with a 2x2 max-pool and dropout; block 3 ends with an adaptive average pool
    to a 4x6 map, so the classifier always receives 256 * 4 * 6 features.

    Args:
        num_classes (int): Number of output classes for classification.

    Attributes:
        features (nn.Sequential): Convolutional feature extractor.
        classifier (nn.Sequential): Fully connected classifier.
    """

    def __init__(self, num_classes: int) -> None:
        """
        Initializes the CNN3 model architecture.

        Args:
            num_classes (int): Number of target classes.
        """
        super().__init__()
        # (in_channels, out_channels) of each convolutional block, in order.
        widths = ((3, 64), (64, 128), (128, 256))

        layers = []
        for index, (c_in, c_out) in enumerate(widths):
            layers += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.Conv2d(c_out, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]
            if index < len(widths) - 1:
                layers += [nn.MaxPool2d(2, 2), nn.Dropout(0.25)]
            else:
                layers.append(nn.AdaptiveAvgPool2d((4, 6)))
        # One flat Sequential keeps layer indices (state_dict keys) unchanged.
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.Linear(256 * 4 * 6, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """
        Defines the forward pass of the CNN3 model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        x = self.features(x)
        # flatten() copies when needed, so it also works on non-contiguous
        # feature maps (e.g. channels_last), where view() would raise.
        x = x.flatten(1)
        return self.classifier(x)
CNN3 is a convolutional neural network designed for image classification with moderate depth.
It consists of three convolutional blocks followed by fully connected layers.
Arguments:
- num_classes (int): Number of output classes for classification.
Attributes:
- features (nn.Sequential): Convolutional feature extractor.
- classifier (nn.Sequential): Fully connected classifier.
20 def __init__(self, num_classes): 21 """ 22 Initializes the CNN3 model architecture. 23 24 Args: 25 num_classes (int): Number of target classes. 26 """ 27 super(CNN3, self).__init__() 28 self.features = nn.Sequential( 29 # Block 1 30 nn.Conv2d(3, 64, kernel_size=3, padding=1), 31 nn.BatchNorm2d(64), 32 nn.ReLU(inplace=True), 33 nn.Conv2d(64, 64, kernel_size=3, padding=1), 34 nn.BatchNorm2d(64), 35 nn.ReLU(inplace=True), 36 nn.MaxPool2d(2, 2), 37 nn.Dropout(0.25), 38 # Block 2 39 nn.Conv2d(64, 128, kernel_size=3, padding=1), 40 nn.BatchNorm2d(128), 41 nn.ReLU(inplace=True), 42 nn.Conv2d(128, 128, kernel_size=3, padding=1), 43 nn.BatchNorm2d(128), 44 nn.ReLU(inplace=True), 45 nn.MaxPool2d(2, 2), 46 nn.Dropout(0.25), 47 # Block 3 48 nn.Conv2d(128, 256, kernel_size=3, padding=1), 49 nn.BatchNorm2d(256), 50 nn.ReLU(inplace=True), 51 nn.Conv2d(256, 256, kernel_size=3, padding=1), 52 nn.BatchNorm2d(256), 53 nn.ReLU(inplace=True), 54 nn.AdaptiveAvgPool2d((4, 6)), 55 ) 56 57 self.classifier = nn.Sequential( 58 nn.Linear(256 * 4 * 6, 512), 59 nn.BatchNorm1d(512), 60 nn.ReLU(inplace=True), 61 nn.Dropout(0.5), 62 nn.Linear(512, 256), 63 nn.BatchNorm1d(256), 64 nn.ReLU(inplace=True), 65 nn.Dropout(0.3), 66 nn.Linear(256, num_classes), 67 )
Initializes the CNN3 model architecture.
Arguments:
- num_classes (int): Number of target classes.
def forward(self, x):
    """
    Defines the forward pass of the CNN3 model.

    Args:
        x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

    Returns:
        torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
    """
    x = self.features(x)
    # flatten() copies when needed, so it also works on non-contiguous
    # feature maps (e.g. channels_last), where view() would raise.
    x = x.flatten(1)
    return self.classifier(x)
Defines the forward pass of the CNN3 model.
Arguments:
- x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
Returns:
torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
class CNN4(nn.Module):
    """
    Deeper convolutional image classifier with four convolutional blocks.

    Each block is two 3x3 conv -> batch-norm -> ReLU units.  Blocks 1-3 end
    with a 2x2 max-pool and dropout; block 4 ends with an adaptive average pool
    to a 4x6 map, so the classifier always receives 512 * 4 * 6 features.

    Args:
        num_classes (int): Number of output classes for classification.

    Attributes:
        features (nn.Sequential): Convolutional feature extractor with four blocks.
        classifier (nn.Sequential): Fully connected classifier.
    """

    def __init__(self, num_classes: int) -> None:
        """
        Initializes the CNN4 model architecture.

        Args:
            num_classes (int): Number of target classes.
        """
        super().__init__()
        # (in_channels, out_channels) of each convolutional block, in order.
        widths = ((3, 64), (64, 128), (128, 256), (256, 512))

        layers = []
        for index, (c_in, c_out) in enumerate(widths):
            layers += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.Conv2d(c_out, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]
            if index < len(widths) - 1:
                layers += [nn.MaxPool2d(2, 2), nn.Dropout(0.25)]
            else:
                # Adaptive pooling instead of max-pooling fixes the feature map size.
                layers.append(nn.AdaptiveAvgPool2d((4, 6)))
        # One flat Sequential keeps layer indices (state_dict keys) unchanged.
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.Linear(512 * 4 * 6, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """
        Defines the forward pass of the CNN4 model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        x = self.features(x)
        # flatten() copies when needed, so it also works on non-contiguous
        # feature maps (e.g. channels_last), where view() would raise.
        x = x.flatten(1)
        return self.classifier(x)
CNN4 is a deeper convolutional neural network with four convolutional blocks.
This architecture is designed for improved feature extraction by adding an extra conv block and adaptive pooling before classification layers.
Arguments:
- num_classes (int): Number of output classes for classification.
Attributes:
- features (nn.Sequential): Convolutional feature extractor with four blocks.
- classifier (nn.Sequential): Fully connected classifier.
100 def __init__(self, num_classes): 101 """ 102 Initializes the CNN4 model architecture. 103 104 Args: 105 num_classes (int): Number of target classes. 106 """ 107 super(CNN4, self).__init__() 108 self.features = nn.Sequential( 109 # Block 1 110 nn.Conv2d(3, 64, kernel_size=3, padding=1), 111 nn.BatchNorm2d(64), 112 nn.ReLU(inplace=True), 113 nn.Conv2d(64, 64, kernel_size=3, padding=1), 114 nn.BatchNorm2d(64), 115 nn.ReLU(inplace=True), 116 nn.MaxPool2d(2, 2), 117 nn.Dropout(0.25), 118 # Block 2 119 nn.Conv2d(64, 128, kernel_size=3, padding=1), 120 nn.BatchNorm2d(128), 121 nn.ReLU(inplace=True), 122 nn.Conv2d(128, 128, kernel_size=3, padding=1), 123 nn.BatchNorm2d(128), 124 nn.ReLU(inplace=True), 125 nn.MaxPool2d(2, 2), 126 nn.Dropout(0.25), 127 # Block 3 128 nn.Conv2d(128, 256, kernel_size=3, padding=1), 129 nn.BatchNorm2d(256), 130 nn.ReLU(inplace=True), 131 nn.Conv2d(256, 256, kernel_size=3, padding=1), 132 nn.BatchNorm2d(256), 133 nn.ReLU(inplace=True), 134 nn.MaxPool2d(2, 2), 135 nn.Dropout(0.25), 136 # Block 4 (Additional block for more depth) 137 nn.Conv2d(256, 512, kernel_size=3, padding=1), 138 nn.BatchNorm2d(512), 139 nn.ReLU(inplace=True), 140 nn.Conv2d(512, 512, kernel_size=3, padding=1), 141 nn.BatchNorm2d(512), 142 nn.ReLU(inplace=True), 143 # Replace MaxPool with adaptive pooling to fix feature map size 144 nn.AdaptiveAvgPool2d((4, 6)), 145 ) 146 147 self.classifier = nn.Sequential( 148 nn.Linear(512 * 4 * 6, 1024), 149 nn.BatchNorm1d(1024), 150 nn.ReLU(inplace=True), 151 nn.Dropout(0.5), 152 nn.Linear(1024, 512), 153 nn.BatchNorm1d(512), 154 nn.ReLU(inplace=True), 155 nn.Dropout(0.3), 156 nn.Linear(512, num_classes), 157 )
Initializes the CNN4 model architecture.
Arguments:
- num_classes (int): Number of target classes.
def forward(self, x):
    """
    Defines the forward pass of the CNN4 model.

    Args:
        x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

    Returns:
        torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
    """
    x = self.features(x)
    # flatten() copies when needed, so it also works on non-contiguous
    # feature maps (e.g. channels_last), where view() would raise.
    x = x.flatten(1)
    return self.classifier(x)
Defines the forward pass of the CNN4 model.
Arguments:
- x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
Returns:
torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
class tinyViT(nn.Module):
    """
    Vision transformer classifier built on timm's ``deit_tiny_patch16_224`` model.

    The backbone's original classification head is replaced with a linear layer
    that has ``num_classes`` outputs.  Pretrained weights are requested only
    when ``pretrained`` is ``True``.

    Args:
        num_classes (int): Number of output classes.
        img_size (int): Input image size (assumed square).
        patch_size (int): Size of the image patches used by the transformer.
        pretrained (bool): Whether to use the pretrained model, defaults to `False`.

    Attributes:
        backbone (nn.Module): The underlying vision transformer model with modified head.
    """

    def __init__(
        self,
        num_classes: int,
        img_size: int,
        patch_size: int,
        pretrained: bool = False,
    ):
        """
        Create the DeiT tiny backbone and attach a task-specific head.

        Args:
            num_classes (int): Number of target classes.
            img_size (int): Input image size (height and width).
            patch_size (int): Patch size for the transformer.
            pretrained (bool): Whether to use the pretrained model, defaults to `False`.
        """
        super().__init__()
        vit = timm.create_model(
            "deit_tiny_patch16_224",
            pretrained=pretrained,
            img_size=img_size,
            patch_size=patch_size,
        )
        # Reuse the stock head's input width for the replacement layer.
        vit.head = nn.Linear(vit.head.in_features, num_classes)
        self.backbone = vit

    def forward(self, x):
        """
        Run the input through the backbone and return its logits.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        logits = self.backbone(x)
        return logits
tinyViT is a vision transformer model based on the DeiT tiny architecture; pretrained weights are loaded only when `pretrained` is `True`.
It replaces the original classification head with a linear layer matching the number of classes.
Arguments:
- num_classes (int): Number of output classes.
- img_size (int): Input image size (assumed square).
- patch_size (int): Size of the image patches used by the transformer.
- pretrained (bool): Whether to use the pretrained model; defaults to False.
Attributes:
- backbone (nn.Module): The underlying vision transformer model with modified head.
def __init__(
    self,
    num_classes: int,
    img_size: int,
    patch_size: int,
    pretrained: bool = False,
):
    """
    Create the DeiT tiny backbone and attach a task-specific head.

    Args:
        num_classes (int): Number of target classes.
        img_size (int): Input image size (height and width).
        patch_size (int): Patch size for the transformer.
        pretrained (bool): Whether to use the pretrained model, defaults to `False`.
    """
    super().__init__()
    vit = timm.create_model(
        "deit_tiny_patch16_224",
        pretrained=pretrained,
        img_size=img_size,
        patch_size=patch_size,
    )
    # Reuse the stock head's input width for the replacement layer.
    vit.head = nn.Linear(vit.head.in_features, num_classes)
    self.backbone = vit
Initializes the tinyViT model with a DeiT tiny backbone (pretrained weights are loaded only when `pretrained` is `True`).
Arguments:
- num_classes (int): Number of target classes.
- img_size (int): Input image size (height and width).
- patch_size (int): Patch size for the transformer.
- pretrained (bool): Whether to use the pretrained model; defaults to False.
def forward(self, x):
    """
    Run the input through the backbone and return its logits.

    Args:
        x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).

    Returns:
        torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
    """
    logits = self.backbone(x)
    return logits
Defines the forward pass of the tinyViT model.
Arguments:
- x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).
Returns:
torch.Tensor: Output logits tensor of shape (batch_size, num_classes).