garmentiq.classification.model_definition
"""Model architectures for image classification: CNN3, CNN4 and tinyViT."""

import torch.nn as nn
import timm


def _conv_unit(in_channels, out_channels):
    """Return the layers of one 3x3 conv unit: Conv2d -> BatchNorm2d -> ReLU."""
    return [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    ]


def _conv_block(in_channels, out_channels, downsample=True):
    """Return the layers of one convolutional block.

    A block is two conv units; when ``downsample`` is true it is followed by
    a 2x2 max-pool and dropout (p=0.25).
    """
    layers = _conv_unit(in_channels, out_channels)
    layers += _conv_unit(out_channels, out_channels)
    if downsample:
        layers += [nn.MaxPool2d(2, 2), nn.Dropout(0.25)]
    return layers


def _mlp_head(in_features, hidden_a, hidden_b, num_classes):
    """Return the three-layer fully connected classifier shared by the CNNs."""
    return nn.Sequential(
        nn.Linear(in_features, hidden_a),
        nn.BatchNorm1d(hidden_a),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(hidden_a, hidden_b),
        nn.BatchNorm1d(hidden_b),
        nn.ReLU(inplace=True),
        nn.Dropout(0.3),
        nn.Linear(hidden_b, num_classes),
    )


class CNN3(nn.Module):
    """
    Convolutional image classifier with three convolutional blocks.

    The feature extractor ends in an adaptive average pool that fixes the
    feature map to 4x6, so the fully connected classifier always receives
    256 * 4 * 6 features.

    Args:
        num_classes (int): Number of output classes for classification.

    Attributes:
        features (nn.Sequential): Convolutional feature extractor.
        classifier (nn.Sequential): Fully connected classifier.
    """

    def __init__(self, num_classes):
        """
        Initializes the CNN3 model architecture.

        Args:
            num_classes (int): Number of target classes.
        """
        super().__init__()
        # Layers are unpacked into a flat Sequential so that module indices
        # (and therefore state_dict keys) match the hand-written layer list.
        self.features = nn.Sequential(
            *_conv_block(3, 64),
            *_conv_block(64, 128),
            *_conv_block(128, 256, downsample=False),
            nn.AdaptiveAvgPool2d((4, 6)),
        )
        self.classifier = _mlp_head(256 * 4 * 6, 512, 256, num_classes)

    def forward(self, x):
        """
        Defines the forward pass of the CNN3 model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        x = self.features(x)
        # flatten() falls back to a copy when the feature map is not
        # contiguous (e.g. channels_last inputs), where view() would raise.
        x = x.flatten(start_dim=1)
        return self.classifier(x)


class CNN4(nn.Module):
    """
    Deeper convolutional image classifier with four convolutional blocks.

    Compared with CNN3 it adds a 512-channel block before the adaptive
    average pool, so the classifier receives 512 * 4 * 6 features.

    Args:
        num_classes (int): Number of output classes for classification.

    Attributes:
        features (nn.Sequential): Convolutional feature extractor with four blocks.
        classifier (nn.Sequential): Fully connected classifier.
    """

    def __init__(self, num_classes):
        """
        Initializes the CNN4 model architecture.

        Args:
            num_classes (int): Number of target classes.
        """
        super().__init__()
        self.features = nn.Sequential(
            *_conv_block(3, 64),
            *_conv_block(64, 128),
            *_conv_block(128, 256),
            # Block 4 uses adaptive pooling instead of max-pool/dropout to
            # fix the feature map size.
            *_conv_block(256, 512, downsample=False),
            nn.AdaptiveAvgPool2d((4, 6)),
        )
        self.classifier = _mlp_head(512 * 4 * 6, 1024, 512, num_classes)

    def forward(self, x):
        """
        Defines the forward pass of the CNN4 model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        x = self.features(x)
        # flatten() also handles non-contiguous feature maps; view() does not.
        x = x.flatten(start_dim=1)
        return self.classifier(x)


class tinyViT(nn.Module):
    """
    Vision transformer classifier built on timm's ``deit_tiny_patch16_224``.

    The backbone's classification head is replaced with a linear layer
    matching the number of classes.

    Args:
        num_classes (int): Number of output classes.
        img_size (int): Input image size (assumed square).
        patch_size (int): Size of the image patches used by the transformer.
        pretrained (bool): Whether timm should load its pretrained weights for
            the backbone. Defaults to True. Pass False when the weights will be
            restored from a checkpoint anyway, to avoid the download.

    Attributes:
        backbone (nn.Module): The underlying vision transformer model with modified head.
    """

    def __init__(self, num_classes, img_size, patch_size, pretrained=True):
        """
        Initializes the tinyViT model with a DeiT-tiny backbone.

        Args:
            num_classes (int): Number of target classes.
            img_size (int): Input image size (height and width).
            patch_size (int): Patch size for the transformer.
            pretrained (bool): Load timm's pretrained backbone weights.
        """
        super().__init__()
        self.backbone = timm.create_model(
            "deit_tiny_patch16_224",
            pretrained=pretrained,
            img_size=img_size,
            patch_size=patch_size,
        )
        # Swap the original head for one sized to this task.
        in_features = self.backbone.head.in_features
        self.backbone.head = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """
        Defines the forward pass of the tinyViT model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        return self.backbone(x)
class CNN3(nn.Module):
    """
    Convolutional image classifier with three convolutional blocks.

    The feature extractor ends in an adaptive average pool that fixes the
    feature map to 4x6, so the fully connected classifier always receives
    256 * 4 * 6 features.

    Args:
        num_classes (int): Number of output classes for classification.

    Attributes:
        features (nn.Sequential): Convolutional feature extractor.
        classifier (nn.Sequential): Fully connected classifier.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels):
        """Return the layers Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]

    def __init__(self, num_classes):
        """
        Initializes the CNN3 model architecture.

        Args:
            num_classes (int): Number of target classes.
        """
        super().__init__()
        unit = self._conv_unit
        # Units are unpacked into one flat Sequential so module indices (and
        # state_dict keys) stay the same as with a hand-written layer list.
        self.features = nn.Sequential(
            # Block 1
            *unit(3, 64),
            *unit(64, 64),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),
            # Block 2
            *unit(64, 128),
            *unit(128, 128),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),
            # Block 3
            *unit(128, 256),
            *unit(256, 256),
            nn.AdaptiveAvgPool2d((4, 6)),
        )

        self.classifier = nn.Sequential(
            nn.Linear(256 * 4 * 6, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """
        Defines the forward pass of the CNN3 model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        x = self.features(x)
        # flatten() falls back to a copy when the feature map is not
        # contiguous (e.g. channels_last inputs), where view() would raise.
        x = x.flatten(start_dim=1)
        return self.classifier(x)
CNN3 is a convolutional neural network designed for image classification with moderate depth.
It consists of three convolutional blocks followed by fully connected layers.
Arguments:
- num_classes (int): Number of output classes for classification.
Attributes:
- features (nn.Sequential): Convolutional feature extractor.
- classifier (nn.Sequential): Fully connected classifier.
def __init__(self, num_classes):
    """
    Build the CNN3 feature extractor and classifier.

    Args:
        num_classes (int): Number of target classes; sets the width of the
            final linear layer.
    """
    super().__init__()

    def conv_unit(c_in, c_out):
        # One 3x3 convolution followed by batch norm and in-place ReLU.
        return [
            nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(inplace=True),
        ]

    # The list is flat so Sequential indices match a hand-written layer list.
    feature_layers = [
        # Block 1
        *conv_unit(3, 64),
        *conv_unit(64, 64),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(p=0.25),
        # Block 2
        *conv_unit(64, 128),
        *conv_unit(128, 128),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(p=0.25),
        # Block 3 ends in adaptive pooling, fixing the feature map to 4x6.
        *conv_unit(128, 256),
        *conv_unit(256, 256),
        nn.AdaptiveAvgPool2d((4, 6)),
    ]
    self.features = nn.Sequential(*feature_layers)

    flat_features = 256 * 4 * 6
    head_layers = [
        nn.Linear(flat_features, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(512, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(256, num_classes),
    ]
    self.classifier = nn.Sequential(*head_layers)
Initializes the CNN3 model architecture.
Arguments:
- num_classes (int): Number of target classes.
def forward(self, x):
    """
    Defines the forward pass of the CNN3 model.

    Args:
        x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

    Returns:
        torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
    """
    x = self.features(x)
    # flatten() falls back to a copy when the feature map is not contiguous
    # (e.g. channels_last inputs), where view() would raise.
    x = x.flatten(start_dim=1)
    x = self.classifier(x)
    return x
Defines the forward pass of the CNN3 model.
Arguments:
- x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
Returns:
torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
class CNN4(nn.Module):
    """
    Deeper convolutional image classifier with four convolutional blocks.

    The fourth block widens the features to 512 channels and is followed by
    an adaptive average pool that fixes the feature map to 4x6, so the
    classifier always receives 512 * 4 * 6 features.

    Args:
        num_classes (int): Number of output classes for classification.

    Attributes:
        features (nn.Sequential): Convolutional feature extractor with four blocks.
        classifier (nn.Sequential): Fully connected classifier.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels):
        """Return the layers Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]

    def __init__(self, num_classes):
        """
        Initializes the CNN4 model architecture.

        Args:
            num_classes (int): Number of target classes.
        """
        super().__init__()
        unit = self._conv_unit
        # Units are unpacked into one flat Sequential so module indices (and
        # state_dict keys) stay the same as with a hand-written layer list.
        self.features = nn.Sequential(
            # Block 1
            *unit(3, 64),
            *unit(64, 64),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),
            # Block 2
            *unit(64, 128),
            *unit(128, 128),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),
            # Block 3
            *unit(128, 256),
            *unit(256, 256),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),
            # Block 4 (Additional block for more depth)
            *unit(256, 512),
            *unit(512, 512),
            # Adaptive pooling instead of MaxPool fixes the feature map size.
            nn.AdaptiveAvgPool2d((4, 6)),
        )

        self.classifier = nn.Sequential(
            nn.Linear(512 * 4 * 6, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """
        Defines the forward pass of the CNN4 model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        x = self.features(x)
        # flatten() falls back to a copy when the feature map is not
        # contiguous (e.g. channels_last inputs), where view() would raise.
        x = x.flatten(start_dim=1)
        return self.classifier(x)
CNN4 is a deeper convolutional neural network with four convolutional blocks.
This architecture is designed for improved feature extraction by adding an extra conv block and adaptive pooling before classification layers.
Arguments:
- num_classes (int): Number of output classes for classification.
Attributes:
- features (nn.Sequential): Convolutional feature extractor with four blocks.
- classifier (nn.Sequential): Fully connected classifier.
def __init__(self, num_classes):
    """
    Build the CNN4 feature extractor and classifier.

    Args:
        num_classes (int): Number of target classes; sets the width of the
            final linear layer.
    """
    super().__init__()

    def conv_unit(c_in, c_out):
        # One 3x3 convolution followed by batch norm and in-place ReLU.
        return [
            nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(inplace=True),
        ]

    # The list is flat so Sequential indices match a hand-written layer list.
    feature_layers = [
        # Block 1
        *conv_unit(3, 64),
        *conv_unit(64, 64),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(p=0.25),
        # Block 2
        *conv_unit(64, 128),
        *conv_unit(128, 128),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(p=0.25),
        # Block 3
        *conv_unit(128, 256),
        *conv_unit(256, 256),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(p=0.25),
        # Block 4 (additional depth); adaptive pooling replaces MaxPool to
        # fix the feature map size.
        *conv_unit(256, 512),
        *conv_unit(512, 512),
        nn.AdaptiveAvgPool2d((4, 6)),
    ]
    self.features = nn.Sequential(*feature_layers)

    flat_features = 512 * 4 * 6
    head_layers = [
        nn.Linear(flat_features, 1024),
        nn.BatchNorm1d(1024),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(1024, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(512, num_classes),
    ]
    self.classifier = nn.Sequential(*head_layers)
Initializes the CNN4 model architecture.
Arguments:
- num_classes (int): Number of target classes.
def forward(self, x):
    """
    Defines the forward pass of the CNN4 model.

    Args:
        x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).

    Returns:
        torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
    """
    x = self.features(x)
    # flatten() falls back to a copy when the feature map is not contiguous
    # (e.g. channels_last inputs), where view() would raise.
    x = x.flatten(start_dim=1)
    x = self.classifier(x)
    return x
Defines the forward pass of the CNN4 model.
Arguments:
- x (torch.Tensor): Input tensor of shape (batch_size, 3, H, W).
Returns:
torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
class tinyViT(nn.Module):
    """
    Vision transformer classifier built on timm's ``deit_tiny_patch16_224``.

    The backbone's classification head is replaced with a linear layer
    matching the number of classes.

    Args:
        num_classes (int): Number of output classes.
        img_size (int): Input image size (assumed square).
        patch_size (int): Size of the image patches used by the transformer.
        pretrained (bool): Whether timm should load its pretrained weights for
            the backbone. Defaults to True. Pass False when the weights will be
            restored from a checkpoint anyway, to avoid the download.

    Attributes:
        backbone (nn.Module): The underlying vision transformer model with modified head.
    """

    def __init__(self, num_classes, img_size, patch_size, pretrained=True):
        """
        Initializes the tinyViT model with a DeiT-tiny backbone.

        Args:
            num_classes (int): Number of target classes.
            img_size (int): Input image size (height and width).
            patch_size (int): Patch size for the transformer.
            pretrained (bool): Load timm's pretrained backbone weights.
        """
        super().__init__()
        self.backbone = timm.create_model(
            "deit_tiny_patch16_224",
            pretrained=pretrained,
            img_size=img_size,
            patch_size=patch_size,
        )
        # Swap the original head for one sized to this task.
        in_features = self.backbone.head.in_features
        self.backbone.head = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """
        Defines the forward pass of the tinyViT model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).

        Returns:
            torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
        """
        return self.backbone(x)
tinyViT is a vision transformer model based on the DeiT tiny architecture, pretrained on ImageNet.
It replaces the original classification head with a linear layer matching the number of classes.
Arguments:
- num_classes (int): Number of output classes.
- img_size (int): Input image size (assumed square).
- patch_size (int): Size of the image patches used by the transformer.
Attributes:
- backbone (nn.Module): The underlying vision transformer model with modified head.
def __init__(self, num_classes, img_size, patch_size, pretrained=True):
    """
    Initializes the tinyViT model with a DeiT-tiny backbone.

    Args:
        num_classes (int): Number of target classes.
        img_size (int): Input image size (height and width).
        patch_size (int): Patch size for the transformer.
        pretrained (bool): Whether timm should load its pretrained weights for
            the backbone. Defaults to True. Pass False when the weights will be
            restored from a checkpoint anyway, to avoid the download.
    """
    super().__init__()
    self.backbone = timm.create_model(
        "deit_tiny_patch16_224",
        pretrained=pretrained,
        img_size=img_size,
        patch_size=patch_size,
    )
    # Swap the original head for one sized to this task.
    in_features = self.backbone.head.in_features
    self.backbone.head = nn.Linear(in_features, num_classes)
Initializes the tinyViT model with a pretrained DeiT backbone.
Arguments:
- num_classes (int): Number of target classes.
- img_size (int): Input image size (height and width).
- patch_size (int): Patch size for the transformer.
def forward(self, x):
    """
    Run the input batch through the transformer backbone.

    Args:
        x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).

    Returns:
        torch.Tensor: Output logits tensor of shape (batch_size, num_classes).
    """
    logits = self.backbone(x)
    return logits
Defines the forward pass of the tinyViT model.
Arguments:
- x (torch.Tensor): Input tensor of shape (batch_size, 3, img_size, img_size).
Returns:
torch.Tensor: Output logits tensor of shape (batch_size, num_classes).