diff --git a/README.md b/README.md
index b661aa6..35ee893 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,85 @@
-# mobilefacenet
+# Mobilefacenet Face Landmark Detector
+*author: David Wang*
+
+
+## Description
+
+An extremely efficient CNN model that extracts 68 landmarks from a facial image[1].
+
+
+```python
+from towhee import ops
+
+model = ops.face_landmark_detection.mobilefacenet()
+landmark = model(img)  # img: a decoded RGB image
+```
+
+## Factory Constructor
+
+Create the operator via the following factory method:
+
+***ops.face_landmark_detection.mobilefacenet()***
+
+
+
+## Interface
+
+A face landmark detection operator takes an image as input and returns the extracted landmarks as a numpy.ndarray.
+
+**Args:**
+
+***framework***
+
+The framework of the model. Supported types: `str`; default is 'pytorch'.
+
+***pretrained***
+
+Whether to load pretrained weights. Supported types: `bool`; default is `True` (load pretrained weights).
+
+
+**Parameters:**
+
+***image***: *towhee._types.Image*
+
+The input image.
+
+
+**Returns:** *numpy.ndarray*
+
+The extracted facial landmarks.
+
+## Code Example
+
+Extract facial landmarks from './img1.jpg'.
+
+*Write the pipeline in simplified style*:
+
+```python
+import towhee
+
+(towhee.glob('./img1.jpg')
+       .image_decode.cv2()
+       .face_landmark_detection.mobilefacenet()
+       .to_list())
+```
+
+*Write the same pipeline with explicit input/output name specifications:*
+
+```python
+import towhee
+
+(towhee.glob['path']('./img1.jpg')
+       .image_decode.cv2['path', 'img']()
+       .face_landmark_detection.mobilefacenet['img', 'landmark']()
+       .to_list())
+```
+
+
+## Reference
+
+[1] https://arxiv.org/pdf/1804.07573.pdf
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..6136253
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,19 @@
+# Copyright 2021 Zilliz. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .mobilefacenet import Mobilefacenet
+
+
+def mobilefacenet():
+    return Mobilefacenet()
diff --git a/mobilefacenet.py b/mobilefacenet.py
new file mode 100644
index 0000000..98666d6
--- /dev/null
+++ b/mobilefacenet.py
@@ -0,0 +1,75 @@
+# Copyright 2021 Zilliz. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
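+#
+# This operator wraps MobileFaceNet for 68-point facial landmark
+# detection: the backbone regresses 136 values ((x, y) pairs normalized
+# to [0, 1]), which __call__ below rescales to the input image size.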
+# adapted from https://github.com/cunjian/pytorch_face_landmark
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+import torch
+from torchvision import transforms
+
+from towhee import register
+from towhee.operator import NNOperator
+from towhee.types.arg import arg, to_image_color
+from towhee.types.image_utils import to_pil
+from towhee._types import Image
+
+
+@register(output_schema=['landmark'])
+class Mobilefacenet(NNOperator):
+    """
+    MobileFaceNet face landmark detection operator.
+    """
+
+    def __init__(self, framework: str = 'pytorch', pretrained: bool = True):
+        super().__init__(framework=framework)
+        sys.path.append(str(Path(__file__).parent))
+        from mobilefacenet_impl import MobileFaceNet
+        # 112x112 input, 136 outputs (68 landmarks as x/y pairs)
+        self.model = MobileFaceNet([112, 112], 136)
+        if pretrained:
+            checkpoint = torch.load(
+                os.path.join(os.path.dirname(__file__), 'mobilefacenet_model_best.pth'),
+                map_location='cpu')
+            self.model.load_state_dict(checkpoint['state_dict'])
+
+        normalize = transforms.Normalize(
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        self.tfms = transforms.Compose([transforms.Resize(112),
+                                        transforms.ToTensor(),
+                                        normalize])
+
+    @arg(1, to_image_color('RGB'))
+    def __call__(self, image: Image):
+        image = to_pil(image)
+        w, h = image.size  # PIL size is (width, height)
+        tensor = self._preprocess(image)
+        if len(tensor.shape) == 3:
+            tensor = torch.unsqueeze(tensor, 0)
+        self.model.eval()
+        landmark = self.model(tensor)[0][0]
+        landmark = landmark.reshape(-1, 2)
+        # rescale normalized coordinates to the original image size
+        landmark[:, 0] = landmark[:, 0] * w
+        landmark[:, 1] = landmark[:, 1] * h
+        return np.asarray(landmark.cpu().detach(), dtype=np.int32)
+
+    def _preprocess(self, image):
+        return self.tfms(image)
+
+    def _postprocess(self, landmark):
+        pass
+
+    def train(self):
+        pass
diff --git a/mobilefacenet_impl.py b/mobilefacenet_impl.py
new file mode 100644
index 0000000..79cd3ee
--- /dev/null
+++ b/mobilefacenet_impl.py
@@ -0,0 +1,154 @@
+import torch
+from torch import nn
+
+################################## Original Arcface Model #############################################################
+
+class Flatten(nn.Module):
+    def forward(self, input):
+        return input.view(input.size(0), -1)
+
+################################## MobileFaceNet #############################################################
+
+class Conv_block(nn.Module):
+    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
+        super(Conv_block, self).__init__()
+        self.conv = nn.Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
+        self.bn = nn.BatchNorm2d(out_c)
+        self.prelu = nn.PReLU(out_c)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.prelu(x)
+        return x
+
+class Linear_block(nn.Module):
+    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
+        super(Linear_block, self).__init__()
+        self.conv = nn.Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
+        self.bn = nn.BatchNorm2d(out_c)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        return x
+
+class Depth_Wise(nn.Module):
+    def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
+        super(Depth_Wise, self).__init__()
+        self.conv = Conv_block(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
+        self.conv_dw = Conv_block(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride)
+        self.project = Linear_block(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
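+        # 1x1 expansion -> depthwise kxk -> 1x1 linear projection: a
+        # MobileNetV2-style bottleneck. When `residual` is True, forward()
+        # adds the input back as a skip connection (shapes unchanged).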
+        self.residual = residual
+
+    def forward(self, x):
+        if self.residual:
+            short_cut = x
+        x = self.conv(x)
+        x = self.conv_dw(x)
+        x = self.project(x)
+        if self.residual:
+            output = short_cut + x
+        else:
+            output = x
+        return output
+
+class Residual(nn.Module):
+    def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
+        super(Residual, self).__init__()
+        modules = []
+        for _ in range(num_block):
+            modules.append(Depth_Wise(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups))
+        self.model = nn.Sequential(*modules)
+
+    def forward(self, x):
+        return self.model(x)
+
+class GNAP(nn.Module):
+    def __init__(self, embedding_size):
+        super(GNAP, self).__init__()
+        assert embedding_size == 512
+        self.bn1 = nn.BatchNorm2d(512, affine=False)
+        self.pool = nn.AdaptiveAvgPool2d((1, 1))
+        self.bn2 = nn.BatchNorm1d(512, affine=False)
+
+    def forward(self, x):
+        x = self.bn1(x)
+        x_norm = torch.norm(x, 2, 1, True)
+        x_norm_mean = torch.mean(x_norm)
+        weight = x_norm_mean / x_norm
+        x = x * weight
+        x = self.pool(x)
+        x = x.view(x.shape[0], -1)
+        feature = self.bn2(x)
+        return feature
+
+class GDC(nn.Module):
+    def __init__(self, embedding_size):
+        super(GDC, self).__init__()
+        self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0))
+        self.conv_6_flatten = nn.Flatten()
+        self.linear = nn.Linear(512, embedding_size, bias=False)
+        self.bn = nn.BatchNorm1d(embedding_size)
+
+    def forward(self, x):
+        x = self.conv_6_dw(x)
+        x = self.conv_6_flatten(x)
+        x = self.linear(x)
+        x = self.bn(x)
+        return x
+
+class MobileFaceNet(nn.Module):
+    def __init__(self, input_size, embedding_size=512, output_name="GDC"):
+        super(MobileFaceNet, self).__init__()
+        assert output_name in ["GNAP", "GDC"]
+        assert input_size[0] in [112]
+        self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
+        self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
+        self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
+        self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
+        self.conv_34 = Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
+        self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
+        self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512)
+        self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
+        self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
+        if output_name == "GNAP":
+            self.output_layer = GNAP(512)
+        else:
+            self.output_layer = GDC(embedding_size)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+                if m.bias is not None:
+                    m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+            elif isinstance(m, nn.Linear):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+                if m.bias is not None:
+                    m.bias.data.zero_()
+
+    def forward(self, x):
+        out = self.conv1(x)
+        out = self.conv2_dw(out)
+        out = self.conv_23(out)
+        out = self.conv_3(out)
+        out = self.conv_34(out)
+        out = self.conv_4(out)
+        out = self.conv_45(out)
+        out = self.conv_5(out)
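+        # final 1x1 conv expands 128 -> 512 channels; the resulting
+        # conv_features are returned alongside the head output below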
+        conv_features = self.conv_6_sep(out)
+        out = self.output_layer(conv_features)
+        return out, conv_features
diff --git a/mobilefacenet_model_best.pth b/mobilefacenet_model_best.pth
new file mode 100644
index 0000000..667632d
--- /dev/null
+++ b/mobilefacenet_model_best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b994af026bfddbafc507a6f1c8737a9896bab20ed2b0cfb6ae90b81736970313
+size 12281146