mobilefacenet
copied
5 changed files with 335 additions and 1 deletions
@ -1,2 +1,85 @@ |
|||
# mobilefacenet |
|||
# Mobilefacenet Face Landmark Detector |
|||
|
|||
*authors: David Wang* |
|||
|
|||
|
|||
## Description |
|||
|
|||
A class of extremely efficient CNN models to extract 68 landmarks from a facial image[1]. |
|||
|
|||
|
|||
```python |
|||
from towhee import ops |
|||
|
|||
model = ops.face_landmark_detection.mobilefacenet() |
|||
landmark = model(img) |
|||
``` |
|||
|
|||
## Factory Constructor |
|||
|
|||
Create the operator via the following factory method |
|||
|
|||
***ops.face_landmark_detection.mobilefacenet()*** |
|||
|
|||
|
|||
|
|||
## Interface |
|||
|
|||
A face landmark detection operator takes an image as input. It extracts the facial landmarks and returns them as an ndarray. |
|||
|
|||
**Args:** |
|||
|
|||
***framework*** |
|||
|
|||
the framework of the model |
|||
|
|||
supported types: `str`, default is 'pytorch' |
|||
|
|||
|
|||
***pretrained*** |
|||
|
|||
Whether to load the pretrained weights. |
|||
|
|||
supported types: `bool`, default is True, using pretrained weights |
|||
|
|||
|
|||
**Parameters:** |
|||
|
|||
***image***: *towhee._types.Image* |
|||
|
|||
The input image. |
|||
|
|||
|
|||
**Returns:** *numpy.ndarray* |
|||
|
|||
The extracted facial landmark. |
|||
|
|||
## Code Example |
|||
|
|||
Extract facial landmarks from './img1.jpg'. |
|||
|
|||
*Write the pipeline in simplified style*: |
|||
|
|||
```python |
|||
import towhee.DataCollection as dc |
|||
|
|||
dc.glob('./img1.jpg') |
|||
.face_landmark_detection.mobilefacenet() |
|||
.to_list() |
|||
``` |
|||
|
|||
*Write the same pipeline with explicit input/output name specifications:* |
|||
|
|||
```python |
|||
import towhee.DataCollection as dc |
|||
|
|||
dc.glob['path']('./img1.jpg') |
|||
.image_decode.cv2['path', 'img']() |
|||
.face_landmark_detection.mobilefacenet() |
|||
.to_list() |
|||
``` |
|||
|
|||
|
|||
## Reference |
|||
|
|||
[1]. https://arxiv.org/pdf/1804.07573.pdf |
|||
|
@ -0,0 +1,19 @@ |
|||
# Copyright 2021 Zilliz. All rights reserved. |
|||
# |
|||
# Licensed under the Apache License, Version 2.0 (the "License"); |
|||
# you may not use this file except in compliance with the License. |
|||
# You may obtain a copy of the License at |
|||
# |
|||
# http://www.apache.org/licenses/LICENSE-2.0 |
|||
# |
|||
# Unless required by applicable law or agreed to in writing, software |
|||
# distributed under the License is distributed on an "AS IS" BASIS, |
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
# See the License for the specific language governing permissions and |
|||
# limitations under the License. |
|||
|
|||
from .mobilefacenet import Mobilefacenet |
|||
|
|||
def mobilefacenet(framework: str = 'pytorch', pretrained: bool = True):
    """
    Factory for the Mobilefacenet face landmark detection operator.

    Args:
        framework (`str`):
            Framework of the model, forwarded to the operator.
            Default is 'pytorch'.
        pretrained (`bool`):
            Whether to load the pretrained weights. Default is True.

    Returns:
        A new `Mobilefacenet` operator instance.
    """
    # Calling with no arguments is equivalent to the former `Mobilefacenet()`
    # because the defaults mirror those of the operator's constructor.
    return Mobilefacenet(framework=framework, pretrained=pretrained)
|||
|
@ -0,0 +1,75 @@ |
|||
# Copyright 2021 Zilliz. All rights reserved. |
|||
# |
|||
# Licensed under the Apache License, Version 2.0 (the "License"); |
|||
# you may not use this file except in compliance with the License. |
|||
# You may obtain a copy of the License at |
|||
# |
|||
# http://www.apache.org/licenses/LICENSE-2.0 |
|||
# |
|||
# Unless required by applicable law or agreed to in writing, software |
|||
# distributed under the License is distributed on an "AS IS" BASIS, |
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
# See the License for the specific language governing permissions and |
|||
# limitations under the License. |
|||
# adapted from https://github.com/cunjian/pytorch_face_landmark |
|||
import os
import sys
from pathlib import Path
from typing import NamedTuple

import numpy as np
import torch
from torchvision import transforms

from towhee import register
from towhee.operator import NNOperator
from towhee.types.arg import arg, to_image_color
from towhee.types.image_utils import to_pil
from towhee._types import Image
|||
|
|||
#import mobilefacenet |
|||
|
|||
@register(output_schema=['landmark'])
class Mobilefacenet(NNOperator):
    """
    Facial landmark detection operator backed by a MobileFaceNet model.

    Args:
        framework (`str`):
            Framework name forwarded to `NNOperator`. Default is 'pytorch'.
        pretrained (`bool`):
            If truthy, load the weights stored in
            `mobilefacenet_model_best.pth` next to this file.
    """

    def __init__(self, framework: str = 'pytorch', pretrained = True):
        super().__init__(framework=framework)
        op_dir = Path(__file__).parent
        # The network definition is imported by its plain module name, so the
        # operator directory has to be importable. Add it only once; otherwise
        # every new operator instance appends another duplicate entry.
        if str(op_dir) not in sys.path:
            sys.path.append(str(op_dir))
        from mobilefacenet_impl import MobileFaceNet
        # 136 outputs are later reshaped to (-1, 2), i.e. 68 (x, y) pairs.
        self.model = MobileFaceNet([112, 112], 136)
        if pretrained:
            checkpoint = torch.load(
                str(op_dir / 'mobilefacenet_model_best.pth'), map_location='cpu')
            self.model.load_state_dict(checkpoint['state_dict'])

        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        # `transforms.Scale` was removed from torchvision; `Resize` replaces it.
        # Both sides are fixed to 112 because the network was built for
        # [112, 112] input; resizing only the short side would hand it
        # non-square tensors for non-square images.
        self.tfms = transforms.Compose([transforms.Resize((112, 112)),
                                        transforms.ToTensor(),
                                        normalize])

    @arg(1, to_image_color('RGB') )
    def __call__(self, image: Image):
        """
        Detect facial landmarks on a single image.

        Args:
            image (`towhee._types.Image`):
                The input image.

        Returns:
            (`numpy.ndarray`)
                int32 array with two columns per landmark, scaled by the
                image width (column 0) and height (column 1).
        """
        image = to_pil(image)
        # PIL reports size as (width, height), not (height, width).
        w, h = image.size
        tensor = self._preprocess(image)
        if len(tensor.shape) == 3:
            # Add the batch dimension expected by the model.
            tensor = torch.unsqueeze(tensor, 0)
        self.model.eval()
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            # The model returns (output, conv_features); keep the first
            # sample of the output.
            landmark = self.model(tensor)[0][0]
        landmark = landmark.reshape(-1, 2)
        # NOTE(review): assumes the model emits coordinates relative to the
        # image size, as the scaling below implies — confirm against training.
        landmark[:, 0] = landmark[:, 0] * w
        landmark[:, 1] = landmark[:, 1] * h
        return np.asarray(landmark.cpu().detach(), dtype=np.int32)

    def _preprocess(self, image):
        """Apply the resize / to-tensor / normalize pipeline to a PIL image."""
        return self.tfms(image)

    def _postprocess(self, landmark):
        """Placeholder; no post-processing is implemented."""
        pass

    def train(self):
        """Placeholder; training is not implemented for this operator."""
        pass
@ -0,0 +1,154 @@ |
|||
import torch |
|||
from torch import nn |
|||
|
|||
################################## Original Arcface Model ############################################################# |
|||
|
|||
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, input):
        # Keep the leading dimension, merge all remaining ones.
        leading = input.size(0)
        return input.view(leading, -1)
|||
|
|||
################################## MobileFaceNet ############################################################# |
|||
|
|||
class Conv_block(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and a per-channel PReLU."""

    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super().__init__()
        self.conv = nn.Conv2d(
            in_c,
            out_c,
            kernel,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_c)
        self.prelu = nn.PReLU(out_c)

    def forward(self, x):
        # conv -> batch norm -> activation
        return self.prelu(self.bn(self.conv(x)))
|||
|
|||
class Linear_block(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d, with no activation."""

    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super().__init__()
        self.conv = nn.Conv2d(
            in_c,
            out_c,
            kernel,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_c)

    def forward(self, x):
        # conv -> batch norm
        return self.bn(self.conv(x))
|||
|
|||
class Depth_Wise(nn.Module):
    """
    1x1 Conv_block, a grouped Conv_block and a 1x1 Linear_block in sequence,
    optionally with the block input added to the result.
    """

    def __init__(self, in_c, out_c, residual = False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
        super().__init__()
        # `groups` is used both as the intermediate channel count and as the
        # group count of the middle convolution.
        width = groups
        self.conv = Conv_block(in_c, out_c=width, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.conv_dw = Conv_block(width, width, kernel=kernel, stride=stride, padding=padding, groups=width)
        self.project = Linear_block(width, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.residual = residual

    def forward(self, x):
        out = self.project(self.conv_dw(self.conv(x)))
        if not self.residual:
            return out
        # Skip connection: input plus the projected features.
        return x + out
|||
|
|||
class Residual(nn.Module):
    """A stack of `num_block` residual Depth_Wise blocks with `c` channels."""

    def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super().__init__()
        blocks = [
            Depth_Wise(c, c, residual=True, kernel=kernel, stride=stride, padding=padding, groups=groups)
            for _ in range(num_block)
        ]
        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        return self.model(x)
|||
|
|||
class GNAP(nn.Module):
    """
    Output head: batch norm, rescaling by the channel-wise L2 norm,
    global average pooling and a final 1-D batch norm.

    Only `embedding_size == 512` is accepted.
    """

    def __init__(self, embedding_size):
        super().__init__()
        assert embedding_size == 512
        self.bn1 = nn.BatchNorm2d(512, affine=False)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.bn2 = nn.BatchNorm1d(512, affine=False)

    def forward(self, x):
        normed = self.bn1(x)
        # L2 norm over the channel dimension, kept for broadcasting.
        channel_norm = torch.norm(normed, 2, 1, True)
        # Scale every position so its norm equals the mean norm.
        scale = torch.mean(channel_norm) / channel_norm
        pooled = self.pool(normed * scale)
        flat = pooled.view(pooled.shape[0], -1)
        return self.bn2(flat)
|||
|
|||
class GDC(nn.Module):
    """
    Output head: 7x7 grouped Linear_block over 512 channels, flatten,
    bias-free linear projection to `embedding_size`, then 1-D batch norm.
    """

    def __init__(self, embedding_size):
        super().__init__()
        self.conv_6_dw = Linear_block(512, 512, kernel=(7, 7), stride=(1, 1), padding=(0, 0), groups=512)
        self.conv_6_flatten = nn.Flatten()
        self.linear = nn.Linear(512, embedding_size, bias=False)
        self.bn = nn.BatchNorm1d(embedding_size)

    def forward(self, x):
        # Run the four stages in order.
        for stage in (self.conv_6_dw, self.conv_6_flatten, self.linear, self.bn):
            x = stage(x)
        return x
|||
|
|||
class MobileFaceNet(nn.Module):
    """
    MobileFaceNet backbone with a selectable output head.

    Args:
        input_size:
            Indexable size specification; `input_size[0]` must be 112.
        embedding_size (`int`):
            Output dimension of the GDC head. Default is 512. It is not
            forwarded to the GNAP head, which is always built with 512.
        output_name (`str`):
            Either "GDC" (default) or "GNAP"; selects the output head.
    """

    def __init__(self, input_size, embedding_size = 512, output_name = "GDC"):
        super(MobileFaceNet, self).__init__()
        assert output_name in ["GNAP", 'GDC']
        assert input_size[0] in [112]
        self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
        self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
        self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_34 = Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
        self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512)
        self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        if output_name == "GNAP":
            self.output_layer = GNAP(512)
        else:
            self.output_layer = GDC(embedding_size)

        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise conv/linear weights (Kaiming normal) and reset BatchNorm2d affine terms."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                # BatchNorm2d layers created with affine=False (the GNAP head)
                # have weight and bias set to None; skip them instead of
                # failing on `None.data`.
                if m.weight is not None:
                    m.weight.data.fill_(1)
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        """
        Run the backbone and the output head.

        Returns:
            A tuple `(out, conv_features)`: the head output and the feature
            map produced by `conv_6_sep` that was fed into the head.
        """
        out = self.conv1(x)
        out = self.conv2_dw(out)
        out = self.conv_23(out)
        out = self.conv_3(out)
        out = self.conv_34(out)
        out = self.conv_4(out)
        out = self.conv_45(out)
        out = self.conv_5(out)

        conv_features = self.conv_6_sep(out)
        out = self.output_layer(conv_features)
        return out, conv_features
Binary file not shown.
Loading…
Reference in new issue