5 changed files with 335 additions and 1 deletion
@@ -1,2 +1,85 @@
# mobilefacenet

# Mobilefacenet Face Landmark Detector

*authors: David Wang*

## Description

MobileFaceNet is a class of extremely efficient CNN models; this operator uses it to extract 68 landmarks from a facial image[1].
```python
from towhee import ops

model = ops.face_landmark_detection.mobilefacenet()
landmark = model(img)
```

where `img` is a decoded face image (for example, the output of `ops.image_decode.cv2()`).
## Factory Constructor

Create the operator via the following factory method

***ops.face_landmark_detection.mobilefacenet()***
## Interface

A face landmark detection operator takes an image as input. It extracts the facial landmarks and returns them as a numpy.ndarray.

**Args:**

***framework***

the framework of the model

supported types: `str`, default is 'pytorch'

***pretrained***

whether to load the pretrained weights

supported types: `bool`, default is True (use pretrained weights)

**Parameters:**

***image***: *towhee._types.Image*

The input image.

**Returns:** *numpy.ndarray*

The extracted facial landmarks: an array of shape (68, 2) holding integer (x, y) pixel coordinates.
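As a quick sanity check, a minimal sketch like the following (assuming a local test image `./img1.jpg`) should print the landmark array shape:

```python
from towhee import ops

# decode the image first, then run the landmark detector
img = ops.image_decode.cv2()('./img1.jpg')
landmarks = ops.face_landmark_detection.mobilefacenet()(img)
print(landmarks.shape)  # expected: (68, 2)
```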
## Code Example

Extract facial landmarks from './img1.jpg'.

*Write the pipeline in simplified style*:
```python
import towhee

towhee.glob('./img1.jpg') \
      .image_decode() \
      .face_landmark_detection.mobilefacenet() \
      .to_list()
```
*Write the same pipeline with explicit input/output name specifications:*
```python
import towhee

towhee.glob['path']('./img1.jpg') \
      .image_decode.cv2['path', 'img']() \
      .face_landmark_detection.mobilefacenet['img', 'landmark']() \
      .to_list()
```
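Both pipelines compute the same landmarks; the explicit form just names each intermediate column (`path`, `img`, `landmark`), which helps when composing larger pipelines.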
## Reference

[1] Sheng Chen, Yang Liu, Xiang Gao, Zhen Han. MobileFaceNets: Efficient CNNs for Accurate Real-Time Face Verification on Mobile Devices. https://arxiv.org/pdf/1804.07573.pdf
@@ -0,0 +1,19 @@
# Copyright 2021 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .mobilefacenet import Mobilefacenet


def mobilefacenet():
    return Mobilefacenet()
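# Usage sketch (illustrative, not part of the original file):
#   op = mobilefacenet()
#   landmarks = op(img)  # img: towhee._types.Image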
@@ -0,0 +1,75 @@
# Copyright 2021 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# adapted from https://github.com/cunjian/pytorch_face_landmark

import os
import sys
from pathlib import Path

import numpy as np
import torch
from torchvision import transforms

from towhee import register
from towhee.operator import NNOperator
from towhee.types.arg import arg, to_image_color
from towhee.types.image_utils import to_pil
from towhee._types import Image


@register(output_schema=['landmark'])
class Mobilefacenet(NNOperator):
    """
    MobileFaceNet face landmark detector.
    """

    def __init__(self, framework: str = 'pytorch', pretrained: bool = True):
        super().__init__(framework=framework)
        sys.path.append(str(Path(__file__).parent))
        from mobilefacenet_impl import MobileFaceNet
        # 112x112 input; 136 outputs = 68 (x, y) landmark pairs.
        self.model = MobileFaceNet([112, 112], 136)
        if pretrained:
            checkpoint = torch.load(
                os.path.dirname(__file__) + '/mobilefacenet_model_best.pth',
                map_location='cpu')
            self.model.load_state_dict(checkpoint['state_dict'])

        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.tfms = transforms.Compose([transforms.Resize(112),
                                        transforms.ToTensor(),
                                        normalize])

    @arg(1, to_image_color('RGB'))
    def __call__(self, image: Image):
        image = to_pil(image)
        w, h = image.size  # PIL's size is (width, height)
        tensor = self._preprocess(image)
        if len(tensor.shape) == 3:
            tensor = torch.unsqueeze(tensor, 0)  # add a batch dimension
        self.model.eval()
        with torch.no_grad():
            landmark = self.model(tensor)[0][0]  # landmark head, first batch item
        # The model predicts coordinates normalized to [0, 1]; scale them
        # back to pixel coordinates of the original image.
        landmark = landmark.reshape(-1, 2)
        landmark[:, 0] = landmark[:, 0] * w
        landmark[:, 1] = landmark[:, 1] * h
        return np.asarray(landmark.cpu().detach(), dtype=np.int32)

    def _preprocess(self, image):
        return self.tfms(image)

    def _postprocess(self, landmark):
        pass

    def train(self):
        pass
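# Worked example of the rescaling step above (illustrative numbers only):
# the raw head output is 136 values in [0, 1]; reshape(-1, 2) gives a
# (68, 2) array of (x, y) pairs, and for an original image of width 640 and
# height 480 a normalized pair (0.5, 0.25) maps to pixel (320, 120).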
@@ -0,0 +1,154 @@
import torch
from torch import nn


################################## Original Arcface Model #############################################################

class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


################################## MobileFaceNet #############################################################

class Conv_block(nn.Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Conv_block, self).__init__()
        self.conv = nn.Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_c)
        self.prelu = nn.PReLU(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.prelu(x)
        return x


class Linear_block(nn.Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Linear_block, self).__init__()
        self.conv = nn.Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class Depth_Wise(nn.Module):
    def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
        super(Depth_Wise, self).__init__()
        self.conv = Conv_block(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride)
        self.project = Linear_block(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual

    def forward(self, x):
        if self.residual:
            short_cut = x
        x = self.conv(x)
        x = self.conv_dw(x)
        x = self.project(x)
        if self.residual:
            output = short_cut + x
        else:
            output = x
        return output


class Residual(nn.Module):
    def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super(Residual, self).__init__()
        modules = []
        for _ in range(num_block):
            modules.append(Depth_Wise(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups))
        self.model = nn.Sequential(*modules)

    def forward(self, x):
        return self.model(x)
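# Shape sketch for one bottleneck (illustrative; matches conv_23 below):
#   Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
#     1x1 Conv_block:   (N, 64, 56, 56)  -> (N, 128, 56, 56)  expand to `groups` channels
#     3x3 depthwise:    (N, 128, 56, 56) -> (N, 128, 28, 28)  stride-2 spatial downsampling
#     1x1 Linear_block: (N, 128, 28, 28) -> (N, 64, 28, 28)   linear projection back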

class GNAP(nn.Module):
    def __init__(self, embedding_size):
        super(GNAP, self).__init__()
        assert embedding_size == 512
        self.bn1 = nn.BatchNorm2d(512, affine=False)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.bn2 = nn.BatchNorm1d(512, affine=False)

    def forward(self, x):
        x = self.bn1(x)
        x_norm = torch.norm(x, 2, 1, True)
        x_norm_mean = torch.mean(x_norm)
        weight = x_norm_mean / x_norm
        x = x * weight
        x = self.pool(x)
        x = x.view(x.shape[0], -1)
        feature = self.bn2(x)
        return feature


class GDC(nn.Module):
    def __init__(self, embedding_size):
        super(GDC, self).__init__()
        self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0))
        self.conv_6_flatten = nn.Flatten()
        self.linear = nn.Linear(512, embedding_size, bias=False)
        # self.bn = nn.BatchNorm1d(embedding_size, affine=False)
        self.bn = nn.BatchNorm1d(embedding_size)

    def forward(self, x):
        x = self.conv_6_dw(x)
        x = self.conv_6_flatten(x)
        x = self.linear(x)
        x = self.bn(x)
        return x
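# Note: GDC ("global depthwise convolution") replaces global average pooling
# with a learnable 7x7 depthwise convolution, so each spatial position of the
# final feature map receives its own weight; this is the output scheme
# proposed in the MobileFaceNets paper.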

class MobileFaceNet(nn.Module):
    def __init__(self, input_size, embedding_size=512, output_name="GDC"):
        super(MobileFaceNet, self).__init__()
        assert output_name in ["GNAP", 'GDC']
        assert input_size[0] in [112]
        self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
        self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
        self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_34 = Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
        self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512)
        self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        if output_name == "GNAP":
            self.output_layer = GNAP(512)
        else:
            self.output_layer = GDC(embedding_size)

        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2_dw(out)
        out = self.conv_23(out)
        out = self.conv_3(out)
        out = self.conv_34(out)
        out = self.conv_4(out)
        out = self.conv_45(out)
        out = self.conv_5(out)
        conv_features = self.conv_6_sep(out)
        out = self.output_layer(conv_features)
        return out, conv_features
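# A minimal smoke test (illustrative, not part of the original file); shapes
# assume the [112, 112] / 136-output landmark configuration used by the operator.
if __name__ == '__main__':
    net = MobileFaceNet([112, 112], 136)
    net.eval()  # eval mode: BatchNorm uses running stats, works with a batch of 1
    dummy = torch.randn(1, 3, 112, 112)
    with torch.no_grad():
        out, features = net(dummy)
    print(out.shape)       # torch.Size([1, 136])
    print(features.shape)  # torch.Size([1, 512, 7, 7])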
Binary file not shown.