distill-and-select/distill_and_select.py


								# Copyright 2021 Zilliz. All rights reserved.

								#

								# Licensed under the Apache License, Version 2.0 (the "License");

								# you may not use this file except in compliance with the License.

								# You may obtain a copy of the License at

								#

								#     http://www.apache.org/licenses/LICENSE-2.0

								#

								# Unless required by applicable law or agreed to in writing, software

								# distributed under the License is distributed on an "AS IS" BASIS,

								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

								# See the License for the specific language governing permissions and

								# limitations under the License.

								import torch


								from typing import Union, Any

								from torchvision import transforms

								from towhee.operator.base import NNOperator

								from towhee import register

								from PIL import Image as PILImage

								from .model.feature_extractor import FeatureExtractor

								from .model.students import FineGrainedStudent, CoarseGrainedStudent

								from .model.selector import SelectorNetwork

								from torch import nn


								@register(output_schema=['vec'])

								class DistillAndSelect(NNOperator):

								    """

								    DistillAndSelect

								    """


								    def __init__(self, model_name: str, model_weight_path: str = None,

								                 feature_extractor: Union[str, nn.Module] = 'default', device: str = None):

								        """


								        Args:

								            model_name (`str`):

								                Can be one of them:

								                `feature_extractor`: Feature Extractor only,

								                `fg_att_student`: Fine Grained Student with attention,

								                `fg_bin_student`: Fine Grained Student with binarization,

								                `cg_student`: Coarse Grained Student,

								                `selector_att`: Selector Network with attention,

								                `selector_bin`: Selector Network with binarization.

								            model_weight_path (`str`):

								                Default is None, download use the original pretrained weights.

								            feature_extractor (`Union[str, nn.Module]`):

								                `None`, 'default' or a pytorch nn.Module instance.

								                `None` means this operator don't support feature extracting from the video data and this operator process embedding feature as input.

								                'default' means using the original pretrained feature extracting weights and this operator can process video data as input.

								                Or you can pass in a nn.Module instance as a specific feature extractor.

								                Default is `default`.

								            device (`str`):

								                Model device, cpu or cuda.

								        """

								        super().__init__()

								        assert model_name in ['feature_extractor', 'fg_att_student', 'fg_bin_student', 'cg_student', 'selector_att',

								                              'selector_bin'], 'unsupported model.'

								        if device is None:

								            self.device = "cuda" if torch.cuda.is_available() else "cpu"

								        else:

								            self.device = device


								        self.model_name = model_name


								        self.feature_extractor = None

								        if feature_extractor == 'default':

								            self.feature_extractor = FeatureExtractor(dims=512).to(device).eval()

								        elif isinstance(feature_extractor, nn.Module):

								            self.feature_extractor = feature_extractor


								        self.model = None

								        pretraind = True if model_weight_path is None else None

								        if self.model_name == 'fg_att_student':

								            self.model = FineGrainedStudent(pretrained=pretraind, attention=True)

								        elif self.model_name == 'fg_bin_student':

								            self.model = FineGrainedStudent(pretrained=pretraind, binarization=True)


								        elif self.model_name == 'cg_student':

								            self.model = CoarseGrainedStudent(pretrained=pretraind)


								        elif self.model_name == 'selector_att':

								            self.model = SelectorNetwork(pretrained=pretraind, attention=True)

								        elif self.model_name == 'selector_bin':

								            self.model = SelectorNetwork(pretrained=pretraind, binarization=True)


								        if model_weight_path is not None:

								            self.model.load_state_dict(torch.load(model_weight_path))


								        if self.model is not None:

								            self.model.to(device).eval()


								        self.tfms = transforms.Compose([

								            transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC),

								            transforms.CenterCrop(256),

								            transforms.ToTensor(),

								        ])


								    def __call__(self, data: Any):  # List[VideoFrame] when self.feature_extractor is not None

								        if self.feature_extractor is not None:

								            pil_img_list = []

								            for img in data:

								                pil_img = PILImage.fromarray(img, img.mode)

								                tfmed_img = self.tfms(pil_img).permute(1, 2, 0).unsqueeze(0)

								                pil_img_list.append(tfmed_img)

								            data = torch.concat(pil_img_list, dim=0) * 255

								            data = self.feature_extractor(data.to(self.device)).to(self.device)

								            if self.model_name == 'feature_extractor':

								                return data.cpu().detach().squeeze().numpy()

								        index_feature = self.model.index_video(data)

								        return index_feature.cpu().detach().squeeze().numpy()