Update

Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
4 years ago · bc098ab1aa
3 changed files with 57 additions and 57 deletions
--- a/README.md
+++ b/README.md
@ -6,25 +6,58 @@

 ## Description

-An image embedding operator implemented with pretrained models provided by [Timm](https://github.com/rwightman/pytorch-image-models).
+An image embedding operator generates a vector given an image.
+This operator extracts features from an image using pretrained models provided by [Timm](https://github.com/rwightman/pytorch-image-models).
+Timm is a deep-learning library developed by [Ross Wightman](https://twitter.com/wightmanr),
+which maintains SOTA deep-learning models and tools in computer vision.

+## Code Example
+
+Load an image from path './dog.jpg'
+and use the pretrained ResNet50 model ('resnet50') to generate an image embedding. 

+ *Write the pipeline in simplified style*:

 ```python
-from towhee import ops
-import numpy as np
+from towhee import dc

-img_encoder = ops.image_embedding.timm(model_name='resnet50')
-fake_img = np.zeros((256, 256, 3))
-image_embedding = img_encoder(fake_img)
+dc.glob('./dog.jpg') \
+  .image_decode.cv2() \
+  .image_embedding.timm(model_name='resnet50') \
+  .show()
 ```

+*Write the same pipeline with explicit input/output name specifications:*
+
+```python
+from towhee import dc
+
+dc.glob['path']('./dog.jpg') \
+  .image_decode.cv2['path', 'img']() \
+  .image_embedding.timm['img', 'vec'](model_name='resnet50') \
+  .select('vec') \
+  .to_list()
+```
+
+
 ## Factory Constructor

 Create the operator via the following factory method

-***ops.image_embedding.timm(model_name)***
+***image_embedding.timm(model_name='resnet34', num_classes=1000, skip_preprocess=False)***
+
+**Parameters:**
+
+	***model_name***: *str*
+
+	The model name as a string.
+If no model name is given, the default value "resnet34" is used.
+Refer to the [Timm Docs](https://fastai.github.io/timmdocs/#List-Models-with-Pretrained-Weights) for a full list of supported models.

+skip_preprocess (bool): 
+Flag that controls whether to skip image preprocessing; defaults to False.
+If set to True, image preprocessing steps such as transforms and normalization will be skipped.
+In this case, the user must guarantee that all input images are already preprocessed properly and can thus be fed to the model directly.


 ## Interface
@ -47,32 +80,3 @@ It uses the pre-trained model specified by model name to generate an image embed



-## Code Example
-
-Load an image from path './dog.jpg'
-and use the pretrained ResNet50 model ('resnet50') to generate an image embedding. 
-
- *Write the pipeline in simplified style*:
-
-```python
-from towhee import dc
-
-dc.glob('./dog.jpg')
-  .image_decode()
-  .image_embedding.timm(model_name='resnet50')
-  .show()
-```
-
-*Write a same pipeline with explicit inputs/outputs name specifications:*
-
-```python
-from towhee import dc
-
-dc.glob['path']('./dog.jpg')
-  .image_decode['path', 'img']()
-  .image_embedding.timm['img', 'vec'](model_name='resnet50')
-  .select('vec')
-  .show()
-```
-
-
--- a/init.py
+++ b/init.py
@ -15,5 +15,5 @@
 from .timm_image import TimmImage


-def timm(model_name: str, num_classes: int=1000):
-    return TimmImage(model_name=model_name, num_classes=num_classes)
+def timm(model_name: str, num_classes: int = 1000, skip_preprocess: bool = False):
+    return TimmImage(model_name=model_name, num_classes=num_classes, skip_preprocess=skip_preprocess)
--- a/timm_image.py
+++ b/timm_image.py
@ -16,13 +16,14 @@ import logging
 import numpy

 from towhee.operator.base import NNOperator, OperatorFlag
+from towhee.types import Image as towheeImage
+from towhee.types.arg import arg, to_image_color
 from towhee import register

 import torch
 from torch import nn

 from PIL import Image as PILImage
-import cv2

 from timm.data.transforms_factory import create_transform
 from timm.data import resolve_data_config
@ -43,9 +44,11 @@ class TimmImage(NNOperator):
            Which model to use for the embeddings.
        num_classes (`int = 1000`):
            Number of classes for classification.
+        skip_preprocess (`bool = False`):
+            Whether skip image transforms.
    """

-    def __init__(self, model_name: str, num_classes: int = 1000) -> None:
+    def __init__(self, model_name: str, num_classes: int = 1000, skip_preprocess: bool = False) -> None:
        super().__init__()
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = create_model(model_name, pretrained=True, num_classes=num_classes)
@ -53,20 +56,13 @@ class TimmImage(NNOperator):
        self.model.eval()
        config = resolve_data_config({}, model=self.model)
        self.tfms = create_transform(**config)
+        self.skip_tfms = skip_preprocess

-    def __call__(self, img: numpy.ndarray) -> numpy.ndarray:
-        if hasattr(img, 'mode'):
-            if img.mode not in ['RGB', 'BGR']:
-                log.error(f'Invalid image mode: expect "RGB" or "BGR" but receive "{img.mode}".')
-                raise AssertionError(f'Invalid image mode "{img.mode}".')
-            elif img.mode == 'BGR':
-                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-                log.warning('Converting image mode from "BGR" to "RGB" ...')
-        else:
-            log.warning(f'Image mode is not specified. Using "RGB" now.')
-
+    @arg(1, to_image_color('RGB'))
+    def __call__(self, img: towheeImage) -> numpy.ndarray:
        img = PILImage.fromarray(img.astype('uint8'), 'RGB')
-        img = self.tfms(img).unsqueeze(0)
+        if not self.skip_tfms:
+            img = self.tfms(img).unsqueeze(0)
        img = img.to(self.device)
        features = self.model.forward_features(img)
        if features.dim() == 4:
@ -79,13 +75,13 @@ class TimmImage(NNOperator):


 # if __name__ == '__main__':
-#     from towhee._types import Image
+#     from towhee import ops
 #
+#     path = '/image/path/or/link'
 #
-#     path = '/path/to/image'
-#     img = cv2.imread(path)
-#     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-#     img = Image(img)
+#     decoder = ops.image_decode.cv2()
+#     img = decoder(path)
 #
 #     op = TimmImage('resnet50')
 #     out = op(img)
+#     print(out)