From 5b1a1a02e90ff8cafeb87404dbb7431dd5f192c9 Mon Sep 17 00:00:00 2001
From: wxywb <xy.wang@zilliz.com>
Date: Fri, 22 Jul 2022 15:00:48 +0800
Subject: [PATCH] Update SLIP operator.

Signed-off-by: wxywb <xy.wang@zilliz.com>
---
 __init__.py                  |  6 ++--
 bpe_simple_vocab_16e6.txt.gz |  3 ++
 models.py                    | 36 ++++++++++-----------
 slip.py                      | 63 +++++++++++++++++++++++++++++-------
 utils.py                     |  8 -----
 5 files changed, 75 insertions(+), 41 deletions(-)
 create mode 100644 bpe_simple_vocab_16e6.txt.gz
 delete mode 100644 utils.py

diff --git a/__init__.py b/__init__.py
index 3a4024d..cb0ca27 100644
--- a/__init__.py
+++ b/__init__.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .blip import Blip
+from .slip import Slip
 
 
-def blip(model_name: str, modality: str):
-    return Blip(model_name, modality)
+def slip(model_name: str, modality: str):
+    return Slip(model_name, modality)
diff --git a/bpe_simple_vocab_16e6.txt.gz b/bpe_simple_vocab_16e6.txt.gz
new file mode 100644
index 0000000..36a1585
--- /dev/null
+++ b/bpe_simple_vocab_16e6.txt.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:924691ac288e54409236115652ad4aa250f48203de50a9e4722a6ecd48d6804a
+size 1356917
diff --git a/models.py b/models.py
index a082384..6bed2bb 100644
--- a/models.py
+++ b/models.py
@@ -12,7 +12,7 @@ import timm
 import torch
 from torch import nn
 
-import losses
+#import losses
 
 
 class LayerNorm(nn.LayerNorm):
@@ -235,23 +235,23 @@ class SLIP(CLIP):
                 'aug2_embed': aug2_embed}
 
 
-def get_loss(model, ssl_temp, ssl_scale):
-    if model.startswith('SLIP'):
-        ssl_loss = losses.SIMCLRLoss(temperature=ssl_temp)
-        return losses.SLIPLoss(ssl_loss, ssl_scale)
-    if model.startswith('CLIP'):
-        return losses.CLIPLoss()
-    if model.startswith('SIMCLR'):
-        return losses.SIMCLRLoss(temperature=ssl_temp)
-
-
-def get_metric_names(model):
-    if model.startswith('SLIP'):
-        return ['loss', 'clip_loss', 'ssl_loss', 'clip_acc', 'ssl_acc']
-    elif model.startswith('CLIP'):
-        return ['loss', 'clip_loss', 'clip_acc']
-    else:
-        return ['loss', 'ssl_loss', 'ssl_acc']
+#def get_loss(model, ssl_temp, ssl_scale):
+#    if model.startswith('SLIP'):
+#        ssl_loss = losses.SIMCLRLoss(temperature=ssl_temp)
+#        return losses.SLIPLoss(ssl_loss, ssl_scale)
+#    if model.startswith('CLIP'):
+#        return losses.CLIPLoss()
+#    if model.startswith('SIMCLR'):
+#        return losses.SIMCLRLoss(temperature=ssl_temp)
+#
+#
+#def get_metric_names(model):
+#    if model.startswith('SLIP'):
+#        return ['loss', 'clip_loss', 'ssl_loss', 'clip_acc', 'ssl_acc']
+#    elif model.startswith('CLIP'):
+#        return ['loss', 'clip_loss', 'clip_acc']
+#    else:
+#        return ['loss', 'ssl_loss', 'ssl_acc']
 
 
 @timm.models.registry.register_model
diff --git a/slip.py b/slip.py
index 11111ba..a0ce924 100644
--- a/slip.py
+++ b/slip.py
@@ -13,18 +13,20 @@
 # limitations under the License.
 
 import sys
+import os
 from pathlib import Path
+from urllib.parse import urlparse
+from collections import OrderedDict
 
 import torch
 from torchvision import transforms
+from timm.models.hub import download_cached_file
 
 from towhee import register
 from towhee.operator.base import NNOperator, OperatorFlag
 from towhee.types.arg import arg, to_image_color
 from towhee.types.image_utils import from_pil, to_pil
 
-from tokenizer import SimpleTokenizer
-
 def get_model(model):
     if isinstance(model, torch.nn.DataParallel) \
       or isinstance(model, torch.nn.parallel.DistributedDataParallel):
@@ -32,16 +34,45 @@ def get_model(model):
     else:
         return model
 
+def is_url(url_or_filename):
+    parsed = urlparse(url_or_filename)
+    return parsed.scheme in ("http", "https")
+
+def load_checkpoint(url_or_filename, models, device):
+    """Build the model named in a checkpoint (http(s) URL or local file) and load its weights."""
+    if is_url(url_or_filename):
+        cached_file = download_cached_file(url_or_filename, check_hash=False, progress=True)
+        checkpoint = torch.load(cached_file, map_location='cpu')
+    elif os.path.isfile(url_or_filename):
+        checkpoint = torch.load(url_or_filename, map_location='cpu')
+    else:
+        raise RuntimeError('checkpoint url or path is invalid')
+
+    state_dict = OrderedDict()
+    for k, v in checkpoint['state_dict'].items():
+        state_dict[k.replace('module.', '')] = v
+    old_args = checkpoint['args']
+
+    model = getattr(models, old_args.model)(rand_embed=False,
+        ssl_mlp_dim=old_args.ssl_mlp_dim, ssl_emb_dim=old_args.ssl_emb_dim)
+    model.to(device)
+    model.load_state_dict(state_dict, strict=True)
+    return model
+
 @register(output_schema=['vec'])
-class Slip(NNOperator)
+class Slip(NNOperator):
     """
     SLIP multi-modal embedding operator
     """
     def __init__(self, model_name: str, modality: str):
         super().__init__()
         sys.path.append(str(Path(__file__).parent))
+        import models
+        from tokenizer import SimpleTokenizer
         self.tokenizer = SimpleTokenizer()
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self._modality = modality
+        self.model = load_checkpoint(self._configs()[model_name]['weights'], models, self.device) 
         self.model.to(self.device)
         self.model.eval()
 
@@ -57,17 +95,18 @@ class Slip(NNOperator)
     def __call__(self, data):
         if self._modality == 'image':
              vec = self._inference_from_image(data)
-         elif self._modality == 'text':
-             vec = self._inference_from_text(data)
-         else:
-             raise ValueError("modality[{}] not implemented.".format(self._modality))
-         return vec.detach().cpu().numpy().flatten()
+        elif self._modality == 'text':
+            vec = self._inference_from_text(data)
+        else:
+            raise ValueError("modality[{}] not implemented.".format(self._modality))
+        vec = vec / vec.norm(dim=-1, keepdim=True)
+        return vec.detach().cpu().numpy().flatten()
 
     def _inference_from_text(self, text):
-        texts = tokenizer(texts).cuda(non_blocking=True)
-        texts = texts.view(-1, 77).contiguous()
-        embedding = get_model(self.model).encode_text(texts)
-        embedding = embedding / embedding.norm(dim=-1, keepdim=True)
+        text = self.tokenizer(text).to(self.device)
+        text = text.view(-1, 77).contiguous()
+        embedding = get_model(self.model).encode_text(text)
+        return embedding
 
     @arg(1, to_image_color('RGB'))
     def _inference_from_image(self, img):
diff --git a/utils.py b/utils.py
deleted file mode 100644
index 2ffd522..0000000
--- a/utils.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import torch
-
-def get_model(model):
-    if isinstance(model, torch.nn.DataParallel) \
-      or isinstance(model, torch.nn.parallel.DistributedDataParallel):
-        return model.module
-    else:
-        return model