diff --git a/README.md b/README.md
index 9ba730b..6ab3a16 100644
--- a/README.md
+++ b/README.md
@@ -6,46 +6,9 @@
 ## Desription
 
 The audio embedding operator converts an input audio into a dense vector which can be used to represent the audio clip's semantics.
+Each vector represents an audio clip with a fixed length of around 2s.
 This operator is built on top of the original implementation of [CLMR](https://github.com/Spijkervet/CLMR).
-The [default model weight](./checkpoints/clmr_checkpoint_10000.pt) provided is pretrained on [Magnatagatune Dataset](https://paperswithcode.com/dataset/magnatagatune) with [SampleCNN](./models/sample_cnn.py).
-
-```python
-import numpy as np
-from towhee import ops
-
-audio_encoder = ops.audio_embedding.clmr()
-
-# Path or url as input
-audio_embedding = audio_encoder("/audio/path/or/url/")
-
-# Audio data as input
-audio_data = np.zeros((2, 441344))
-sample_rate = 44100
-audio_embedding = audio_encoder(audio_data, sample_rate)
-```
-
-## Factory Constructor
-
-Create the operator via the following factory method
-
-***ops.audio_embedding.clmr()***
-
-
-## Interface
-
-An audio embedding operator generates vectors in numpy.ndarray given an audio file path or audio data in numpy.ndarray.
-
-
-**Parameters:**
-
-​	None.
-
-
-**Returns**: *numpy.ndarray*
-
-​	Audio embeddings in shape (num_clips, 512).
-
-
+The [default model weight](clmr_checkpoint_10000.pt) provided is pretrained on [Magnatagatune Dataset](https://paperswithcode.com/dataset/magnatagatune) with [SampleCNN](sample_cnn.py).
 
 ## Code Example
 
@@ -57,9 +20,12 @@ Generate embeddings for the audio "test.wav".
 from towhee import dc
 
 dc.glob('test.wav')
+  .audio_decode()
+  .time_window(range=10)
   .audio_embedding.clmr()
   .show()
 ```
+    |  |
 
 *Write a same pipeline with explicit inputs/outputs name specifications:*
 
@@ -67,9 +33,41 @@ dc.glob('test.wav')
 from towhee import dc
 
 dc.glob['path']('test.wav')
-  .audio_embedding.clmr['path', 'vecs']()
+  .audio_decode['path', 'audio']()
+  .time_window['audio', 'frames'](range=10)
+  .audio_embedding.clmr['frames', 'vecs']()
   .select('vecs')
-  .show()
+  .to_vec()
 ```
 
+## Factory Constructor
+
+Create the operator via the following factory method
+
+***audio_embedding.clmr(framework="pytorch")***
+
+**Parameters:**
+
+​   *framework: str*
+
+​   The framework of model implementation.
+The default value is "pytorch" since the model is implemented in PyTorch.
+
+## Interface
+
+An audio embedding operator generates vectors in numpy.ndarray given an audio file path or a [towhee audio](link/to/AudioFrame/api/doc).
+
+**Parameters:**
+
+​   *Union[str, towhee.types.Audio]*
+
+​   The audio path or link in string.
+Alternatively, the audio input data as towhee audio frames.
+The input data should represent an audio clip longer than 2s.
+
+**Returns**:
+
+​   *numpy.ndarray*
 
+​   Audio embeddings in shape (num_clips, 512).
+Each embedding represents the features of an audio clip with a length of 2s.
\ No newline at end of file
diff --git a/utils/checkpoint.py b/clmr_checkpoint.py
similarity index 100%
rename from utils/checkpoint.py
rename to clmr_checkpoint.py
diff --git a/checkpoints/clmr_checkpoint_10000.pt b/clmr_checkpoint_10000.pt
similarity index 100%
rename from checkpoints/clmr_checkpoint_10000.pt
rename to clmr_checkpoint_10000.pt
diff --git a/clmr_magnatagatune.py b/clmr_magnatagatune.py
index 7328bf9..b90c0e9 100644
--- a/clmr_magnatagatune.py
+++ b/clmr_magnatagatune.py
@@ -26,14 +26,13 @@ from towhee.operator import NNOperator
 from towhee import register
 
 sys.path.append(str(Path(__file__).parent))
-
-from utils.checkpoint import load_encoder_checkpoint
-from models.sample_cnn import SampleCNN
+from clmr_checkpoint import load_encoder_checkpoint
+from sample_cnn import SampleCNN
 
 log = logging.getLogger()
 
 
-@register(output_schema=['vec'])
+@register(output_schema=['vecs'])
 class ClmrMagnatagatune(NNOperator):
     """
     Pretrained clmr
@@ -44,7 +43,7 @@ class ClmrMagnatagatune(NNOperator):
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
 
         weight_path = os.path.join(str(Path(__file__).parent),
-                                   'checkpoints/clmr_checkpoint_10000.pt')
+                                   'clmr_checkpoint_10000.pt')
         state_dict = load_encoder_checkpoint(weight_path, 1)
         encoder = SampleCNN(strides=[3, 3, 3, 3, 3, 3, 3, 3, 3], supervised=False, out_dim=1)
         encoder.load_state_dict(state_dict)
@@ -86,11 +85,11 @@ class ClmrMagnatagatune(NNOperator):
 # if __name__ == "__main__":
 #     encoder = ClmrMagnatagatune()
 #
-#     audio_path = "/audio/path/or/link"
-#     vec = encoder(audio_path)
+#     # audio_path = "/audio/path/or/link"
+#     # vec = encoder(audio_path)
 #
-#     # audio_data = numpy.zeros((2, 441344))
-#     # sample_rate = 44100
-#     # vec = encoder(audio_data, sample_rate)
+#     audio_data = numpy.zeros((2, 441344))
+#     sample_rate = 44100
+#     vec = encoder(audio_data, sample_rate)
 #
 #     print(vec.shape)
diff --git a/models/model.py b/clmr_model.py
similarity index 100%
rename from models/model.py
rename to clmr_model.py
diff --git a/models/__init__.py b/models/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/models/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/requirements.txt b/requirements.txt
index 2af2d5f..16049ae 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
 torchaudio==0.9.0
 torch==1.9.0
-soundfile
 numpy
diff --git a/models/sample_cnn.py b/sample_cnn.py
similarity index 96%
rename from models/sample_cnn.py
rename to sample_cnn.py
index 7d619c9..355cebf 100644
--- a/models/sample_cnn.py
+++ b/sample_cnn.py
@@ -1,6 +1,5 @@
-import torch
-import torch.nn as nn
-from .model import Model
+from torch import nn
+from clmr_model import Model
 
 
 class SampleCNN(Model):
diff --git a/utils/__init__.py b/utils/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/utils/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-