diff --git a/README.md b/README.md index b496a0e..3718aab 100644 --- a/README.md +++ b/README.md @@ -69,16 +69,14 @@ Create the operator via the following factory method ## Interface -An image-text embedding operator takes a [towhee image](link/to/towhee/image/api/doc) as input and generate the correspoing caption. +An image captioning operator takes a [towhee image](link/to/towhee/image/api/doc) as input and generates the corresponding caption. **Parameters:** ​ ***data:*** *towhee.types.Image (a sub-class of numpy.ndarray)* -​ The image to generate embedding. - - +​ The image to generate a caption for. **Returns:** *str* diff --git a/clipcap.py b/clipcap.py index 7c7c5f9..268be67 100644 --- a/clipcap.py +++ b/clipcap.py @@ -33,7 +33,7 @@ class ClipCap(NNOperator): def __init__(self, model_name: str): super().__init__() sys.path.append(str(Path(__file__).parent)) - from models.clipcap import ClipCaptionModel, generate_beam + from clipcap_model.clipcap import ClipCaptionModel, generate_beam self.device = "cuda" if torch.cuda.is_available() else "cpu" self.generate_beam = generate_beam self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2") diff --git a/clipcap_model/__init__.py b/clipcap_model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/clipcap.py b/clipcap_model/clipcap.py similarity index 100% rename from models/clipcap.py rename to clipcap_model/clipcap.py