faiss-index
copied
5 changed files with 157 additions and 1 deletions
@ -1,2 +1,91 @@ |
|||
# faiss |
|||
# Operator: ANN Search: Faiss |
|||
|
|||
*author: shiyu* |
|||
|
|||
<br /> |
|||
|
|||
|
|||
|
|||
## Description |
|||
|
|||
Search for embeddings in [Faiss](https://github.com/facebookresearch/faiss). **Please make sure you have inserted data into Faiss before searching.** |
|||
|
|||
<br /> |
|||
|
|||
|
|||
|
|||
## Code Example |
|||
|
|||
- Insert data into Faiss first |
|||
|
|||
```python |
|||
import numpy as np |
|||
import towhee |
|||
|
|||
vec = np.random.random((10, 100)).astype('float32') |
|||
ids = list(i for i in range(10)) |
|||
|
|||
x = towhee.dc['id'](ids) \ |
|||
.runas_op['id', 'vec'](func=lambda x: vec[x]) \ |
|||
.to_faiss['id', 'vec'](findex='index.bin') |
|||
``` |
|||
|
|||
- Example |
|||
|
|||
*Write the pipeline in simplified style:* |
|||
|
|||
```python |
|||
query = vec[0:2] |
|||
towhee.dc(query) \ |
|||
.ann_search.faiss(findex='index.bin') |
|||
``` |
|||
|
|||
*Write the same pipeline with explicit input/output name specifications:* |
|||
|
|||
```python |
|||
query = vec[0:2] |
|||
towhee.dc['vec'](query) \ |
|||
.ann_search.faiss['vec', 'results'](findex='index.bin') \ |
|||
.show() |
|||
``` |
|||
|
|||
<img src="./result.png" height="100px"/> |
|||
|
|||
<br /> |
|||
|
|||
|
|||
|
|||
## Factory Constructor |
|||
|
|||
Create the operator via the following factory method: |
|||
|
|||
***ann_search.faiss(findex)*** |
|||
|
|||
|
|||
|
|||
**Parameters:** |
|||
|
|||
|
|||
|
|||
***findex:*** *str* or *faiss.INDEX* |
|||
|
|||
The path to faiss index file or faiss index. |
|||
|
|||
|
|||
<br /> |
|||
|
|||
|
|||
|
|||
## Interface |
|||
|
|||
**Parameters:** |
|||
|
|||
***query:*** *list* |
|||
|
|||
Query embeddings in Faiss |
|||
|
|||
|
|||
|
|||
**Returns:** *List[Entity]* |
|||
|
|||
Return the search results from Faiss as a list of entities, each with a `key` and a `score`. |
|||
|
@ -0,0 +1,4 @@ |
|||
from .faiss import Faiss |
|||
|
|||
def faiss(*args, **kwargs):
    """Build a `Faiss` operator, forwarding every argument to its constructor."""
    operator = Faiss(*args, **kwargs)
    return operator
@ -0,0 +1,60 @@ |
|||
import numpy as np |
|||
from pathlib import Path |
|||
import faiss |
|||
from towhee import register |
|||
from towhee.utils.faiss_utils import KVStorage |
|||
from towhee.functional.entity import Entity |
|||
|
|||
|
|||
@register(output_schema=['result'])
class Faiss:
    """
    Search for the nearest neighbours of an embedding vector in a Faiss index.

    The index must already contain data before searching; refer to the
    DataCollection mixin `to_faiss` for inserting it.

    Args:
        findex (`str` or `faiss.Index`):
            Path to a Faiss index file, or an already-loaded Faiss index. When a
            path is given, the index is read from that file, and a key-value
            file whose name is derived from the path (e.g. `index.bin` ->
            `index_kv.bin`) is loaded if it exists so that Faiss ids can be
            mapped back to keys.
        kwargs:
            Keyword arguments forwarded to `index.search`, refer to
            https://github.com/facebookresearch/faiss/wiki. The parameter `k`
            defaults to 10.

    Examples:

    >>> import towhee
    >>> res = (
    ...     towhee.glob['path']('./*.jpg')
    ...           .image_decode['path', 'img']()
    ...           .image_embedding.timm['img', 'vec'](model_name='resnet50')
    ...           .ann_search.faiss['vec', 'results'](findex='./faiss/faiss.index')
    ...           .to_list()
    ... )
    [<Entity dict_keys(['path', 'img', 'vec', 'results'])>,
     <Entity dict_keys(['path', 'img', 'vec', 'results'])>]
    """

    def __init__(self, findex, **kwargs):
        self.faiss_index = findex
        self.kwargs = kwargs
        self.kv_storage = None
        if isinstance(findex, str):
            # NOTE(review): `strip('./')` removes every leading/trailing '.' and
            # '/' character (not just a './' prefix) and `replace` rewrites every
            # '.', so absolute paths, '../' paths, or dotted directory names map
            # to a different location. Kept unchanged because the name presumably
            # has to match the one used when the index was written -- confirm
            # against `to_faiss` before changing it.
            kv_file = findex.strip('./').replace('.', '_kv.')
            self.faiss_index = faiss.read_index(str(Path(findex)))
            if Path(kv_file).exists():
                self.kv_storage = KVStorage(kv_file)

    def __call__(self, query: list):
        """
        Search the index for the neighbours of a single query embedding.

        Args:
            query (`list`):
                One embedding vector; it is wrapped into a batch of size one.

        Returns:
            A list of `Entity` objects with `key` (the stored key when a
            key-value file was loaded, otherwise the Faiss id) and `score`.
            Slots that Faiss could not fill are omitted, so the list may hold
            fewer than `k` items.
        """
        # Apply the default `k` without mutating the kwargs stored on the instance.
        search_kwargs = {'k': 10, **self.kwargs}

        batch = np.array([query])
        if batch.dtype == np.float64:
            # Plain Python floats become float64, but Faiss float indexes work on
            # float32. Other dtypes (e.g. uint8 for binary indexes) are untouched.
            batch = batch.astype(np.float32)

        scores, ids = self.faiss_index.search(batch, **search_kwargs)

        result = []
        for idx, score in zip(ids[0].tolist(), scores[0]):
            if idx == -1:
                # Faiss pads with id -1 when fewer than `k` neighbours exist.
                continue
            key = idx if self.kv_storage is None else self.kv_storage.get(idx)
            result.append(Entity(key=key, score=score))
        return result
@ -0,0 +1,3 @@ |
|||
faiss-cpu |
|||
numpy |
|||
towhee |
After Width: | Height: | Size: 56 KiB |
Loading…
Reference in new issue