faiss-index
copied
5 changed files with 157 additions and 1 deletions
@ -1,2 +1,91 @@ |
|||||
# faiss |
|
||||
|
# Operator: ANN Search: Faiss |
||||
|
|
||||
|
*author: shiyu* |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
|
||||
|
|
||||
|
## Description |
||||
|
|
||||
|
Search for embeddings in [Faiss](https://github.com/facebookresearch/faiss). **Please make sure you have inserted data into Faiss before searching**. |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
|
||||
|
|
||||
|
## Code Example |
||||
|
|
||||
|
- Insert data into Faiss first |
||||
|
|
||||
|
```python |
||||
|
import numpy as np |
||||
|
import towhee |
||||
|
|
||||
|
vec = np.random.random((10, 100)).astype('float32') |
||||
|
ids = list(i for i in range(10)) |
||||
|
|
||||
|
x = towhee.dc['id'](ids) \ |
||||
|
.runas_op['id', 'vec'](func=lambda x: vec[x]) \ |
||||
|
.to_faiss['id', 'vec'](findex='index.bin') |
||||
|
``` |
||||
|
|
||||
|
- Example |
||||
|
|
||||
|
*Write the pipeline in simplified style:* |
||||
|
|
||||
|
```python |
||||
|
query = vec[0:2] |
||||
|
towhee.dc(query) \ |
||||
|
.ann_search.faiss(findex='index.bin') |
||||
|
``` |
||||
|
|
||||
|
*Write the same pipeline with explicit input/output name specifications:* |
||||
|
|
||||
|
```python |
||||
|
query = vec[0:2] |
||||
|
towhee.dc['vec'](query) \ |
||||
|
.ann_search.faiss['vec', 'results'](findex='index.bin') \ |
||||
|
.show() |
||||
|
``` |
||||
|
|
||||
|
<img src="./result.png" height="100px"/> |
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
|
||||
|
|
||||
|
## Factory Constructor |
||||
|
|
||||
|
Create the operator via the following factory method: |
||||
|
|
||||
|
***ann_search.faiss(findex, \*\*kwargs)*** |
||||
|
|
||||
|
|
||||
|
|
||||
|
**Parameters:** |
||||
|
|
||||
|
|
||||
|
|
||||
|
***findex:*** *str* or *faiss.Index* |
||||
|
|
||||
|
The path to a Faiss index file, or an already loaded Faiss index. |
||||
|
|
||||
|
|
||||
|
<br /> |
||||
|
|
||||
|
|
||||
|
|
||||
|
## Interface |
||||
|
|
||||
|
**Parameters:** |
||||
|
|
||||
|
***query:*** *list* |
||||
|
|
||||
|
The query embedding to search for in Faiss. |
||||
|
|
||||
|
|
||||
|
|
||||
|
**Returns:** *list* of *Entity* |
||||
|
|
||||
|
The search results from Faiss, each with a `key` and a `score`. |
||||
|
@ -0,0 +1,4 @@ |
|||||
|
from .faiss import Faiss |
||||
|
|
||||
|
def faiss(*args, **kwargs):
    """Build a `Faiss` search operator, forwarding every argument to its constructor."""
    operator = Faiss(*args, **kwargs)
    return operator
@ -0,0 +1,60 @@ |
|||||
|
import numpy as np |
||||
|
from pathlib import Path |
||||
|
import faiss |
||||
|
from towhee import register |
||||
|
from towhee.utils.faiss_utils import KVStorage |
||||
|
from towhee.functional.entity import Entity |
||||
|
|
||||
|
|
||||
|
@register(output_schema=['result'])
class Faiss:
    """
    Search for embedding vectors in Faiss. Note that the index must already contain data
    before searching; refer to the DataCollection mixin `to_faiss`.

    Args:
        findex (`str`, `pathlib.Path` or `faiss.Index`):
            The path to a faiss index file, or an already loaded faiss index.
        kwargs:
            Keyword arguments forwarded to `index.search`, refer to
            https://github.com/facebookresearch/faiss/wiki. The parameter `k` defaults to 10.

    Examples:

    >>> import towhee
    >>> res = (
    ...     towhee.glob['path']('./*.jpg')
    ...           .image_decode['path', 'img']()
    ...           .image_embedding.timm['img', 'vec'](model_name='resnet50')
    ...           .ann_search.faiss['vec', 'results'](findex='./faiss/faiss.index')
    ...           .to_list()
    ... )
    [<Entity dict_keys(['path', 'img', 'vec', 'results'])>,
     <Entity dict_keys(['path', 'img', 'vec', 'results'])>]
    """

    def __init__(self, findex, **kwargs):
        self.faiss_index = findex
        self.kwargs = kwargs
        # Resolve the default once here instead of mutating state on every call.
        self.kwargs.setdefault('k', 10)
        self.kv_storage = None

        if isinstance(findex, (str, Path)):
            self.faiss_index = faiss.read_index(str(Path(findex)))
            for kv_file in self._kv_candidates(findex):
                if kv_file.exists():
                    self.kv_storage = KVStorage(str(kv_file))
                    break

    @staticmethod
    def _kv_candidates(findex):
        """Return the possible key-value file paths for an index path, in lookup order."""
        index_path = Path(findex)
        # Legacy derivation, tried first so index/kv pairs that resolved before still
        # resolve to the same file.
        legacy = Path(str(findex).strip('./').replace('.', '_kv.'))
        # The legacy form strips a *character set* (not the './' prefix) and rewrites
        # every dot, so it goes wrong for '../x/index.bin', absolute paths and dotted
        # directory names. Fall back to the sibling file '<stem>_kv<suffix>'.
        sibling = index_path.with_name(f'{index_path.stem}_kv{index_path.suffix}')
        return [legacy] if legacy == sibling else [legacy, sibling]

    def __call__(self, query: list):
        """
        Search the index for the nearest neighbours of a single query embedding.

        Args:
            query (`list` or `numpy.ndarray`):
                One embedding vector; it is wrapped into a batch of size one before searching.

        Returns:
            `list` of `Entity`: one entity per hit, with `key` (the value from the key-value
            store when one was loaded, otherwise the faiss id) and `score`.
        """
        batch = np.array([query])
        # Plain Python floats become float64, while faiss float indexes expect float32.
        if batch.dtype.kind == 'f' and batch.dtype != np.float32:
            batch = batch.astype('float32')

        scores, ids = self.faiss_index.search(batch, **self.kwargs)

        result = []
        for idx, score in zip(ids[0].tolist(), scores[0]):
            if idx < 0:
                # Faiss pads the result with id -1 when fewer than k neighbours exist.
                continue
            key = idx if self.kv_storage is None else self.kv_storage.get(idx)
            result.append(Entity(key=key, score=score))
        return result
@ -0,0 +1,3 @@ |
|||||
|
faiss-cpu |
||||
|
numpy |
||||
|
towhee |
After Width: | Height: | Size: 56 KiB |
Loading…
Reference in new issue