faiss-index
              
                
                
            
          copied
				 5 changed files with 157 additions and 1 deletions
			
			
		@ -1,2 +1,91 @@ | 
				
			|||
# faiss | 
				
			|||
# Operator: ANN Search: Faiss | 
				
			|||
 | 
				
			|||
*author: shiyu* | 
				
			|||
 | 
				
			|||
<br /> | 
				
			|||
 | 
				
			|||
 | 
				
			|||
 | 
				
			|||
## Description | 
				
			|||
 | 
				
			|||
Search for embeddings in [Faiss](https://github.com/facebookresearch/faiss). **Please make sure you have inserted data into Faiss before searching.** | 
				
			|||
 | 
				
			|||
<br /> | 
				
			|||
 | 
				
			|||
 | 
				
			|||
 | 
				
			|||
## Code Example | 
				
			|||
 | 
				
			|||
- Insert data into Faiss first | 
				
			|||
 | 
				
			|||
```python | 
				
			|||
import numpy as np | 
				
			|||
import towhee | 
				
			|||
 | 
				
			|||
vec = np.random.random((10, 100)).astype('float32') | 
				
			|||
ids = list(i for i in range(10)) | 
				
			|||
 | 
				
			|||
x = towhee.dc['id'](ids) \ | 
				
			|||
    .runas_op['id', 'vec'](func=lambda x: vec[x]) \ | 
				
			|||
    .to_faiss['id', 'vec'](findex='index.bin') | 
				
			|||
``` | 
				
			|||
 | 
				
			|||
-  Example | 
				
			|||
 | 
				
			|||
*Write the pipeline in simplified style:* | 
				
			|||
 | 
				
			|||
```python | 
				
			|||
query = vec[0:2] | 
				
			|||
towhee.dc(query) \ | 
				
			|||
    .ann_search.faiss(findex='index.bin') | 
				
			|||
``` | 
				
			|||
 | 
				
			|||
*Write the same pipeline with explicit input/output name specifications:* | 
				
			|||
 | 
				
			|||
```python | 
				
			|||
query = vec[0:2] | 
				
			|||
towhee.dc['vec'](query) \ | 
				
			|||
    .ann_search.faiss['vec', 'results'](findex='index.bin') \ | 
				
			|||
    .show() | 
				
			|||
``` | 
				
			|||
 | 
				
			|||
<img src="./result.png" height="100px"/> | 
				
			|||
 | 
				
			|||
<br /> | 
				
			|||
 | 
				
			|||
 | 
				
			|||
 | 
				
			|||
## Factory Constructor | 
				
			|||
 | 
				
			|||
Create the operator via the following factory method: | 
				
			|||
 | 
				
			|||
***ann_search.faiss(findex)*** | 
				
			|||
 | 
				
			|||
 | 
				
			|||
 | 
				
			|||
**Parameters:** | 
				
			|||
 | 
				
			|||
 | 
				
			|||
 | 
				
			|||
***findex:*** *str* or *faiss.INDEX* | 
				
			|||
 | 
				
			|||
The path to faiss index file or faiss index. | 
				
			|||
 | 
				
			|||
 | 
				
			|||
<br /> | 
				
			|||
 | 
				
			|||
 | 
				
			|||
 | 
				
			|||
## Interface | 
				
			|||
 | 
				
			|||
**Parameters:** | 
				
			|||
 | 
				
			|||
***query:*** *list* | 
				
			|||
 | 
				
			|||
Query embeddings in Faiss | 
				
			|||
 | 
				
			|||
 | 
				
			|||
 | 
				
			|||
**Returns:** *list* of *Entity* | 
				
			|||
 | 
				
			|||
Return the results in Faiss with `key` and `score`. | 
				
			|||
 | 
				
			|||
@ -0,0 +1,4 @@ | 
				
			|||
from .faiss import Faiss | 
				
			|||
 | 
				
			|||
def faiss(*args, **kwargs):
    """Factory entry point: build a `Faiss` operator from the given arguments.

    All positional and keyword arguments are forwarded unchanged to the
    `Faiss` constructor.
    """
    operator = Faiss(*args, **kwargs)
    return operator
				
			|||
@ -0,0 +1,60 @@ | 
				
			|||
import numpy as np | 
				
			|||
from pathlib import Path | 
				
			|||
import faiss | 
				
			|||
from towhee import register | 
				
			|||
from towhee.utils.faiss_utils import KVStorage | 
				
			|||
from towhee.functional.entity import Entity | 
				
			|||
 | 
				
			|||
 | 
				
			|||
@register(output_schema=['result'])
class Faiss:
    """
    Search for embedding vectors in a Faiss index.

    The index must already contain data before searching; refer to the
    DataCollection mixin `to_faiss` for inserting vectors.

    Args:
        findex (`str` or `faiss.INDEX`):
            The path to a faiss index file, or an already loaded faiss index.
        kwargs:
            Extra keyword arguments forwarded to `index.search`, refer to
            https://github.com/facebookresearch/faiss/wiki. The parameter `k` defaults to 10.

    Examples:

    >>> import towhee
    >>> res = (
    ...    towhee.glob['path']('./*.jpg')
    ...           .image_decode['path', 'img']()
    ...           .image_embedding.timm['img', 'vec'](model_name='resnet50')
    ...           .ann_search.faiss['vec', 'results'](findex='./faiss/faiss.index')
    ...           .to_list()
    ... )
    [<Entity dict_keys(['path', 'img', 'vec', 'results'])>,
     <Entity dict_keys(['path', 'img', 'vec', 'results'])>]
    """
    def __init__(self, findex, **kwargs):
        self.faiss_index = findex
        self.kwargs = kwargs
        self.kv_storage = None
        if isinstance(findex, str):
            # NOTE(review): `strip('./')` removes every leading/trailing '.' and
            # '/' character (not just a './' prefix) and `replace` rewrites every
            # '.', e.g. './index.bin' -> 'index_kv.bin'. Left unchanged because the
            # name presumably has to match the file written by `to_faiss` -- confirm
            # before changing it for absolute paths or multi-dot file names.
            kv_file = findex.strip('./').replace('.', '_kv.')
            index_file = Path(findex)
            self.faiss_index = faiss.read_index(str(index_file))
            # The key/value side file is optional; without it raw faiss ids are returned.
            if Path(kv_file).exists():
                self.kv_storage = KVStorage(kv_file)

    def __call__(self, query: list):
        """
        Search the index for the nearest neighbours of a single query vector.

        Args:
            query (`list`):
                One query embedding.

        Returns:
            A list of `Entity` objects with fields `key` and `score`, in the
            order returned by faiss.
        """
        # Merge the default without mutating the kwargs stored on the operator.
        search_kwargs = {'k': 10, **self.kwargs}

        # Faiss operates on float32 matrices; wrap the single vector as a 1-row batch.
        query = np.array([query], dtype='float32')
        scores, ids = self.faiss_index.search(query, **search_kwargs)

        result = []
        for idx, score in zip(ids[0].tolist(), scores[0]):
            # Faiss pads with label -1 when fewer than k neighbours are found;
            # those slots are not real hits.
            if idx == -1:
                continue
            key = idx if self.kv_storage is None else self.kv_storage.get(idx)
            result.append(Entity(**{'key': key, 'score': score}))
        return result
				
			|||
@ -0,0 +1,3 @@ | 
				
			|||
faiss-cpu | 
				
			|||
numpy | 
				
			|||
towhee | 
				
			|||
| 
		 After Width: | Height: | Size: 56 KiB  | 
					Loading…
					
					
				
		Reference in new issue