faiss-index
              
                
                
            
          copied
				 5 changed files with 157 additions and 1 deletions
			
			
		@ -1,2 +1,91 @@ | 
			
		|||||
# faiss | 
				 | 
			
		||||
 | 
				# Operator: ANN Search: Faiss | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				*author: shiyu* | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				<br /> | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				## Description | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				Search for embeddings in [Faiss](https://github.com/facebookresearch/faiss). **Please make sure you have inserted data into Faiss before searching**. | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				<br /> | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				## Code Example | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				- Insert data into Faiss first | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				```python | 
			
		||||
 | 
				import numpy as np | 
			
		||||
 | 
				import towhee | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				vec = np.random.random((10, 100)).astype('float32') | 
			
		||||
 | 
				ids = list(i for i in range(10)) | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				x = towhee.dc['id'](ids) \ | 
			
		||||
 | 
				    .runas_op['id', 'vec'](func=lambda x: vec[x]) \ | 
			
		||||
 | 
				    .to_faiss['id', 'vec'](findex='index.bin') | 
			
		||||
 | 
				``` | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				-  Example | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				*Write the pipeline in simplified style:* | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				```python | 
			
		||||
 | 
				query = vec[0:2] | 
			
		||||
 | 
				towhee.dc(query) \ | 
			
		||||
 | 
				    .ann_search.faiss(findex='index.bin') | 
			
		||||
 | 
				``` | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				*Write the same pipeline with explicit input/output name specifications:* | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				```python | 
			
		||||
 | 
				query = vec[0:2] | 
			
		||||
 | 
				towhee.dc['vec'](query) \ | 
			
		||||
 | 
				    .ann_search.faiss['vec', 'results'](findex='index.bin') \ | 
			
		||||
 | 
				    .show() | 
			
		||||
 | 
				``` | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				<img src="./result.png" height="100px"/> | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				<br /> | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				## Factory Constructor | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				Create the operator via the following factory method: | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				***ann-search.faiss(findex)*** | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				**Parameters:** | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				***findex:*** *str* or *faiss.INDEX* | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				The path to faiss index file or faiss index. | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				<br /> | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				## Interface | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				**Parameters:** | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				***query:*** *list* | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				The query embedding to search for in Faiss. | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				**Returns:** *Entity* | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				The search results from Faiss, each with a `key` and a `score`. | 
			
		||||
 | 
			
		|||||
@ -0,0 +1,4 @@ | 
			
		|||||
 | 
				from .faiss import Faiss | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				def faiss(*args, **kwargs):
				    """Build a `Faiss` search operator.

				    Every positional and keyword argument is handed to the `Faiss`
				    constructor unchanged.
				    """
				    operator = Faiss(*args, **kwargs)
				    return operator
			
		||||
@ -0,0 +1,60 @@ | 
			
		|||||
 | 
				import numpy as np | 
			
		||||
 | 
				from pathlib import Path | 
			
		||||
 | 
				import faiss | 
			
		||||
 | 
				from towhee import register | 
			
		||||
 | 
				from towhee.utils.faiss_utils import KVStorage | 
			
		||||
 | 
				from towhee.functional.entity import Entity | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				
 | 
			
		||||
 | 
				@register(output_schema=['result'])
				class Faiss:
				    """
				    Search for embedding vectors in a Faiss index.

				    The index must already contain data before searching; refer to the
				    DataCollection mixin `to_faiss`.

				    Args:
				        findex (`str` or `faiss.INDEX`):
				            The path to a faiss index file, or an already loaded faiss index.
				        kwargs:
				            Keyword arguments forwarded to `index.search`, refer to
				            https://github.com/facebookresearch/faiss/wiki. The parameter `k`
				            defaults to 10.

				    Examples:

				    >>> import towhee
				    >>> res = (
				    ...    towhee.glob['path']('./*.jpg')
				    ...           .image_decode['path', 'img']()
				    ...           .image_embedding.timm['img', 'vec'](model_name='resnet50')
				    ...           .ann_search.faiss['vec', 'results'](findex='./faiss/faiss.index')
				    ...           .to_list()
				    ... )
				    [<Entity dict_keys(['path', 'img', 'vec', 'results'])>,
				     <Entity dict_keys(['path', 'img', 'vec', 'results'])>]
				    """

				    def __init__(self, findex, **kwargs):
				        """Store the search options and, for a path, load the index from disk.

				        When `findex` is a string, the index is read with `faiss.read_index`
				        and a key-value store is attached if its companion file exists.
				        Any other value is used directly as the index object.
				        """
				        self.faiss_index = findex
				        self.kwargs = kwargs
				        self.kv_storage = None
				        if isinstance(findex, str):
				            # NOTE(review): `str.strip('./')` removes every leading/trailing
				            # '.' and '/' character (not just a './' prefix) and `replace`
				            # rewrites every '.', so absolute paths or names with several
				            # dots map to surprising kv file names. Kept unchanged because
				            # the code that writes the kv file presumably derives the same
				            # name -- confirm before altering.
				            kv_file = findex.strip('./').replace('.', '_kv.')
				            index_file = Path(findex)
				            self.faiss_index = faiss.read_index(str(index_file))
				            if Path(kv_file).exists():
				                self.kv_storage = KVStorage(kv_file)

				    def __call__(self, query: list):
				        """Search the index for the neighbours of a single query vector.

				        Args:
				            query (`list`):
				                One embedding vector; it is wrapped into a batch of one
				                before being passed to `index.search`.

				        Returns:
				            A list of `Entity` objects, each with a `key` and a `score`.
				            `key` is looked up in the key-value store when one was loaded,
				            otherwise it is the raw id returned by the index.
				        """
				        self.kwargs.setdefault('k', 10)

				        query = np.array([query])
				        # A plain Python list of floats becomes float64, while Faiss float
				        # indexes work on float32 vectors. Only floating dtypes are cast so
				        # that other inputs reach the index untouched.
				        if np.issubdtype(query.dtype, np.floating) and query.dtype != np.float32:
				            query = query.astype(np.float32)
				        scores, ids = self.faiss_index.search(query, **self.kwargs)

				        # NOTE(review): ids returned by the index are not filtered here, so
				        # any placeholder id the index emits is passed through as well.
				        result = []
				        for idx, score in zip(ids[0].tolist(), scores[0]):
				            key = self.kv_storage.get(idx) if self.kv_storage is not None else idx
				            result.append(Entity(**{'key': key, 'score': score}))
				        return result
			
		||||
@ -0,0 +1,3 @@ | 
			
		|||||
 | 
				faiss-cpu | 
			
		||||
 | 
				numpy | 
			
		||||
 | 
				towhee | 
			
		||||
| 
		 After Width: | Height: | Size: 56 KiB  | 
					Loading…
					
					
				
		Reference in new issue