|  |  | @ -60,9 +60,11 @@ class Vggish(NNOperator): | 
			
		
	
		
			
				
					|  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |     def preprocess(self, frames: List[AudioFrame]): | 
			
		
	
		
			
				
					|  |  |  |         sr = frames[0].sample_rate | 
			
		
	
		
			
				
					|  |  |  |         layout = frames[0].lay_out | 
			
		
	
		
			
				
					|  |  |  |         audio = numpy.hstack(frames) | 
			
		
	
		
			
				
					|  |  |  |         ii = numpy.iinfo(audio.dtype) | 
			
		
	
		
			
				
					|  |  |  |         audio = 2 * audio / (ii.max - ii.min + 1) | 
			
		
	
		
			
				
					|  |  |  |         if layout == 'stereo': | 
			
		
	
		
			
				
					|  |  |  |             audio = audio.reshape(-1, 2) | 
			
		
	
		
			
				
					|  |  |  |         audio = self.int2float(audio) | 
			
		
	
		
			
				
					|  |  |  |         try: | 
			
		
	
		
			
				
					|  |  |  |             audio = audio.transpose() | 
			
		
	
		
			
				
					|  |  |  |             audio_tensors = vggish_input.waveform_to_examples(audio, sr, return_tensor=True) | 
			
		
	
	
		
			
				
					|  |  | @ -71,3 +73,19 @@ class Vggish(NNOperator): | 
			
		
	
		
			
				
					|  |  |  |             log.error("Fail to load audio data.") | 
			
		
	
		
			
				
					|  |  |  |             raise e | 
			
		
	
		
			
				
					|  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |     def int2float(self, wav: numpy.ndarray, dtype: str = 'float64'): | 
			
		
	
		
			
				
					|  |  |  |         """ | 
			
		
	
		
			
				
					|  |  |  |         Convert audio data from int to float. | 
			
		
	
		
			
				
					|  |  |  |         The input dtype must be integers. | 
			
		
	
		
			
				
					|  |  |  |         The output dtype is controlled by the parameter `dtype`, defaults to 'float64'. | 
			
		
	
		
			
				
					|  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |         The code is inspired by https://github.com/mgeier/python-audio/blob/master/audio-files/utility.py | 
			
		
	
		
			
				
					|  |  |  |         """ | 
			
		
	
		
			
				
					|  |  |  |         assert wav.dtype.kind in 'iu' | 
			
		
	
		
			
				
					|  |  |  |         dtype = numpy.dtype(dtype) | 
			
		
	
		
			
				
					|  |  |  |         assert dtype.kind == 'f' | 
			
		
	
		
			
				
					|  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |         ii = numpy.iinfo(wav.dtype) | 
			
		
	
		
			
				
					|  |  |  |         abs_max = 2 ** (ii.bits - 1) | 
			
		
	
		
			
				
					|  |  |  |         offset = ii.min + abs_max | 
			
		
	
		
			
				
					|  |  |  |         return (wav.astype(dtype) - offset) / abs_max | 
			
		
	
	
		
			
				
					|  |  | 
 |