diff --git a/audio_embedding_vggish.yaml b/audio_embedding_vggish.yaml
index 85b426b..0b33eeb 100644
--- a/audio_embedding_vggish.yaml
+++ b/audio_embedding_vggish.yaml
@@ -1,4 +1,4 @@
-name: 'audio_embedding_vggish'
+name: 'audio_decode'
 operators:
     -
         name: '_start_op'
@@ -11,31 +11,52 @@ operators:
                 col: 0
         outputs:
             -
-                df: 'audio'
+                df: 'input_df'
         iter_info:
             type: map
+    -
+        name: 'audio_decoder'
+        function: 'towhee/audio-decoder'
+        init_args:
+        inputs:
+            -
+                df: 'input_df'
+                name: 'audio_path'
+                col: 0
+        outputs:
+            -
+                df: 'audio_df'
+        iter_info:
+            type: generator
     -
         name: 'embedding_model'
         function: 'towhee/torch-vggish'
         init_args:
         inputs:
             -
-                df: 'audio'
-                name: 'audio_path'
+                df: 'audio_df'
+                name: 'audio'
                 col: 0
+            -
+                df: 'audio_df'
+                name: 'sample_rate'
+                col: 1
         outputs:
             -
-                df: 'emb'
+                df: 'output_df'
         iter_info:
-            type: map
+            type: time_window
+            params:
+                time_range_sec: 3
+                time_step_sec: 3
     -
         name: '_end_op'
         function: '_end_op'
         init_args:
         inputs:
             -
-                df: 'emb'
-                name: 'embs'
+                df: 'output_df'
+                name: 'vec'
                 col: 0
         outputs:
             -
@@ -50,20 +71,29 @@ dataframes:
                 name: 'audio_path'
                 vtype: 'str'
     -
-        name: 'audio'
+        name: 'input_df'
         columns:
             -
                 name: 'audio_path'
                 vtype: 'str'
     -
-        name: 'emb'
+        name: 'audio_df'
+        columns:
+            -
+                name: 'audio_frame'
+                vtype: 'numpy.ndarray'
+            -
+                name: 'sample_rate'
+                vtype: 'numpy.ndarray'
+    -
+        name: 'output_df'
         columns:
             -
-                name: 'embs'
+                name: 'vec'
                 vtype: 'numpy.ndarray'
     -
         name: '_end_df'
         columns:
             -
-                name: 'embs'
+                name: 'vec'
                 vtype: 'numpy.ndarray'