|
@ -39,9 +39,11 @@ import towhee |
|
|
.audio_decode.ffmpeg['path', 'frames']() |
|
|
.audio_decode.ffmpeg['path', 'frames']() |
|
|
.runas_op['frames', 'frames'](func=lambda x:[y[0] for y in x]) |
|
|
.runas_op['frames', 'frames'](func=lambda x:[y[0] for y in x]) |
|
|
.audio_classification.panns['frames', ('labels', 'scores', 'vec')]() |
|
|
.audio_classification.panns['frames', ('labels', 'scores', 'vec')]() |
|
|
|
|
|
.select['path', 'labels', 'scores', 'vec']() |
|
|
.show() |
|
|
.show() |
|
|
) |
|
|
) |
|
|
``` |
|
|
``` |
|
|
|
|
|
<img src="./result.png" width="800px"/> |
|
|
|
|
|
|
|
|
<br /> |
|
|
<br /> |
|
|
|
|
|
|
|
@ -93,4 +95,3 @@ The input data should represent an audio clip longer than 2s. |
|
|
- labels: a list of the top-k labels predicted by the model. |
|
|
- labels: a list of the top-k labels predicted by the model. |
|
|
- scores: a list of scores corresponding to the labels, representing the likelihood of each label. |
|
|
- scores: a list of scores corresponding to the labels, representing the likelihood of each label. |
|
|
- vec: an audio embedding generated by the model, with shape (2048,). |
|
|
- vec: an audio embedding generated by the model, with shape (2048,). |
|
|
|
|
|
|
|
|