Spaces:

mumbert
/

automatic-audio-captioning-demo-dcase

Sleeping

App Files Files Community

Martí Umbert committed on Mar 15

Commit

fab9582

1 Parent(s): 6c99403

app files

Browse files

Files changed (2) hide show

app.py +12 -0
app_dcase.py +75 -0

app.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import gradio as gr
+import app_dcase
+def main():
+    demo = gr.TabbedInterface([app_dcase.create_app()],
+                            ["DCASE demo"])
+    demo.launch()
+if __name__ == "__main__":
+    main()

app_dcase.py ADDED Viewed

	@@ -0,0 +1,75 @@

+# import gradio as gr
+# from msclap import CLAP
+# clap_model = CLAP(version = 'clapcap', use_cuda=False)
+# def clap_inference(mic=None, file=None):
+#     if mic is not None:
+#         audio = mic
+#     elif file is not None:
+#         audio = file
+#     else:
+#         return "You must either provide a mic recording or a file"
+#     # Generate captions for the recording
+#     captions = clap_model.generate_caption([audio],
+#                                            resample=True,
+#                                            beam_size=5,
+#                                            entry_length=67,
+#                                            temperature=0.01)
+#     return captions[0]
+import gradio as gr
+from dcase24t6.nn.hub import baseline_pipeline
+# Build the baseline pipeline once, at import time; dcase_inference() calls
+# this module-level object for every request instead of rebuilding it.
+model = baseline_pipeline()
+def dcase_inference(mic=None, file=None):
+    if mic is not None:
+        audio, sr = mic
+        print(f"sr 1: {sr}")
+    elif file is not None:
+        audio, sr = file
+        print(f"file 1: {sr}")
+    else:
+        return "You must either provide a mic recording or a file"
+    # Generate captions for the recording
+    item = {"audio": audio, "sr": sr}
+    outputs = model(item)
+    candidate = outputs["candidates"][0]
+    return candidate
+def create_app():
+    with gr.Blocks() as demo:
+        gr.Markdown(
+            """
+            # DCASE demo for automatic audio captioning
+            """
+        )
+        gr.Interface(
+            fn=dcase_inference,
+            inputs=[
+                gr.Audio(sources="microphone", type="filepath"),
+                gr.Audio(sources="upload", type="filepath"),
+            ],
+            outputs="text",
+        )
+    return demo
+def main():
+    app = create_app()
+    app.launch(debug=True)
+if __name__ == "__main__":
+    main()