Spaces:

Walid-Ahmed
/

summarize_audio_file

Sleeping

App Files Files Community

Walid-Ahmed committed on Jan 16

Commit

dc1d260

verified ·

1 Parent(s): 436d7be

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -35

app.py CHANGED Viewed

@@ -1,47 +1,48 @@
-import gradio as gr
 import whisper
-from transformers import pipeline
-# torch is required for the CUDA check below; import it here because the
-# import list above does not bring it into scope.
-import torch
-# Check if GPU is available and set device accordingly.
-# transformers pipelines take a device index: 0 = first GPU, -1 = CPU.
-device = 0 if torch.cuda.is_available() else -1
-if device == 0:
-    print("Running on GPU")
-else:
-    print("Running on CPU")
-# Load the tiny Whisper model.
-# openai-whisper expects a torch device (e.g. "cuda" / "cpu"); the
-# pipeline-style index -1 is not a valid torch device, so translate it.
-whisper_model = whisper.load_model("tiny", device="cuda" if device == 0 else "cpu")
-# Load the text summarization model from Hugging Face
-summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn", device=device)
-# Function to transcribe and summarize the audio file
-def transcribe_and_summarize(audio):
-    """Transcribe an audio file with Whisper and summarize the transcript.
-
-    Args:
-        audio: Path to the uploaded audio file (the Gradio input uses
-            type="filepath"), or None when nothing was uploaded.
-
-    Returns:
-        A (transcription, summary_text) tuple of strings. Both are empty
-        when no audio was supplied; the summary is empty when the
-        transcription contains no text.
-    """
-    # Nothing uploaded: return empty outputs instead of failing inside Whisper.
-    if not audio:
-        return "", ""
-    # Step 1: Transcribe the audio using Whisper
-    transcription = whisper_model.transcribe(audio)['text']
-    # An empty transcript gives the summarizer nothing meaningful to work on.
-    if not transcription.strip():
-        return transcription, ""
-    # Step 2: Summarize the transcription. truncation=True keeps long
-    # transcripts within the summarization model's maximum input length.
-    summary = summarizer(transcription, min_length=10, max_length=100, truncation=True)
-    return transcription, summary[0]['summary_text']
-# Build the Gradio UI: one audio-file input, two text outputs.
-_audio_input = gr.Audio(type="filepath", label="Upload your audio file")
-_text_outputs = [gr.Textbox(label="Transcription"), gr.Textbox(label="Summary")]
-interface = gr.Interface(
-    fn=transcribe_and_summarize,
-    inputs=_audio_input,
-    outputs=_text_outputs,
-    title="Whisper Tiny Transcription and Summarization",
-    description="Upload an audio file, get the transcription from Whisper tiny model and the summarized version using Hugging Face."
 )
-# Start serving the app with debug output enabled.
-interface.launch(debug=True)

 import whisper
+import gradio as gr
+from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+# Accelerate device-placement helper (this is not DeepSpeed ZeRO)
+from accelerate.utils import set_module_tensor_to_device
+import torch
+# openai-whisper places the whole model on a single torch device, so use the
+# GPU when one is visible and fall back to the CPU otherwise.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Retained as a module-level name for backward compatibility; openai-whisper
+# does not accept a device map.
+device_map = "auto"
+print(f"Loading Whisper on device: {device}")
+# Hugging Face Hub id; only meaningful for the transformers tokenizer below.
+model_name = "openai/whisper-tiny"
+# whisper.load_model() takes openai-whisper's own short names ("tiny",
+# "base", ...) or a checkpoint path, not a Hub id. It downloads and loads the
+# weights itself, so no Accelerate empty-weights/dispatch step is involved.
+whisper_model = whisper.load_model("tiny", device=device)
+# Load tokenizer (not referenced by transcribe() in this diff; kept so the
+# module-level name stays available).
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Define the transcription function
+def transcribe(audio):
+    """Transcribe a recorded audio file with the module-level Whisper model.
+
+    Args:
+        audio: Path to the recorded audio file (the Gradio input uses
+            type="filepath"), or None when nothing was recorded.
+
+    Returns:
+        The transcribed text, or an empty string when no audio was given.
+    """
+    # Submitting without a recording yields no file path; return an empty
+    # transcription instead of failing inside Whisper.
+    if not audio:
+        return ""
+    # Perform transcription using the Whisper model
+    result = whisper_model.transcribe(audio)
+    return result['text']
+# Create the Gradio interface
+# Gradio 4 renamed Audio's source="microphone" keyword to
+# sources=["microphone"]; pass whichever the installed version understands.
+if int(gr.__version__.split(".")[0]) >= 4:
+    _microphone_kwargs = {"sources": ["microphone"]}
+else:
+    _microphone_kwargs = {"source": "microphone"}
+demo = gr.Interface(
+    fn=transcribe,  # The function to be called for transcription
+    inputs=gr.Audio(  # Input audio, delivered to fn as a file path
+        type="filepath",
+        label="Speak into the microphone",
+        **_microphone_kwargs,
+    ),
+    outputs=gr.Textbox(label="Transcription"),  # Output transcription
+    title="Whisper Speech-to-Text",  # Title of the interface
+    description="Record audio using your microphone and get a transcription using the Whisper model."
 )
+# Launch the Gradio interface
+demo.launch()