-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathqTranscribeq.py
149 lines (129 loc) · 5.19 KB
/
qTranscribeq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import streamlit as st
import os
import requests
import logging
from pathlib import Path
from pydub import AudioSegment
from tqdm import tqdm
import yaml
import tempfile
# Initialize logging: INFO level and above, emitting only the bare message text
# (no timestamp, level, or logger name).
logging.basicConfig(level=logging.INFO, format="%(message)s")
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
# Load configuration and initialize Groq client
def load_config():
    """
    Loads the application settings from ``config.yaml``.

    The path is relative, so the file is looked up in the current working
    directory of the process.

    Returns:
        The parsed YAML document as produced by ``yaml.safe_load``.

    Raises:
        OSError: If ``config.yaml`` cannot be opened.
        yaml.YAMLError: If the file content is not valid YAML.
    """
    # Pin the encoding so the file is decoded the same way on every platform
    # instead of depending on the locale default.
    with open("config.yaml", "r", encoding="utf-8") as file:
        return yaml.safe_load(file)
# Read the settings once at import time; a missing file or a missing
# "groq" / "api_key" entry raises here, before main() runs.
config = load_config()
# API key sent as the Bearer token on each transcription request.
groq_api_key = config["groq"]["api_key"]
def transcribe_audio(audio_file_path, timeout=(10, 300)):
    """
    Transcribes spoken words from an audio file into text using the Groq Whisper model.

    Args:
        audio_file_path: Path of the audio file to upload.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post``.

    Returns:
        The transcribed text, or ``None`` if the request fails for any reason.
        Failures are logged rather than raised so that a caller processing
        several chunks can carry on with the remaining ones.
    """
    try:
        with open(audio_file_path, "rb") as audio_file:
            response = requests.post(
                "https://api.groq.com/openai/v1/audio/transcriptions",
                headers={
                    "Authorization": f"Bearer {groq_api_key}",
                },
                files={
                    "file": audio_file,
                },
                data={
                    "model": "whisper-large-v3",
                    "prompt": "Please transcribe the audio content accurately.",
                    "response_format": "json",
                    "language": "en",
                    "temperature": 0.0,
                },
                # requests waits forever by default; without a timeout a
                # stalled connection would block the app indefinitely.
                timeout=timeout,
            )
        # A non-JSON body makes .json() raise; that is handled by the
        # best-effort except clause below.
        response_data = response.json()
        if response.status_code == 200 and "text" in response_data:
            return response_data["text"]
        logger.error(f"Failed to transcribe audio: {response_data}\n")
        return None
    except Exception as e:
        # Deliberately broad: any failure (I/O, network, timeout, decoding)
        # is reported as "no transcription" instead of crashing the caller.
        logger.error(f"Transcription failed due to an error: {e}\n")
        return None
def get_chunk_length_ms(file_path, max_size_mb=24):
    """
    Calculates the longest chunk length in milliseconds that fits the size limit.

    The chunks produced by ``split_audio`` are exported as uncompressed WAV,
    so the length is derived from the decoded PCM byte rate rather than from
    the size of the (possibly compressed) source file. Using the source file
    size would badly overestimate the allowed length for mp3/m4a input and
    yield chunks far larger than ``max_size_mb``.

    Args:
        file_path: Path of the audio file to inspect.
        max_size_mb: Maximum size of one exported chunk, in mebibytes.

    Returns:
        The chunk length in milliseconds, always at least 1.
    """
    audio = AudioSegment.from_file(file_path)
    max_size_bytes = max_size_mb * 1024 * 1024  # Slightly less than 25 MB to account for overhead
    # Bytes of PCM data per second: frames per second times bytes per frame
    # (frame_width already accounts for channel count and sample width).
    pcm_bytes_per_second = audio.frame_rate * audio.frame_width
    # Clamp to 1 ms so the value is always a valid, positive range() step.
    chunk_length_ms = max(1, int(max_size_bytes * 1000 / pcm_bytes_per_second))
    logger.info(f"Calculated chunk length: {chunk_length_ms} ms")
    return chunk_length_ms
def split_audio(file_path, chunk_length_ms):
    """
    Cuts an audio file into consecutive WAV pieces of at most ``chunk_length_ms``.

    Each piece is written to the current working directory as
    ``<source stem>_chunk<index>.wav``; the final piece may be shorter.

    Returns:
        The list of chunk file names, in playback order.
    """
    segment = AudioSegment.from_file(file_path)
    stem = Path(file_path).stem  # Accepts both str and Path inputs
    total_ms = len(segment)
    written = []
    for index, start_ms in enumerate(range(0, total_ms, chunk_length_ms)):
        target = f"{stem}_chunk{index}.wav"
        piece = segment[start_ms:start_ms + chunk_length_ms]
        piece.export(target, format="wav")
        written.append(target)
    return written
def process_audio_file(file_path):
    """
    Process the audio file: split if necessary, transcribe, and clean up.

    The file is split into size-limited chunks, each chunk is transcribed in
    order, and the chunk files are removed afterwards even if an error
    interrupts the loop.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The transcriptions of all successfully transcribed chunks joined by
        newlines, or ``None`` if the user declined to split a long recording.
    """
    audio = AudioSegment.from_file(file_path)
    duration_seconds = len(audio) // 1000
    st.write(f"The audio file is {duration_seconds} seconds long.")

    if duration_seconds > 600:  # 10 minutes
        user_input = st.radio("The audio is longer than 10 minutes. Do you want to split it into smaller chunks for transcription?", ("Yes", "No"))
        if user_input != "Yes":
            st.error("Transcription aborted by user.")
            return None

    chunk_length_ms = get_chunk_length_ms(file_path)
    chunks = split_audio(file_path, chunk_length_ms)
    all_transcriptions = []
    failed_chunks = 0
    progress_bar = st.progress(0)
    try:
        for i, chunk in enumerate(chunks):
            transcribed_text = transcribe_audio(chunk)
            if transcribed_text:
                all_transcriptions.append(transcribed_text)
            elif transcribed_text is None:
                # None signals a failed request; an empty string is just a
                # chunk without speech and is not counted as a failure.
                failed_chunks += 1
            progress_bar.progress((i + 1) / len(chunks))
    finally:
        # Remove every chunk file, including those never reached because an
        # exception interrupted the loop.
        for chunk in chunks:
            if os.path.exists(chunk):
                os.remove(chunk)

    if failed_chunks:
        # Make gaps in the transcript visible instead of dropping them silently.
        st.warning(
            f"{failed_chunks} of {len(chunks)} chunks could not be transcribed; "
            "the transcription is incomplete."
        )
    return "\n".join(all_transcriptions)
def main():
    """
    Main function to handle audio transcription.

    Renders the Streamlit page, takes the audio either from an uploaded file
    or from a path typed by the user, runs the transcription, and offers the
    result for viewing and download.
    """
    st.title("gTranscribeq")
    st.write("Upload an audio file or enter the path to transcribe:")

    uploaded_file = st.file_uploader("Drag and drop an audio file here", type=["wav", "mp3", "m4a"])
    file_path = st.text_input("Or enter the file path")

    if uploaded_file is not None:
        # Keep the upload's real extension so the temporary file name matches
        # its content; fall back to ".wav" if the name has no extension.
        suffix = Path(uploaded_file.name).suffix.lower() or ".wav"
        # Save the uploaded file temporarily
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            temp_path = tmp_file.name
            tmp_file.write(uploaded_file.getvalue())
        try:
            full_transcription = process_audio_file(temp_path)
        finally:
            # Remove the temporary file even when processing raises.
            os.unlink(temp_path)
    elif st.button("Transcribe") and file_path:
        file_path = Path(file_path)
        # is_file() is False for missing paths as well; the extension is
        # compared case-insensitively so e.g. ".MP3" is accepted.
        if not file_path.is_file() or file_path.suffix.lower() not in (".wav", ".mp3", ".m4a"):
            st.error("Invalid file path. Please provide a valid .wav, .mp3, or .m4a file.")
            return
        full_transcription = process_audio_file(file_path)
    else:
        return

    if full_transcription:
        st.text_area("Transcription", full_transcription, height=300)
        st.download_button(
            label="Download Transcription",
            data=full_transcription,
            file_name="transcription.txt",
            mime="text/plain"
        )


if __name__ == "__main__":
    main()