from subprocess import Popen
from speech_recognition import (Recognizer, AudioFile)
from speech_recognition import (UnknownValueError, RequestError)
class SpeechOggAudioFileToText:
    """Convert an audio file to WAV with ffmpeg and transcribe the result.

    The WAV file is transcribed with ``Recognizer.recognize_google`` using
    ``language='RU'``.
    """

    # Class-level default so instances created without running ``__init__``
    # (or by subclasses overriding it) keep using the historical path.
    wav_path = 'test.wav'

    def __init__(self, wav_path='test.wav'):
        """Create the recognizer.

        Args:
            wav_path: Path of the intermediate WAV file written by
                ``ogg_to_wav`` and read by ``text``.
        """
        self.recognizer = Recognizer()
        self.wav_path = wav_path

    @staticmethod
    def _ffmpeg_command(source, target):
        """Return the ffmpeg argument list that converts *source* to *target*."""
        # '-y' overwrites an existing target without asking.  Without it
        # ffmpeg does not silently replace the file, so a WAV left over from
        # an earlier run could be transcribed instead of the new audio.
        return ['ffmpeg', '-y', '-i', source, target]

    def ogg_to_wav(self, file):
        """Convert *file* to ``self.wav_path`` by running ffmpeg.

        Args:
            file: Path of the input audio file.

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        # The context manager waits for the child and releases its handles.
        with Popen(self._ffmpeg_command(file, self.wav_path)) as process:
            return_code = process.wait()
        if return_code != 0:
            raise RuntimeError(
                "ffmpeg exited with status {0} while converting {1!r}".format(
                    return_code, file))

    @property
    def text(self):
        """Recognized text of the WAV file at ``self.wav_path``.

        Returns:
            The recognized text, or ``None`` when recognition fails; the
            reason is printed in that case.
        """
        with AudioFile(self.wav_path) as source:
            audio = self.recognizer.record(source)
        try:
            return self.recognizer.recognize_google(audio, language='RU')
        except UnknownValueError:
            print("Не удаётся распознать аудио файл")
        except RequestError as error:
            print("Не удалось запросить результаты: {0}".format(error))
        return None
def main():
    """Run ``ogg_to_wav`` on ``test.ogg`` and print the ``text`` result."""
    converter = SpeechOggAudioFileToText()
    converter.ogg_to_wav('test.ogg')
    recognized = converter.text
    print(recognized)


# Only run the conversion when executed as a script, not on import.
if __name__ == '__main__':
    main()
from vosk import Model, KaldiRecognizer
import sys
import os
import subprocess
# Transcribe the audio file named by the first command-line argument:
# ffmpeg decodes it to raw PCM on a pipe and the bytes are fed to a
# KaldiRecognizer in fixed-size chunks.
if not os.path.exists("model"):
    print ("Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder.")
    # sys.exit rather than the site-provided exit(), which is intended for
    # the interactive shell and is not guaranteed to exist.
    sys.exit(1)

# Fail with a usage message instead of an IndexError on sys.argv[1], and do
# so before the model is loaded.
if len(sys.argv) < 2:
    print("Usage: {0} <audio file>".format(sys.argv[0]))
    sys.exit(1)

sample_rate = 16000
model = Model("model")
rec = KaldiRecognizer(model, sample_rate)

# ffmpeg options: resample to sample_rate ('-ar'), one channel ('-ac 1'),
# raw signed 16-bit little-endian samples ('-f s16le') written to stdout ('-').
ffmpeg_command = [
    'ffmpeg', '-loglevel', 'quiet', '-i',
    sys.argv[1],
    '-ar', str(sample_rate), '-ac', '1', '-f', 's16le', '-',
]

# The context manager closes the pipe and waits for ffmpeg on exit, so the
# child process is not left unreaped.
with subprocess.Popen(ffmpeg_command, stdout=subprocess.PIPE) as process:
    while True:
        data = process.stdout.read(4000)
        if not data:
            # An empty read means ffmpeg closed its stdout: end of audio.
            break
        if rec.AcceptWaveform(data):
            print(rec.Result())
        else:
            print(rec.PartialResult())

print(rec.FinalResult())
import soundfile
# Read the OGG file, write the same samples out as WAV, then load the WAV
# back so `data` and `samplerate` refer to the converted file.
ogg_path = 'yourfile.ogg'
wav_path = 'newfile.wav'

data, samplerate = soundfile.read(ogg_path)
soundfile.write(wav_path, data, samplerate)
data, samplerate = soundfile.read(wav_path)