我正在尝试使用 userMedia 实现语音转文本。我把音频流写入 Node.js 服务器上的一个文件,文件已成功创建;但在尝试使用 Azure 的 fromStreamInput
将语音转换为文本时,得到的结果是 undefined。
// Azure Speech resource credentials. The "--" values are placeholders and must
// be replaced with a real subscription key and region.
// NOTE(review): prefer reading the key from an environment variable or secret
// store instead of committing it to source.
const subscriptionKey = "--";
const serviceRegion = "--"; // e.g., "westus"

// Speech service configuration built from the key/region above; it is passed
// to the SpeechRecognizer constructor further down.
const s = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
/**
 * Synchronously reads a file and returns its contents, minus the first
 * 44 bytes, as a standalone ArrayBuffer.
 *
 * NOTE(review): 44 is presumably the size of a canonical RIFF/WAV header;
 * the function does not verify that the file actually is a WAV file.
 *
 * @param {string} filename - Path of the file to read.
 * @returns {ArrayBuffer} A fresh buffer holding bytes 44..end of the file
 *   (empty when the file is 44 bytes or shorter).
 */
function LoadArrayFromFile(filename) {
  const SKIPPED_HEADER_BYTES = 44;
  const raw = fs.readFileSync(filename);
  const payload = raw.subarray(SKIPPED_HEADER_BYTES);
  // Copy into a new typed array so the returned ArrayBuffer contains exactly
  // the payload bytes rather than a window onto Node's shared Buffer pool.
  const detached = new Uint8Array(payload.length);
  detached.set(payload);
  return detached.buffer;
}
// Audio payload handed to the recognizer, with the first 44 bytes already
// dropped by LoadArrayFromFile.
// NOTE(review): the input here is an .mp3 file. Stream input is documented as
// expecting uncompressed PCM (WAV) audio by default, and an MP3 file has no
// 44-byte WAV header to skip — confirm the file is converted to PCM first.
const fileBuffer = LoadArrayFromFile('output.mp3');

// Number of bytes of fileBuffer already delivered through read().
let bytesSent = 0;

// Pull-model audio stream: the SDK calls read() whenever it wants more audio.
const p = sdk.AudioInputStream.createPullStream({
  // Nothing to release; the whole file is already held in memory.
  close: () => {},

  /**
   * Copies the next chunk of fileBuffer into the buffer supplied by the SDK.
   *
   * @param {ArrayBuffer} buffer - Destination buffer; its byteLength is the
   *   maximum number of bytes that may be written.
   * @returns {number} Number of bytes written (0 once the file is exhausted).
   */
  read: (buffer) => {
    const remaining = fileBuffer.byteLength - bytesSent;
    const count = Math.min(buffer.byteLength, remaining);

    // Copy exactly `count` bytes. The previous code passed an inclusive end
    // index to ArrayBuffer.slice (whose end is exclusive), so the last byte
    // of every chunk was never copied even though it was reported as read.
    new Uint8Array(buffer).set(new Uint8Array(fileBuffer, bytesSent, count));
    bytesSent += count;

    // Kept from the original: schedule a close when the total delivered so far
    // is smaller than one SDK buffer (i.e. the whole file fits in one read).
    // NOTE(review): presumably meant to close the stream at end of data —
    // confirm the intended condition.
    if (bytesSent < buffer.byteLength) {
      setTimeout(() => p.close(), 1000);
    }

    return count;
  },
});
// Wire the pull stream into a recognizer that uses the speech config `s`.
const config = sdk.AudioConfig.fromStreamInput(p);
const r = new sdk.SpeechRecognizer(s, config);

// Report why recognition was canceled. The previous handler logged `res`,
// which is not in scope here, so it always threw a ReferenceError and hid the
// actual cancellation details.
r.canceled = (o, e) => {
  console.log("canceled", e.reason, e.errorDetails);
};

// Run a single recognition pass over the stream and print the outcome.
// The recognizer is closed in both callbacks so its resources are released
// whether recognition succeeds or fails.
r.recognizeOnceAsync(
  (res) => {
    console.log(res);
    r.close();
  },
  (error) => {
    console.log(error);
    r.close();
  }
);
});
最佳答案
您的代码读取的是 .mp3 文件的内容,并将其传递给 AudioConfig.fromStreamInput()。
但是 Azure 文档说得非常清楚:这里只支持未压缩的音频(即脉冲编码调制 PCM,或 .WAV)。
在将音频传递给语音识别器之前,您需要先想办法把音频解压缩;否则就只能等 Azure 自己支持处理压缩音频。
关于“Azure 语音转文本在 Node.js 中结果为 undefined”,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/62556154/