android - Google Speech to Text 对话框卡住了

标签 android speech-to-text speech

我正在开发一个应用程序,我想在其中通过语音输入使用从印地语到英语的音译。为此,我正在使用谷歌 STT API。当我的语音输入很短时,一切正常,但是当我输入很长的语音时,Dialog 会卡在“尝试说点什么……”,而且我没有得到很好的结果。

这是我的主要 Activity :-

/**
 * Activity that starts the platform speech-recognition activity for Hindi input and
 * displays both the recognized text and the output of {@link ConversionTable#transform}.
 */
public class MainActivity extends AppCompatActivity implements View.OnClickListener {

    //  Record Button
    AppCompatButton RecordBtn;

    //  TextViews showing the recognized (original) text and its transformed form
    TextView Original,result;

    // Request code used to match the speech-to-text result in onActivityResult
    private final int SST_REQUEST_CODE = 101;

    // Minimum utterance length passed to the recognizer: 30 seconds, expressed in milliseconds
    private static final int MIN_SPEECH_LENGTH_MILLIS = 30 * 1000;

    //  Conversion table, created on first use and reused for later results
    ConversionTable conversionTable;

    /**
     * Inflates the layout, looks up the views and registers this activity as the
     * click listener of the record button.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        Original = findViewById(R.id.Original_Text);
        RecordBtn = findViewById(R.id.RecordBtn);
        result = findViewById(R.id.Recognized_Text);

        RecordBtn.setOnClickListener(this);
    }

    /**
     * Launches the speech-recognition activity when the record button is clicked.
     *
     * @param v the view that was clicked; clicks on any other view are ignored
     */
    @Override
    public void onClick(View v) {
        // Plain comparison instead of switch: resource ids are not guaranteed to be
        // compile-time constants in every build configuration.
        if (v.getId() != R.id.RecordBtn) {
            return;
        }

        Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

        //  ACTION_RECOGNIZE_SPEECH documents the language model as a required extra
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);

        //  The extra is expressed in milliseconds, so 30 seconds is 30 * 1000
        intent.putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS,
                MIN_SPEECH_LENGTH_MILLIS);

        //  false: do not ask for the offline recognition engine
        intent.putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, false);

        //  Use Hindi Speech Recognition Model...
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "hi-IN");

        try {
            startActivityForResult(intent, SST_REQUEST_CODE);
        } catch (ActivityNotFoundException a) {
            // No activity on the device can handle the recognition intent
            Toast.makeText(getApplicationContext(),
                    getString(R.string.error),
                    Toast.LENGTH_SHORT).show();
        }
    }

    /**
     * Receives the recognition result and shows the first match together with its
     * transformed form. Results for other request codes, non-OK result codes, and
     * missing or empty match lists are ignored.
     */
    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {
        super.onActivityResult(requestCode, resultCode, data);

        if (requestCode != SST_REQUEST_CODE || resultCode != RESULT_OK || data == null) {
            return;
        }

        ArrayList<String> matches = data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
        if (matches == null || matches.isEmpty()) {
            // Nothing was recognized; leave the current text untouched
            return;
        }

        String bestMatch = matches.get(0);
        Original.setText(bestMatch);

        if (conversionTable == null) {
            conversionTable = new ConversionTable();
        }
        result.setText(conversionTable.transform(bestMatch));
    }
}

我的 ConversionTable 类:-

package android.example.com.conversion;

import android.util.Log;

import java.util.ArrayList;
import java.util.Hashtable;

/**
 * Lookup table that rewrites Devanagari text into Latin letters.
 *
 * <p>Each key is one or two UTF-16 code units and each value is its replacement text.
 * {@link #transform(String)} walks the input from left to right, prefers the longest
 * matching key at every position and copies unmapped characters through unchanged.
 */
public class ConversionTable
{
    // Longest key stored in the table, in UTF-16 code units (the consonant + nukta pairs)
    private static final int MAX_KEY_LENGTH = 2;

    // Source sequence -> replacement text
    private final Hashtable<String,String> unicode = new Hashtable<>();

    /** Fills the table with every supported mapping. Entries marked "*" are doubtful cases. */
    private void populateHashTable()
    {
        // Signs
        unicode.put("\u0901","rha"); // anunasika (chandra bindu) *
        unicode.put("\u0902","n"); // anusvara
        unicode.put("\u0903","ah"); // visarga

        // Dependent vowel signs (matras)
        unicode.put("\u0940","ee");
        unicode.put("\u0941","u");
        unicode.put("\u0942","oo");
        unicode.put("\u0943","rhi");
        unicode.put("\u0944","rhee");   //  *
        unicode.put("\u0945","e");
        unicode.put("\u0946","e");
        unicode.put("\u0947","e");
        unicode.put("\u0948","ai");
        unicode.put("\u0949","o");
        unicode.put("\u094a","o");
        unicode.put("\u094b","o");
        unicode.put("\u094c","au");

        // U+094D is dropped from the output (mapped to the empty string)
        unicode.put("\u094d","");
        unicode.put("\u0950","om");

        // Nukta consonants (precomposed forms)
        unicode.put("\u0958","k");
        unicode.put("\u0959","kh");
        unicode.put("\u095a","gh");
        unicode.put("\u095b","z");
        unicode.put("\u095c","dh");    // *
        unicode.put("\u095d","rh");
        unicode.put("\u095e","f");

        unicode.put("\u095f","y");
        unicode.put("\u0960","ri");
        unicode.put("\u0961","lri");
        unicode.put("\u0962","lr");       //  *
        unicode.put("\u0963","lree");     //  *

        unicode.put("\u093E","aa");
        unicode.put("\u093F","i");

        //  Vowels and Consonants...
        unicode.put("\u0905","a");
        unicode.put("\u0906","a");
        unicode.put("\u0907","i");
        unicode.put("\u0908","ee");
        unicode.put("\u0909","u");
        unicode.put("\u090a","oo");
        unicode.put("\u090b","ri");
        unicode.put("\u090c","lri"); // *
        unicode.put("\u090d","e"); // *
        unicode.put("\u090e","e"); // *
        unicode.put("\u090f","e");
        unicode.put("\u0910","ai");
        unicode.put("\u0911","o");
        unicode.put("\u0912","o");
        unicode.put("\u0913","o");
        unicode.put("\u0914","au");

        unicode.put("\u0915","k");
        unicode.put("\u0916","kh");
        unicode.put("\u0917","g");
        unicode.put("\u0918","gh");
        unicode.put("\u0919","ng");
        unicode.put("\u091a","ch");
        unicode.put("\u091b","chh");
        unicode.put("\u091c","j");
        unicode.put("\u091d","jh");
        unicode.put("\u091e","ny");
        unicode.put("\u091f","t"); // Ta as in Tom
        unicode.put("\u0920","th");
        unicode.put("\u0921","d"); // Da as in David
        unicode.put("\u0922","dh");
        unicode.put("\u0923","n");
        unicode.put("\u0924","t"); // ta as in tamasha
        unicode.put("\u0925","th"); // tha as in thanks
        unicode.put("\u0926","d"); // da as in darvaaza
        unicode.put("\u0927","dh"); // dha as in dhanusha
        unicode.put("\u0928","n");
        unicode.put("\u0929","nn");
        unicode.put("\u092a","p");
        unicode.put("\u092b","ph");
        unicode.put("\u092c","b");
        unicode.put("\u092d","bh");
        unicode.put("\u092e","m");
        unicode.put("\u092f","y");
        unicode.put("\u0930","r");
        unicode.put("\u0931","rr");
        unicode.put("\u0932","l");
        unicode.put("\u0933","ll"); // the Marathi and Vedic 'L'
        unicode.put("\u0934","lll"); // the Marathi and Vedic 'L'
        unicode.put("\u0935","v");
        unicode.put("\u0936","sh");
        unicode.put("\u0937","ss");
        unicode.put("\u0938","s");
        unicode.put("\u0939","h");

        // U+093C and U+093D have no stand-alone mapping and are copied through as-is.
        unicode.put("\u0969","3"); // 3 equals to pluta
        // NOTE(review): U+014F and U+0CF1 lie outside the Devanagari block - confirm these keys
        unicode.put("\u014F","Z"); // Z equals to upadhamaniya
        unicode.put("\u0CF1","V"); // V equals to jihvamuliya

        // Consonant + nukta (U+093C) pairs, written out as Greek letters
        unicode.put("\u0926\u093C","\u03c4"); // Urdu dwad -> Greek small tau
        unicode.put("\u0924\u093C","\u03b8"); // Urdu toe -> Greek small theta
        unicode.put("\u0938\u093C","\u03c3"); // Urdu swad, se -> Greek small sigma
    }

    /** Creates a table that is ready for {@link #transform(String)}. */
    ConversionTable()
    {
        populateHashTable();
    }

    /**
     * Rewrites {@code s1} by replacing every mapped sequence with its table value.
     *
     * <p>At each position the two-unit key is tried before the one-unit key so that the
     * consonant + nukta pairs win over the bare consonant. Anything without a mapping
     * (spaces, digits, punctuation, Latin letters) is copied to the output unchanged.
     *
     * @param s1 text to convert; {@code null} is treated like the empty string
     * @return the converted text, never {@code null}
     */
    public String transform(String s1)
    {
        if (s1 == null || s1.isEmpty())
        {
            return "";
        }

        int strLen = s1.length();
        StringBuilder transformed = new StringBuilder(strLen * 2);

        int index = 0;
        while (index < strLen)
        {
            int longest = Math.min(MAX_KEY_LENGTH, strLen - index);
            String replacement = null;
            int consumed = 1;

            // Longest match first: try the pair, then fall back to the single unit
            for (int keyLen = longest; keyLen >= 1; keyLen--)
            {
                replacement = unicode.get(s1.substring(index, index + keyLen));
                if (replacement != null)
                {
                    consumed = keyLen;
                    break;
                }
            }

            if (replacement != null)
            {
                transformed.append(replacement);
            }
            else
            {
                // Unmapped character: keep it verbatim
                transformed.append(s1.charAt(index));
            }
            index += consumed;
        }

        return transformed.toString();
    }

}

我的 VowelUtil 类:-

package android.example.com.conversion;

/**
 * Classifies romanized sound tokens as vowels or consonants by exact,
 * case-sensitive comparison against fixed spelling lists.
 */
public class VowelUtil {

    /**
     * Reports whether the token is one of the known vowel spellings.
     *
     * @param strVowel token to classify; must not be {@code null}
     * @return {@code true} when the token equals one of the listed vowels
     */
    protected static boolean isVowel(String strVowel) {
        switch (strVowel) {
            case "a":
            case "aa":
            case "i":
            case "ee":
            case "u":
            case "oo":
            case "ri":
            case "lri":
            case "e":
            case "ai":
            case "o":
            case "au":
            case "om":
                return true;
            default:
                return false;
        }
    }

    /**
     * Reports whether the token is one of the known consonant spellings,
     * including the digit and Greek-letter stand-ins at the end of the list.
     *
     * @param strConsonant token to classify; must not be {@code null}
     * @return {@code true} when the token equals one of the listed consonants
     */
    protected static boolean isConsonant(String strConsonant) {
        switch (strConsonant) {
            case "k":
            case "kh":
            case "g":
            case "gh":
            case "ng":
            case "ch":
            case "chh":
            case "j":
            case "jh":
            case "ny":
            case "t":
            case "th":
            case "d":
            case "dh":
            case "n":
            case "nn":
            case "p":
            case "ph":
            case "b":
            case "bh":
            case "m":
            case "y":
            case "r":
            case "rr":
            case "l":
            case "ll":
            case "lll":
            case "v":
            case "sh":
            case "ss":
            case "s":
            case "h":
            case "3":
            case "z":
            case "Ω":
            case "κ":
            case "K":
            case "γ":
            case "ζ":
            case "φ":
            case "δ":
            case "Δ":
            case "τ":
            case "θ":
            case "σ":
                return true;
            default:
                return false;
        }
    }
}

输出:-

对于短语音输入:-

Short Input

对于长语音输入,它卡住了,无法得到结果:- enter image description here

最佳答案

问题在于 Google 的实现。我遇到了同样的困难,尝试了各种办法,但都没有奏效。

所以,我采用了另一种方法来解决这个问题:自己实现监听器(RecognitionListener)。下面是我的代码,它不会弹出内置对话框(您可以实现自己的自定义对话框),但效果非常好。

这里是你如何做到的:

/**
 * Activity that drives {@link SpeechRecognizer} directly for Hindi speech input and
 * shows the first recognized match in a text view. Recognition callbacks arrive through
 * a {@link RecognitionListener} registered in {@link #onCreate(Bundle)}.
 */
public class MainActivity extends AppCompatActivity implements View.OnClickListener {

    //  Record Button
    AppCompatButton RecordBtn;

    //  TextView to show Original
    TextView Original;

    //  SpeechRecognizer Object...
    private SpeechRecognizer speechRecognizer;

    //  For TAG
    private static final String TAG = MainActivity.class.getName();

    //  RecognizerIntent
    private Intent recognizerIntent;

    //  Request Code for Permission
    private static final int REQUEST_CODE_RECORD_AUDIO = 100;

    //  Minimum utterance length passed to the recognizer: 30 seconds, in milliseconds
    private static final int MIN_SPEECH_LENGTH_MILLIS = 30 * 1000;

    /**
     * Looks up the views, prepares the recognition intent, creates the recognizer with
     * its listener and asks for the microphone permission when it is not yet granted.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        Original = findViewById(R.id.Original_Text);
        RecordBtn = findViewById(R.id.RecordBtn);

        recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

        //  ACTION_RECOGNIZE_SPEECH documents the language model as a required extra
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);

        //  The prompt extra carries text, so resolve the resource id to its string
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_PROMPT, getString(R.string.record));

        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
            //  false: do not ask for the offline recognition engine
            recognizerIntent.putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, false);
        }

        //  The extra is expressed in milliseconds, so 30 seconds is 30 * 1000
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS,
                MIN_SPEECH_LENGTH_MILLIS);

        //  Use Hindi Speech Recognition Model...
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "hi-IN");

        speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this);

        //  Register the listener up front so results are delivered no matter how or
        //  when the permission ends up being granted
        Operation();

        //  Permission Dialog, only when the permission is still missing
        if (!checkPermission()) {
            Askpermission();
        }

        RecordBtn.setOnClickListener(this);
    }

    /** Requests the RECORD_AUDIO runtime permission. */
    private void Askpermission() {
        // No explanation needed; request the permission
        ActivityCompat.requestPermissions(this,
                new String[]{Manifest.permission.RECORD_AUDIO},
                REQUEST_CODE_RECORD_AUDIO);
    }

    /**
     * Handles the answer to the RECORD_AUDIO request and tells the user when the
     * permission was denied. Other request codes are left to the superclass.
     */
    @Override
    public void onRequestPermissionsResult(int requestCode,
                                           @NonNull String permissions[], @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);

        if (requestCode != REQUEST_CODE_RECORD_AUDIO) {
            return;
        }

        // If request is cancelled, the result arrays are empty.
        boolean granted = grantResults.length > 0
                && grantResults[0] == PackageManager.PERMISSION_GRANTED;
        if (!granted) {
            Toast.makeText(MainActivity.this, "Permission denied to record audio", Toast.LENGTH_SHORT).show();
        }
    }

    /**
     * Starts listening when the record button is clicked, provided the permission is
     * granted and a recognition service exists; otherwise asks for the permission or
     * reports that recognition is unavailable.
     */
    @Override
    public void onClick(View v) {
        // Plain comparison instead of switch: resource ids are not guaranteed to be
        // compile-time constants in every build configuration.
        if (v.getId() != R.id.RecordBtn) {
            return;
        }

        Log.d(TAG, "onClick: ");
        if (!checkPermission()) {
            Askpermission();
            return;
        }

        if (!IsAvailable(this)) {
            Toast.makeText(this, "Speech Recognition Service not Available on your device...",
                    Toast.LENGTH_SHORT)
                    .show();
            return;
        }

        Log.d(TAG, "Speech Recognition Service Available...");
        speechRecognizer.startListening(recognizerIntent);
    }

    //  Check if Speech recognition Service is Available on the Smartphone...
    private boolean IsAvailable(Context context) {
        return SpeechRecognizer.isRecognitionAvailable(context);
    }

    /** Releases the recognizer together with the activity. */
    @Override
    protected void onDestroy() {
        super.onDestroy();
        if (speechRecognizer != null) {
            speechRecognizer.destroy();
        }
    }

    // Check Audio Permission
    private boolean checkPermission() {
        return ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) ==
                PackageManager.PERMISSION_GRANTED;
    }

    /**
     * Maps a SpeechRecognizer error code to the message shown to the user.
     *
     * @param error one of the SpeechRecognizer.ERROR_* codes
     * @return the message for that code, or {@code null} when the code has no message
     */
    private String errorMessage(int error) {
        switch (error) {
            case SpeechRecognizer.ERROR_AUDIO:
                return "Error Recording Audio...";
            case SpeechRecognizer.ERROR_CLIENT:
                return "Client Side Error...";
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                return "Insufficient permissions...";
            case SpeechRecognizer.ERROR_NETWORK:
                return "Network Related Error...";
            case SpeechRecognizer.ERROR_NO_MATCH:
                return "Please Installed Offline Hindi " +
                        "Language Data...";
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                return "Recognition Busy...";
            case SpeechRecognizer.ERROR_SERVER:
                return "Please Installed Offline Hindi " +
                        "Language Data...";
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                return "Speech Timeout...";
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                return "Network Timeout Error...";
            default:
                return null;
        }
    }

    // Registers the recognition listener on the recognizer
    private void Operation() {
        speechRecognizer.setRecognitionListener(new RecognitionListener() {
            @Override
            public void onReadyForSpeech(Bundle params) {
                Log.d(TAG, "Audio Service is connected to Servers....");
                Log.d(TAG, "You can now start your speech...");
            }

            @Override
            public void onBeginningOfSpeech() {
                Log.d(TAG, "User has started speech...");
            }

            @Override
            public void onRmsChanged(float rmsdB) {
                // Input level changes are not used
            }

            @Override
            public void onBufferReceived(byte[] buffer) {
                // Raw audio buffers are not used
            }

            @Override
            public void onEndOfSpeech() {
                Log.d(TAG, "User has Finished... speech...");
            }

            @Override
            public void onError(int error) {
                Log.d(TAG, "onError: " + error);
                String message = errorMessage(error);
                if (message != null) {
                    Toast.makeText(MainActivity.this, message, Toast.LENGTH_SHORT).show();
                }
            }

            @Override
            public void onResults(Bundle results) {
                ArrayList<String> matches = results.getStringArrayList(SpeechRecognizer
                        .RESULTS_RECOGNITION);

                // Show the first match; a missing or empty list leaves the text as it was
                if (matches != null && !matches.isEmpty()) {
                    Original.setText(matches.get(0));
                }
            }

            @Override
            public void onPartialResults(Bundle partialResults) {
                // Partial results are not used
            }

            @Override
            public void onEvent(int eventType, Bundle params) {
                // Reserved events are not used
            }
        });
    }

}

关于android - Google Speech to Text 对话框卡住了,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51556974/

相关文章:

java - 连续同步读写数据通信

javascript - Microsoft Speech-to-Text SDK JS 不接受具有长字节数组的文件

c++ - 如何在 boost beast websocket 中传递模型类型

android - 如何在 Android 中隐藏 toast 消息 “Your audio will be sent to google to provide speech recognition service.”?

java - 方向更改后的 NullPointerException

android - 通过 GCM 和 xmpp 输入状态/在线状态

android - 插件 org.eclipse.ui 中未处理的事件循环异常

c# - 如何在我的骨骼跟踪程序中实现语音识别?

c# - 没有麦克风? SpeechRecognitionEngine SetInputToDefaultAudioDevice() 问题

audio - 从音频信号了解共振峰