我是 Unity 3D 的新手,正在开发一个需要“自动”口型同步功能的应用程序。
我正在学习下面的教程
http://answers.unity3d.com/questions/139323/any-way-of-quotautomaticquot-lip-syncing.html
看看下面我的代码
using UnityEngine;
using System.Collections;
/// <summary>
/// Simple "automatic" lip sync: every frame, samples the spectrum of the
/// playing AudioSource, sums the energy in a speech frequency band
/// (freqLow..freqHigh), smooths it with a moving average, and displaces the
/// mouth transforms vertically in proportion to that energy.
/// </summary>
public class lipmovement2 : MonoBehaviour
{
    public AudioClip source_clip;   // optional explicit clip (actual playback clip comes from Rec_voice)
    public float[] freqData;        // spectrum buffer filled by GetSpectrumData each frame
    int nSamples = 256;             // FFT size (must be a power of two)
    int fMax = 24000;               // assumed Nyquist frequency covered by the spectrum (Hz)

    // Upper and lower lip transforms; uppers move down (-y), lowers move up (+y).
    public Transform upmouth0_M, upmouth01_L, upmouth02_R, downmouth1_M, downmouth11_L, downmouth12_R;

    float volume = 1000;            // gain applied to band energy before clamping

    // Frequency band treated as "voice" (Hz).
    float freqLow = 200;
    float freqHigh = 1600;

    // Moving-average filter state (circular buffer of the last sizeFilter samples).
    int sizeFilter = 5;
    float[] filter;
    float filterSum;
    int posFilter = 0;
    int qSample = 0;                // number of valid samples during warm-up (<= sizeFilter)

    int video_Length, secCounter;   // clip length in whole seconds / elapsed seconds
    float y0, y1;                   // rest (closed-mouth) heights of upper / lower transforms

    AudioSource audio_source;       // cached: avoids GetComponent<AudioSource>() every frame
    float limValue;                 // smoothed, clamped displacement applied in LateUpdate

    void OnEnable ()
    {
        secCounter = 0;

        // Record rest heights. NOTE(review): each group shares one variable, so
        // only the last assignment survives — this assumes all three upper (and
        // all three lower) transforms sit at the same local height; confirm in rig.
        y0 = upmouth0_M.localPosition.y;
        y0 = upmouth01_L.localPosition.y;
        y0 = upmouth02_R.localPosition.y;
        y1 = downmouth1_M.localPosition.y;
        y1 = downmouth11_L.localPosition.y;
        y1 = downmouth12_R.localPosition.y;

        freqData = new float[nSamples];

        // Reset the smoothing filter so a re-enabled component starts clean
        // instead of inheriting stale samples from the previous run.
        filter = new float[sizeFilter];
        filterSum = 0;
        posFilter = 0;
        qSample = 0;

        audio_source = GetComponent<AudioSource> ();
        audio_source.clip = Rec_voice.instance.voiceFeed.clip;
        audio_source.Play ();

        // BUG FIX: length was previously read from source_clip, which may be
        // unassigned — the clip actually played is the one set just above.
        video_Length = Mathf.CeilToInt (audio_source.clip.length);
    }

    /// <summary>
    /// Returns the summed spectrum energy between fLow and fHigh (Hz) of the
    /// currently playing audio. 0 when the band contains no energy.
    /// </summary>
    float BandVol (float fLow, float fHigh)
    {
        fLow = Mathf.Clamp (fLow, 20, fMax);
        fHigh = Mathf.Clamp (fHigh, fLow, fMax);

        audio_source.GetSpectrumData (freqData, 0, FFTWindow.BlackmanHarris);

        // Map band edges to FFT bin indices. Clamp n2 to the last valid bin:
        // fHigh == fMax would otherwise index one past the end of freqData.
        int n1 = Mathf.FloorToInt (fLow * nSamples / fMax);
        int n2 = Mathf.Min (Mathf.FloorToInt (fHigh * nSamples / fMax), nSamples - 1);

        float sum = 0;
        for (int i = n1; i <= n2; i++) {
            // BUG FIX: original used "sum = freqData[i]", which discarded every
            // bin except the last instead of accumulating the whole band.
            sum += freqData [i];
        }
        return sum;
    }

    /// <summary>
    /// Moving average over the last sizeFilter samples; during warm-up it
    /// averages only the samples seen so far (never divides by zero, since
    /// qSample becomes >= 1 before the division).
    /// </summary>
    float MovingAverage (float sample)
    {
        if (filter == null)
            filter = new float[sizeFilter];
        filterSum += sample - filter [posFilter];
        filter [posFilter++] = sample;
        if (posFilter > qSample) {
            qSample = posFilter;
        }
        posFilter = posFilter % sizeFilter;
        return filterSum / qSample;
    }

    void Update ()
    {
        float band_vol = BandVol (freqLow, freqHigh);
        float val = MovingAverage (band_vol) * volume;

        // Clamp so loud input cannot tear the mouth open unrealistically.
        limValue = Mathf.Clamp (val, 0, 25f);

        if (Input.GetKeyDown (KeyCode.Escape)) {
            Application.Quit ();
        }
    }

    // Applied in LateUpdate so the offset lands after any animation pass has
    // positioned the face for this frame.
    void LateUpdate ()
    {
        upmouth0_M.localPosition = new Vector3 (upmouth0_M.localPosition.x, y0 - limValue, upmouth0_M.localPosition.z);
        upmouth01_L.localPosition = new Vector3 (upmouth01_L.localPosition.x, y0 - limValue, upmouth01_L.localPosition.z);
        upmouth02_R.localPosition = new Vector3 (upmouth02_R.localPosition.x, y0 - limValue, upmouth02_R.localPosition.z);
        downmouth1_M.localPosition = new Vector3 (downmouth1_M.localPosition.x, y1 + limValue, downmouth1_M.localPosition.z);
        downmouth11_L.localPosition = new Vector3 (downmouth11_L.localPosition.x, y1 + limValue, downmouth11_L.localPosition.z);
        downmouth12_R.localPosition = new Vector3 (downmouth12_R.localPosition.x, y1 + limValue, downmouth12_R.localPosition.z);
    }
}
我遇到了如下问题
1) 如何识别人声?:如果检测到其他声音(如音乐等),嘴唇也会跟着动,那么我们如何避免这种情况?我希望嘴唇仅针对人声同步。
2) 当我录制时,如果距离靠近设备,那么它工作正常,但如果距离稍远,则嘴唇不同步。
所以建议我哪里会出错?以及如何解决上述问题?
最佳答案
2) 麦克风记录的声级随着距离的增加而降低。因此,每个频段上的能量将更少(即 GetSpectrumData 给出的值更小)。如果增加“volume”参数的值,则 val 会变大
float val = MovingAverage (band_vol) * volume;
...嘴唇将沿 y 轴移动更多。
1) 一个简单的算法是只查看频率数据:如果较低频段(比如 0-1000Hz)相对于整个频谱(比如 0-16000Hz)包含足够多的能量,就将输入归类为语音。这可以防止算法对随机噪声进行口型同步。对于更高级的需求,我会实现 MFCC 算法:先用常见音素训练算法,然后当从录制的音频流计算出的 MFCC 与训练数据足够接近时,才进行口型同步。
关于c# - Unity 3D "automatic"嘴唇同步?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46709164/