8 #define PI 3.1415926535897932384626433832795f
28 ELM_FN
, ELM_F1
, ELM_F2
, ELM_F3
,
29 ELM_B1
, ELM_B2
, ELM_B3
, ELM_AN
,
30 ELM_A1
, ELM_A2
, ELM_A3
, ELM_A4
,
31 ELM_A5
, ELM_A6
, ELM_AB
, ELM_AV
,
32 ELM_AVC
, ELM_ASP
, ELM_AF
,
39 const char *mName
; // unused
43 unsigned char mFont
; // unused
44 const char *mDict
; // unused
45 const char *mIpa
; // unused
46 int mFeat
; // only ELM_FEATURE_VWL
47 Interp mInterpolator
[ELM_COUNT
];
52 ELM_FEATURE_ALV
= 0x00000001,
53 ELM_FEATURE_APR
= 0x00000002,
54 ELM_FEATURE_BCK
= 0x00000004,
55 ELM_FEATURE_BLB
= 0x00000008,
56 ELM_FEATURE_CNT
= 0x00000010,
57 ELM_FEATURE_DNT
= 0x00000020,
58 ELM_FEATURE_FNT
= 0x00000040,
59 ELM_FEATURE_FRC
= 0x00000080,
60 ELM_FEATURE_GLT
= 0x00000100,
61 ELM_FEATURE_HGH
= 0x00000200,
62 ELM_FEATURE_LAT
= 0x00000400,
63 ELM_FEATURE_LBD
= 0x00000800,
64 ELM_FEATURE_LBV
= 0x00001000,
65 ELM_FEATURE_LMD
= 0x00002000,
66 ELM_FEATURE_LOW
= 0x00004000,
67 ELM_FEATURE_MDL
= 0x00008000,
68 ELM_FEATURE_NAS
= 0x00010000,
69 ELM_FEATURE_PAL
= 0x00020000,
70 ELM_FEATURE_PLA
= 0x00040000,
71 ELM_FEATURE_RND
= 0x00080000,
72 ELM_FEATURE_RZD
= 0x00100000,
73 ELM_FEATURE_SMH
= 0x00200000,
74 ELM_FEATURE_STP
= 0x00400000,
75 ELM_FEATURE_UMD
= 0x00800000,
76 ELM_FEATURE_UNR
= 0x01000000,
77 ELM_FEATURE_VCD
= 0x02000000,
78 ELM_FEATURE_VEL
= 0x04000000,
79 ELM_FEATURE_VLS
= 0x08000000,
80 ELM_FEATURE_VWL
= 0x10000000
86 ELM_Q
, ELM_P
, ELM_PY
, ELM_PZ
, ELM_T
, ELM_TY
,
87 ELM_TZ
, ELM_K
, ELM_KY
, ELM_KZ
, ELM_B
, ELM_BY
, ELM_BZ
,
88 ELM_D
, ELM_DY
, ELM_DZ
, ELM_G
, ELM_GY
, ELM_GZ
, ELM_M
,
89 ELM_N
, ELM_NG
, ELM_F
, ELM_TH
, ELM_S
, ELM_SH
, ELM_X
,
90 ELM_H
, ELM_V
, ELM_QQ
, ELM_DH
, ELM_DI
, ELM_Z
, ELM_ZZ
,
91 ELM_ZH
, ELM_CH
, ELM_CI
, ELM_J
, ELM_JY
, ELM_L
, ELM_LL
,
92 ELM_RX
, ELM_R
, ELM_W
, ELM_Y
, ELM_I
, ELM_E
, ELM_AA
,
93 ELM_U
, ELM_O
, ELM_OO
, ELM_A
, ELM_EE
, ELM_ER
, ELM_AR
,
94 ELM_AW
, ELM_UU
, ELM_AI
, ELM_IE
, ELM_OI
, ELM_OU
, ELM_OV
,
95 ELM_OA
, ELM_IA
, ELM_IB
, ELM_AIR
,ELM_OOR
,ELM_OR
98 #define PHONEME_COUNT 53
/* Nominal duration of element `e`: the average of its mDU and mUD fields.
   `e` must be a pointer to an object with those two members.
   The stress level `s` is evaluated exactly once but does not influence the
   result; it is kept so existing call sites keep compiling and so the
   argument is not reported as unused.
   Every macro argument is parenthesized so that expressions such as
   `&elem` or `base + i` bind correctly after expansion. */
#define StressDur(e,s) ((void)(s), (((e)->mDU + (e)->mUD) / 2))
105 class PhonemeToElements
112 /* Order is important - 2 byte phonemes first, otherwise
113 the search function will fail*/
114 static PhonemeToElements phoneme_to_elements
[PHONEME_COUNT
] =
116 /* mKey, count, 0-7 elements */
117 /* tS */ 0x5374, 2, ELM_CH
, ELM_CI
, 0, 0, 0, 0, 0,
118 /* dZ */ 0x5a64, 4, ELM_J
, ELM_JY
, ELM_QQ
, ELM_JY
, 0, 0, 0,
119 /* rr */ 0x7272, 3, ELM_R
, ELM_QQ
, ELM_R
, 0, 0, 0, 0,
120 /* eI */ 0x4965, 2, ELM_AI
, ELM_I
, 0, 0, 0, 0, 0,
121 /* aI */ 0x4961, 2, ELM_IE
, ELM_I
, 0, 0, 0, 0, 0,
122 /* oI */ 0x496f, 2, ELM_OI
, ELM_I
, 0, 0, 0, 0, 0,
123 /* aU */ 0x5561, 2, ELM_OU
, ELM_OV
, 0, 0, 0, 0, 0,
124 /* @U */ 0x5540, 2, ELM_OA
, ELM_OV
, 0, 0, 0, 0, 0,
125 /* I@ */ 0x4049, 2, ELM_IA
, ELM_IB
, 0, 0, 0, 0, 0,
126 /* e@ */ 0x4065, 2, ELM_AIR
, ELM_IB
, 0, 0, 0, 0, 0,
127 /* U@ */ 0x4055, 2, ELM_OOR
, ELM_IB
, 0, 0, 0, 0, 0,
128 /* O@ */ 0x404f, 2, ELM_OR
, ELM_IB
, 0, 0, 0, 0, 0,
129 /* oU */ 0x556f, 2, ELM_OI
, ELM_OV
, 0, 0, 0, 0, 0,
130 /* */ 0x0020, 1, ELM_Q
, 0, 0, 0, 0, 0, 0,
131 /* p */ 0x0070, 3, ELM_P
, ELM_PY
, ELM_PZ
, 0, 0, 0, 0,
132 /* t */ 0x0074, 3, ELM_T
, ELM_TY
, ELM_TZ
, 0, 0, 0, 0,
133 /* k */ 0x006b, 3, ELM_K
, ELM_KY
, ELM_KZ
, 0, 0, 0, 0,
134 /* b */ 0x0062, 3, ELM_B
, ELM_BY
, ELM_BZ
, 0, 0, 0, 0,
135 /* d */ 0x0064, 3, ELM_D
, ELM_DY
, ELM_DZ
, 0, 0, 0, 0,
136 /* g */ 0x0067, 3, ELM_G
, ELM_GY
, ELM_GZ
, 0, 0, 0, 0,
137 /* m */ 0x006d, 1, ELM_M
, 0, 0, 0, 0, 0, 0,
138 /* n */ 0x006e, 1, ELM_N
, 0, 0, 0, 0, 0, 0,
139 /* N */ 0x004e, 1, ELM_NG
, 0, 0, 0, 0, 0, 0,
140 /* f */ 0x0066, 1, ELM_F
, 0, 0, 0, 0, 0, 0,
141 /* T */ 0x0054, 1, ELM_TH
, 0, 0, 0, 0, 0, 0,
142 /* s */ 0x0073, 1, ELM_S
, 0, 0, 0, 0, 0, 0,
143 /* S */ 0x0053, 1, ELM_SH
, 0, 0, 0, 0, 0, 0,
144 /* h */ 0x0068, 1, ELM_H
, 0, 0, 0, 0, 0, 0,
145 /* v */ 0x0076, 3, ELM_V
, ELM_QQ
, ELM_V
, 0, 0, 0, 0,
146 /* D */ 0x0044, 3, ELM_DH
, ELM_QQ
, ELM_DI
, 0, 0, 0, 0,
147 /* z */ 0x007a, 3, ELM_Z
, ELM_QQ
, ELM_ZZ
, 0, 0, 0, 0,
148 /* Z */ 0x005a, 3, ELM_ZH
, ELM_QQ
, ELM_ZH
, 0, 0, 0, 0,
149 /* l */ 0x006c, 1, ELM_L
, 0, 0, 0, 0, 0, 0,
150 /* r */ 0x0072, 1, ELM_R
, 0, 0, 0, 0, 0, 0,
151 /* R */ 0x0052, 1, ELM_RX
, 0, 0, 0, 0, 0, 0,
152 /* w */ 0x0077, 1, ELM_W
, 0, 0, 0, 0, 0, 0,
153 /* x */ 0x0078, 1, ELM_X
, 0, 0, 0, 0, 0, 0,
154 /* % */ 0x0025, 1, ELM_QQ
, 0, 0, 0, 0, 0, 0,
155 /* j */ 0x006a, 1, ELM_Y
, 0, 0, 0, 0, 0, 0,
156 /* I */ 0x0049, 1, ELM_I
, 0, 0, 0, 0, 0, 0,
157 /* e */ 0x0065, 1, ELM_E
, 0, 0, 0, 0, 0, 0,
158 /* & */ 0x0026, 1, ELM_AA
, 0, 0, 0, 0, 0, 0,
159 /* V */ 0x0056, 1, ELM_U
, 0, 0, 0, 0, 0, 0,
160 /* 0 */ 0x0030, 1, ELM_O
, 0, 0, 0, 0, 0, 0,
161 /* U */ 0x0055, 1, ELM_OO
, 0, 0, 0, 0, 0, 0,
162 /* @ */ 0x0040, 1, ELM_A
, 0, 0, 0, 0, 0, 0,
163 /* i */ 0x0069, 1, ELM_EE
, 0, 0, 0, 0, 0, 0,
164 /* 3 */ 0x0033, 1, ELM_ER
, 0, 0, 0, 0, 0, 0,
165 /* A */ 0x0041, 1, ELM_AR
, 0, 0, 0, 0, 0, 0,
166 /* O */ 0x004f, 1, ELM_AW
, 0, 0, 0, 0, 0, 0,
167 /* u */ 0x0075, 1, ELM_UU
, 0, 0, 0, 0, 0, 0,
168 /* o */ 0x006f, 1, ELM_OI
, 0, 0, 0, 0, 0, 0,
169 /* . */ 0x002e, 1, ELM_END
,0, 0, 0, 0, 0, 0,
172 static Element gElement
[] =
174 #include "Elements.def"
/* Convert a floating-point sample to a 16-bit signed sample.
 *
 * The value is saturated to [-32767, 32767] and then truncated toward zero.
 * The range test is done on the float BEFORE converting to an integer:
 * casting a float whose value does not fit in an int is undefined
 * behaviour, so clamping after the cast would be too late.
 * NaN (the only value not equal to itself) is mapped to silence (0).
 *
 * NOTE(review): the limits are symmetric to match the 32767 full-scale
 * value used by the amplitude table; confirm -32768 is not expected.
 */
static short clip(float input)
{
	if (input != input)
	{
		return 0; /* NaN */
	}

	if (input >= 32767.0f)
	{
		return 32767;
	}

	if (input <= -32767.0f)
	{
		return -32767;
	}

	/* In range: plain truncation toward zero. */
	return (short)(int)input;
}
/* Convert from decibels to a linear scale factor.
 *
 * Conversion table, dB to linear:
 *     87 dB --> 32767
 *     86 dB --> 29491 (1 dB down = 0.5**1/6)
 *     81 dB --> 16384 (6 dB down = 0.5)
 *
 * The just noticeable difference for a change in intensity of a vowel
 * is approximately 1 dB. Thus all amplitudes are quantized in 1 dB steps.
 *
 * dB  Table index; values outside [0, 87] are clamped to the nearest end
 *     so the lookup can never read outside the table.
 * Returns the table entry scaled by 0.001.
 */
static float DBtoLIN(int dB)
{
	static const float amptable[88] =
	{
		0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
		0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
		0.0f, 0.0f, 0.0f, 6.0f, 7.0f,
		8.0f, 9.0f, 10.0f, 11.0f, 13.0f,
		14.0f, 16.0f, 18.0f, 20.0f, 22.0f,
		25.0f, 28.0f, 32.0f, 35.0f, 40.0f,
		45.0f, 51.0f, 57.0f, 64.0f, 71.0f,
		80.0f, 90.0f, 101.0f, 114.0f, 128.0f,
		142.0f, 159.0f, 179.0f, 202.0f, 227.0f,
		256.0f, 284.0f, 318.0f, 359.0f, 405.0f,
		455.0f, 512.0f, 568.0f, 638.0f, 719.0f,
		811.0f, 911.0f, 1024.0f, 1137.0f, 1276.0f,
		1438.0f, 1622.0f, 1823.0f, 2048.0f, 2273.0f,
		2552.0f, 2875.0f, 3244.0f, 3645.0f, 4096.0f,
		4547.0f, 5104.0f, 5751.0f, 6488.0f, 7291.0f,
		8192.0f, 9093.0f, 10207.0f, 11502.0f, 12976.0f,
		14582.0f, 16384.0f, 18350.0f, 20644.0f, 23429.0f,
		26214.0f, 29491.0f, 32767.0f
	};
	const int lastIndex = (int)(sizeof(amptable) / sizeof(amptable[0])) - 1;

	/* Check limits of argument: callers derive dB from interpolated frame
	   data, so it is not guaranteed to be a valid index. */
	if (dB < 0)
	{
		dB = 0;
	}
	else if (dB > lastIndex)
	{
		dB = lastIndex;
	}

	return amptable[dB] * 0.001f;
}
251 klatt_frame::klatt_frame() :
252 mF0FundamentalFreq(1330), mVoicingAmpdb(60), mFormant1Freq(500),
253 mFormant1Bandwidth(60), mFormant2Freq(1500), mFormant2Bandwidth(90),
254 mFormant3Freq(2800), mFormant3Bandwidth(150), mFormant4Freq(3250),
255 mFormant4Bandwidth(200), mFormant5Freq(3700), mFormant5Bandwidth(200),
256 mFormant6Freq(4990), mFormant6Bandwidth(500), mNasalZeroFreq(270),
257 mNasalZeroBandwidth(100), mNasalPoleFreq(270), mNasalPoleBandwidth(100),
258 mAspirationAmpdb(0), mNoSamplesInOpenPeriod(30), mVoicingBreathiness(0),
259 mVoicingSpectralTiltdb(10), mFricationAmpdb(0), mSkewnessOfAlternatePeriods(0),
260 mFormant1Ampdb(0), mFormant1ParallelBandwidth(80), mFormant2Ampdb(0),
261 mFormant2ParallelBandwidth(200), mFormant3Ampdb(0), mFormant3ParallelBandwidth(350),
262 mFormant4Ampdb(0), mFormant4ParallelBandwidth(500), mFormant5Ampdb(0),
263 mFormant5ParallelBandwidth(600), mFormant6Ampdb(0), mFormant6ParallelBandwidth(800),
264 mParallelNasalPoleAmpdb(0), mBypassFricationAmpdb(0), mPalallelVoicingAmpdb(0),
273 mBaseDeclination(0.5f
),
274 mBaseWaveform(KW_SAW
),
278 mF0FundamentalFreq(0),
280 mSkewnessOfAlternatePeriods(0),
311 This function adds F0 flutter, as specified in:
313 "Analysis, synthesis and perception of voice quality variations among
314 female and male talkers" D.H. Klatt and L.C. Klatt JASA 87(2) February 1990.
315 Flutter is added by applying a quasi-random element constructed from three
316 slowly varying sine waves.
318 void klatt::flutter()
320 int original_f0
= mFrame
.mF0FundamentalFreq
/ 10;
321 float fla
= (float) mF0Flutter
/ 50;
322 float flb
= (float) original_f0
/ 100;
323 float flc
= (float)sin(2 * PI
* 12.7 * mTimeCount
);
324 float fld
= (float)sin(2 * PI
* 7.1 * mTimeCount
);
325 float fle
= (float)sin(2 * PI
* 4.7 * mTimeCount
);
326 float delta_f0
= fla
* flb
* (flc
+ fld
+ fle
) * 10;
327 mF0FundamentalFreq
+= (int) delta_f0
;
330 /* Vwave is the differentiated glottal flow waveform, there is a weak
331 spectral zero around 800 Hz, magic constants a,b reset pitch-synch
334 float klatt::natural_source(int aNper
)
336 // See if glottis open
339 switch (mBaseWaveform
)
342 return ((aNper
% 200) - 100) * 81.92f
; // triangle
344 return (float)(sin(aNper
* 0.0314) * 8192); // sin
346 return ((aNper
% 200) - 100) > 0 ? 8192.0f
: -8192.0f
; // square
348 return ((aNper
% 200) - 100) > 50 ? 8192.0f
: -8192.0f
; // pulse
350 return (int)mNLast
& 1 ? -8192.0f
: 8192.0f
;
352 return (int)mNLast
& 7 ? -8192.0f
: 8192.0f
;
353 case KW_SAW
: // fallthrough
355 return (abs((aNper
% 200) - 100) - 50) * 163.84f
; // saw
366 /* Reset selected parameters pitch-synchronously */
368 void klatt::pitch_synch_par_reset(int ns
)
370 if (mF0FundamentalFreq
> 0)
372 mT0
= (40 * mSampleRate
) / mF0FundamentalFreq
;
374 /* Period in samp*4 */
375 mAmpVoice
= DBtoLIN(mVoicingAmpdb
);
377 /* Duration of period before amplitude modulation */
380 if (mVoicingAmpdb
> 0)
385 /* Breathiness of voicing waveform */
387 mAmpBreth
= DBtoLIN(mFrame
.mVoicingBreathiness
) * 0.1f
;
389 /* Set open phase of glottal period */
390 /* where 40 <= open phase <= 263 */
392 mNOpen
= 4 * mFrame
.mNoSamplesInOpenPeriod
;
394 if (mNOpen
>= (mT0
- 1))
401 mNOpen
= 40; /* F0 max = 1000 Hz */
407 temp
= mSampleRate
/ mNOpen
;
408 mCritDampedGlotLowPassFilter
.initResonator(0L, temp
, mSampleRate
);
410 /* Make gain at F1 about constant */
412 temp1
= mNOpen
* .00833f
;
413 mCritDampedGlotLowPassFilter
.setGain(temp1
* temp1
);
415 /* Truncate skewness so as not to exceed duration of closed phase
420 if (mSkewnessOfAlternatePeriods
> temp
)
422 mSkewnessOfAlternatePeriods
= temp
;
427 mSkew
= mSkewnessOfAlternatePeriods
; /* Reset mSkew to requested mSkewnessOfAlternatePeriods */
431 mSkew
= -mSkewnessOfAlternatePeriods
;
434 /* Add skewness to closed portion of voicing period */
441 mT0
= 4; /* Default for f0 undefined */
447 /* Reset these pars pitch synchronously or at update rate if f0=0 */
449 if ((mT0
!= 4) || (ns
== 0))
451 /* Set one-pole ELM_FEATURE_LOW-pass filter that tilts glottal source */
452 mDecay
= (0.033f
* mFrame
.mVoicingSpectralTiltdb
); /* Function of samp_rate ? */
456 mOneMd
= 1.0f
- mDecay
;
466 /* Get variable parameters from host computer,
467 initially also get definition of fixed pars
470 void klatt::frame_init()
472 int mOverallGaindb
; /* Overall gain, 60 dB is unity 0 to 60 */
473 float amp_parF1
; /* mFormant1Ampdb converted to linear gain */
474 float amp_parFN
; /* mParallelNasalPoleAmpdb converted to linear gain */
475 float amp_parF2
; /* mFormant2Ampdb converted to linear gain */
476 float amp_parF3
; /* mFormant3Ampdb converted to linear gain */
477 float amp_parF4
; /* mFormant4Ampdb converted to linear gain */
478 float amp_parF5
; /* mFormant5Ampdb converted to linear gain */
479 float amp_parF6
; /* mFormant6Ampdb converted to linear gain */
481 /* Read speech frame definition into temp store
482 and move some parameters into active use immediately
483 (voice-excited ones are updated pitch synchronously
484 to avoid waveform glitches).
487 mF0FundamentalFreq
= mFrame
.mF0FundamentalFreq
;
488 mVoicingAmpdb
= mFrame
.mVoicingAmpdb
- 7;
490 if (mVoicingAmpdb
< 0) mVoicingAmpdb
= 0;
492 mAmpAspir
= DBtoLIN(mFrame
.mAspirationAmpdb
) * .05f
;
493 mAmpFrica
= DBtoLIN(mFrame
.mFricationAmpdb
) * 0.25f
;
494 mSkewnessOfAlternatePeriods
= mFrame
.mSkewnessOfAlternatePeriods
;
496 /* Fudge factors (which comprehend effects of formants on each other?)
497 with these in place ALL_PARALLEL should sound as close as
498 possible to CASCADE_PARALLEL.
499 Possible problem feeding in Holmes's amplitudes given this.
501 amp_parF1
= DBtoLIN(mFrame
.mFormant1Ampdb
) * 0.4f
; /* -7.96 dB */
502 amp_parF2
= DBtoLIN(mFrame
.mFormant2Ampdb
) * 0.15f
; /* -16.5 dB */
503 amp_parF3
= DBtoLIN(mFrame
.mFormant3Ampdb
) * 0.06f
; /* -24.4 dB */
504 amp_parF4
= DBtoLIN(mFrame
.mFormant4Ampdb
) * 0.04f
; /* -28.0 dB */
505 amp_parF5
= DBtoLIN(mFrame
.mFormant5Ampdb
) * 0.022f
; /* -33.2 dB */
506 amp_parF6
= DBtoLIN(mFrame
.mFormant6Ampdb
) * 0.03f
; /* -30.5 dB */
507 amp_parFN
= DBtoLIN(mFrame
.mParallelNasalPoleAmpdb
) * 0.6f
; /* -4.44 dB */
508 mAmpBypas
= DBtoLIN(mFrame
.mBypassFricationAmpdb
) * 0.05f
; /* -26.0 db */
510 // Set coefficients of nasal resonator and zero antiresonator
511 mNasalPole
.initResonator(mFrame
.mNasalPoleFreq
, mFrame
.mNasalPoleBandwidth
, mSampleRate
);
513 mNasalZero
.initAntiresonator(mFrame
.mNasalZeroFreq
, mFrame
.mNasalZeroBandwidth
, mSampleRate
);
515 // Set coefficients of parallel resonators, and amplitude of outputs
516 mParallelFormant1
.initResonator(mFrame
.mFormant1Freq
, mFrame
.mFormant1ParallelBandwidth
, mSampleRate
);
517 mParallelFormant1
.setGain(amp_parF1
);
519 mParallelResoNasalPole
.initResonator(mFrame
.mNasalPoleFreq
, mFrame
.mNasalPoleBandwidth
, mSampleRate
);
520 mParallelResoNasalPole
.setGain(amp_parFN
);
522 mParallelFormant2
.initResonator(mFrame
.mFormant2Freq
, mFrame
.mFormant2ParallelBandwidth
, mSampleRate
);
523 mParallelFormant2
.setGain(amp_parF2
);
525 mParallelFormant3
.initResonator(mFrame
.mFormant3Freq
, mFrame
.mFormant3ParallelBandwidth
, mSampleRate
);
526 mParallelFormant3
.setGain(amp_parF3
);
528 mParallelFormant4
.initResonator(mFrame
.mFormant4Freq
, mFrame
.mFormant4ParallelBandwidth
, mSampleRate
);
529 mParallelFormant4
.setGain(amp_parF4
);
531 mParallelFormant5
.initResonator(mFrame
.mFormant5Freq
, mFrame
.mFormant5ParallelBandwidth
, mSampleRate
);
532 mParallelFormant5
.setGain(amp_parF5
);
534 mParallelFormant6
.initResonator(mFrame
.mFormant6Freq
, mFrame
.mFormant6ParallelBandwidth
, mSampleRate
);
535 mParallelFormant6
.setGain(amp_parF6
);
538 /* fold overall gain into output resonator */
539 mOverallGaindb
= mFrame
.mOverallGaindb
- 3;
541 if (mOverallGaindb
<= 0)
544 /* output ELM_FEATURE_LOW-pass filter - resonator with freq 0 and BW = globals->mSampleRate
545 Thus 3db point is globals->mSampleRate/2 i.e. Nyquist limit.
546 Only 3db down seems rather mild...
548 mOutputLowPassFilter
.initResonator(0L, (int)mSampleRate
, mSampleRate
);
549 mOutputLowPassFilter
.setGain(DBtoLIN(mOverallGaindb
));
555 CONVERT FRAME OF PARAMETER DATA TO A WAVEFORM CHUNK
556 Synthesize globals->mNspFr samples of waveform and store in jwave[].
559 void klatt::parwave(short int *jwave
)
561 /* Output of cascade branch, also final output */
563 /* Initialize synthesizer and get specification for current speech
564 frame from host microcomputer */
570 mTimeCount
++; /* used for f0 flutter */
571 flutter(); /* add f0 flutter */
574 /* MAIN LOOP, for each output sample of current frame: */
577 for (ns
= 0; ns
< mNspFr
; ns
++)
581 float sourc
; /* Sound source if all-parallel config used */
582 float glotout
; /* Output of glottal sound source */
583 float par_glotout
; /* Output of parallelglottal sound sourc */
584 float voice
= 0; /* Current sample of voicing waveform */
585 float frics
; /* Frication sound source */
586 float aspiration
; /* Aspiration sound source */
587 int nrand
; /* Varible used by random number generator */
589 /* Our own code like rand(), but portable
590 whole upper 31 bits of seed random
591 assumes 32-bit unsigned arithmetic
592 with untested code to handle larger.
594 mSeed
= mSeed
* 1664525 + 1;
598 /* Shift top bits of seed up to top of int then back down to LS 14 bits */
599 /* Assumes 8 bits per sizeof unit i.e. a "byte" */
600 nrand
= (((int) mSeed
) << (8 * sizeof(int) - 32)) >> (8 * sizeof(int) - 14);
602 /* Tilt down noise spectrum by soft ELM_FEATURE_LOW-pass filter having
603 * a pole near the origin in the z-plane, i.e.
604 * output = input + (0.75 * lastoutput) */
606 noise
= nrand
+ (0.75f
* mNLast
); /* Function of samp_rate ? */
610 /* Amplitude modulate noise (reduce noise amplitude during
611 second half of glottal period) if voicing simultaneously present
619 /* Compute frication noise */
620 sourc
= frics
= mAmpFrica
* noise
;
622 /* Compute voicing waveform : (run glottal source simulation at
623 4 times normal sample rate to minimize quantization noise in
624 period of female voice)
627 for (n4
= 0; n4
< 4; n4
++)
629 /* use a more-natural-shaped source waveform with excitation
630 occurring both upon opening and upon closure, strongest at closure */
631 voice
= natural_source(mNPer
);
633 /* Reset period when counter 'mNPer' reaches mT0 */
638 pitch_synch_par_reset(ns
);
641 /* Low-pass filter voicing waveform before downsampling from 4*globals->mSampleRate */
642 /* to globals->mSampleRate samples/sec. Resonator f=.09*globals->mSampleRate, bw=.06*globals->mSampleRate */
644 voice
= mDownSampLowPassFilter
.resonate(voice
); /* in=voice, out=voice */
646 /* Increment counter that keeps track of 4*globals->mSampleRate samples/sec */
650 /* Tilt spectrum of voicing source down by soft ELM_FEATURE_LOW-pass filtering, amount
651 of tilt determined by mVoicingSpectralTiltdb
653 voice
= (voice
* mOneMd
) + (mVLast
* mDecay
);
657 /* Add breathiness during glottal open phase */
660 /* Amount of breathiness determined by parameter mVoicingBreathiness */
661 /* Use nrand rather than noise because noise is ELM_FEATURE_LOW-passed */
662 voice
+= mAmpBreth
* nrand
;
665 /* Set amplitude of voicing */
666 glotout
= mAmpVoice
* voice
;
668 /* Compute aspiration amplitude and add to voicing source */
669 aspiration
= mAmpAspir
* noise
;
671 glotout
+= aspiration
;
673 par_glotout
= glotout
;
675 /* NIS - rsynth "hack"
676 As Holmes' scheme is weak at nasals and (physically) nasal cavity
677 is "back near glottis" feed glottal source through nasal resonators
678 Don't think this is quite right, but improves things a bit
680 par_glotout
= mNasalZero
.antiresonate(par_glotout
);
681 par_glotout
= mNasalPole
.resonate(par_glotout
);
682 /* And just use mParallelFormant1 NOT mParallelResoNasalPole */
683 float out
= mParallelFormant1
.resonate(par_glotout
);
684 /* Sound sourc for other parallel resonators is frication
685 plus first difference of voicing waveform.
687 sourc
+= (par_glotout
- mGlotLast
);
688 mGlotLast
= par_glotout
;
690 /* Standard parallel vocal tract
691 Formants F6,F5,F4,F3,F2, outputs added with alternating sign
693 out
= mParallelFormant6
.resonate(sourc
) - out
;
694 out
= mParallelFormant5
.resonate(sourc
) - out
;
695 out
= mParallelFormant4
.resonate(sourc
) - out
;
696 out
= mParallelFormant3
.resonate(sourc
) - out
;
697 out
= mParallelFormant2
.resonate(sourc
) - out
;
699 out
= mAmpBypas
* sourc
- out
;
700 out
= mOutputLowPassFilter
.resonate(out
);
702 *jwave
++ = clip(out
); /* Convert back to integer */
708 static char * phoneme_to_element_lookup(char *s
, void ** data
)
711 int key16
= key8
+ (s
[1] << 8);
712 if (s
[1] == 0) key16
= -1; // avoid key8==key16
714 for (i
= 0; i
< PHONEME_COUNT
; i
++)
716 if (phoneme_to_elements
[i
].mKey
== key16
)
718 *data
= &phoneme_to_elements
[i
].mData
;
721 if (phoneme_to_elements
[i
].mKey
== key8
)
723 *data
= &phoneme_to_elements
[i
].mData
;
727 // should never happen
734 int klatt::phone_to_elm(char *aPhoneme
, int aCount
, darray
*aElement
)
739 char *limit
= s
+ aCount
;
741 while (s
< limit
&& *s
)
744 s
= phoneme_to_element_lookup(s
, (void**)&e
);
753 Element
* p
= &gElement
[x
];
754 /* This works because only vowels have mUD != mDU,
755 and we set stress just before a vowel
759 if (!(p
->mFeat
& ELM_FEATURE_VWL
))
762 int stressdur
= StressDur(p
,stress
);
766 aElement
->put(stressdur
);
767 aElement
->put(stress
);
778 case '\'': /* Primary stress */
782 case ',': /* Secondary stress */
786 case '+': /* Tertiary stress */
790 case '-': /* hyphen in input */
794 // fprintf(stderr, "Ignoring %c in '%.*s'\n", ch, aCount, aPhoneme);
805 /* 'a' is dominant element, 'b' is dominated
806 ext is flag to say to use external times from 'a' rather
807 than internal i.e. ext != 0 if 'a' is NOT current element.
810 static void set_trans(Slope
*t
, Element
* a
, Element
* b
,int ext
, int /* e */)
814 for (i
= 0; i
< ELM_COUNT
; i
++)
816 t
[i
].mTime
= ((ext
) ? a
->mInterpolator
[i
].mExtDelay
: a
->mInterpolator
[i
].mIntDelay
);
820 t
[i
].mValue
= a
->mInterpolator
[i
].mFixed
+ (a
->mInterpolator
[i
].mProportion
* b
->mInterpolator
[i
].mSteady
) * 0.01f
; // mProportion is in scale 0..100, so *0.01.
824 t
[i
].mValue
= b
->mInterpolator
[i
].mSteady
;
/* Linear interpolation between two values over an integer time span.
 *
 * a  Value at t == 0.
 * b  Value at t == d.
 * t  Current position within the span.
 * d  Length of the span.
 *
 * Returns a for t <= 0 and b for t >= d, so the result never extrapolates
 * past either end point. The two guards also guarantee 0 < t < d at the
 * division, so a zero (or negative) span can never divide by zero.
 */
static float lerp(float a, float b, int t, int d)
{
	if (t <= 0)
	{
		return a;
	}

	if (t >= d)
	{
		return b;
	}

	return a + (b - a) * ((float)t / (float)d);
}
845 static float interpolate(Slope
*aStartSlope
, Slope
*aEndSlope
, float aMidValue
, int aTime
, int aDuration
)
847 int steadyTime
= aDuration
- (aStartSlope
->mTime
+ aEndSlope
->mTime
);
851 // Interpolate to a midpoint, stay there for a while, then interpolate to end
853 if (aTime
< aStartSlope
->mTime
)
855 // interpolate to the first value
856 return lerp(aStartSlope
->mValue
, aMidValue
, aTime
, aStartSlope
->mTime
);
860 aTime
-= aStartSlope
->mTime
;
862 if (aTime
<= steadyTime
)
864 // still at steady state
868 // interpolate to the end
869 return lerp(aMidValue
, aEndSlope
->mValue
, aTime
- steadyTime
, aEndSlope
->mTime
);
874 float f
= 1.0f
- ((float) aTime
/ (float) aDuration
);
875 float sp
= lerp(aStartSlope
->mValue
, aMidValue
, aTime
, aStartSlope
->mTime
);
876 float ep
= lerp(aEndSlope
->mValue
, aMidValue
, aDuration
- aTime
, aEndSlope
->mTime
);
877 return f
* sp
+ ((float) 1.0 - f
) * ep
;
883 void klatt::initsynth(int aElementCount
,unsigned char *aElement
)
886 mElementCount
= aElementCount
;
888 mLastElement
= &gElement
[0];
892 mFrame
.mF0FundamentalFreq
= mBaseF0
;
893 mTop
= 1.1f
* mFrame
.mF0FundamentalFreq
;
894 mFrame
.mNasalPoleFreq
= (int)mLastElement
->mInterpolator
[ELM_FN
].mSteady
;
895 mFrame
.mFormant1ParallelBandwidth
= mFrame
.mFormant1Bandwidth
= 60;
896 mFrame
.mFormant2ParallelBandwidth
= mFrame
.mFormant2Bandwidth
= 90;
897 mFrame
.mFormant3ParallelBandwidth
= mFrame
.mFormant3Bandwidth
= 150;
898 // mFrame.mFormant4ParallelBandwidth = (default)
900 // Set stress attack/decay slope
903 mStressE
.mValue
= 0.0;
906 int klatt::synth(int /* aSampleCount */, short *aSamplePointer
)
908 short *samp
= aSamplePointer
;
910 if (mElementIndex
>= mElementCount
)
913 Element
* currentElement
= &gElement
[mElement
[mElementIndex
++]];
914 int dur
= mElement
[mElementIndex
++];
915 mElementIndex
++; // skip stress
917 if (currentElement
->mRK
== 31) // "END"
919 // Reset the fundamental frequency top
920 mFrame
.mF0FundamentalFreq
= mBaseF0
;
921 mTop
= 1.1f
* mFrame
.mF0FundamentalFreq
;
924 // Skip zero length elements which are only there to affect
925 // boundary values of adjacent elements
929 Element
* ne
= (mElementIndex
< mElementCount
) ? &gElement
[mElement
[mElementIndex
]] : &gElement
[0];
930 Slope start
[ELM_COUNT
];
931 Slope end
[ELM_COUNT
];
934 if (currentElement
->mRK
> mLastElement
->mRK
)
936 set_trans(start
, currentElement
, mLastElement
, 0, 's');
941 set_trans(start
, mLastElement
, currentElement
, 1, 's');
945 if (ne
->mRK
> currentElement
->mRK
)
947 set_trans(end
, ne
, currentElement
, 1, 'e');
952 set_trans(end
, currentElement
, ne
, 0, 'e');
956 for (t
= 0; t
< dur
; t
++, mTStress
++)
958 float base
= mTop
* 0.8f
; // 3 * top / 5
961 if (mTStress
== mNTStress
)
963 int j
= mElementIndex
;
968 while (j
<= mElementCount
)
970 Element
* e
= (j
< mElementCount
) ? &gElement
[mElement
[j
++]] : &gElement
[0];
971 int du
= (j
< mElementCount
) ? mElement
[j
++] : 0;
972 int s
= (j
< mElementCount
) ? mElement
[j
++] : 3;
974 if (s
|| e
->mFeat
& ELM_FEATURE_VWL
)
979 mStressE
.mValue
= (float) s
/ 3;
981 mStressE
.mValue
= (float) 0.1;
986 e
= (j
< mElementCount
) ? &gElement
[mElement
[j
++]] : &gElement
[0];
990 while ((e
->mFeat
& ELM_FEATURE_VWL
) && mElement
[j
++] == s
);
1002 for (j
= 0; j
< ELM_COUNT
; j
++)
1004 tp
[j
] = interpolate(&start
[j
], &end
[j
], (float) currentElement
->mInterpolator
[j
].mSteady
, t
, dur
);
1007 // Now call the synth for each frame
1009 mFrame
.mF0FundamentalFreq
= (int)(base
+ (mTop
- base
) * interpolate(&mStressS
, &mStressE
, (float)0, mTStress
, mNTStress
));
1010 mFrame
.mVoicingAmpdb
= mFrame
.mPalallelVoicingAmpdb
= (int)tp
[ELM_AV
];
1011 mFrame
.mFricationAmpdb
= (int)tp
[ELM_AF
];
1012 mFrame
.mNasalZeroFreq
= (int)tp
[ELM_FN
];
1013 mFrame
.mAspirationAmpdb
= (int)tp
[ELM_ASP
];
1014 mFrame
.mVoicingBreathiness
= (int)tp
[ELM_AVC
];
1015 mFrame
.mFormant1ParallelBandwidth
= mFrame
.mFormant1Bandwidth
= (int)tp
[ELM_B1
];
1016 mFrame
.mFormant2ParallelBandwidth
= mFrame
.mFormant2Bandwidth
= (int)tp
[ELM_B2
];
1017 mFrame
.mFormant3ParallelBandwidth
= mFrame
.mFormant3Bandwidth
= (int)tp
[ELM_B3
];
1018 mFrame
.mFormant1Freq
= (int)tp
[ELM_F1
];
1019 mFrame
.mFormant2Freq
= (int)tp
[ELM_F2
];
1020 mFrame
.mFormant3Freq
= (int)tp
[ELM_F3
];
1022 // AMP_ADJ + is a kludge to get amplitudes up to klatt-compatible levels
1025 //pars.mParallelNasalPoleAmpdb = AMP_ADJ + tp[ELM_AN];
1027 mFrame
.mBypassFricationAmpdb
= AMP_ADJ
+ (int)tp
[ELM_AB
];
1028 mFrame
.mFormant5Ampdb
= AMP_ADJ
+ (int)tp
[ELM_A5
];
1029 mFrame
.mFormant6Ampdb
= AMP_ADJ
+ (int)tp
[ELM_A6
];
1030 mFrame
.mFormant1Ampdb
= AMP_ADJ
+ (int)tp
[ELM_A1
];
1031 mFrame
.mFormant2Ampdb
= AMP_ADJ
+ (int)tp
[ELM_A2
];
1032 mFrame
.mFormant3Ampdb
= AMP_ADJ
+ (int)tp
[ELM_A3
];
1033 mFrame
.mFormant4Ampdb
= AMP_ADJ
+ (int)tp
[ELM_A4
];
1039 // Declination of f0 envelope 0.25Hz / cS
1040 mTop
-= mBaseDeclination
;// 0.5;
1044 mLastElement
= currentElement
;
1046 return (int)(samp
- aSamplePointer
);
1050 void klatt::init(int aBaseFrequency
, float aBaseSpeed
, float aBaseDeclination
, int aBaseWaveform
)
1052 mBaseF0
= aBaseFrequency
;
1053 mBaseSpeed
= aBaseSpeed
;
1054 mBaseDeclination
= aBaseDeclination
;
1055 mBaseWaveform
= aBaseWaveform
;
1057 mSampleRate
= 11025;
1059 mF0FundamentalFreq
= mBaseF0
;
1060 mFrame
.mF0FundamentalFreq
= mBaseF0
;
1062 int FLPhz
= (950 * mSampleRate
) / 10000;
1063 int BLPhz
= (630 * mSampleRate
) / 10000;
1064 mNspFr
= (int)(mSampleRate
* mBaseSpeed
) / 1000;
1066 mDownSampLowPassFilter
.initResonator(FLPhz
, BLPhz
, mSampleRate
);
1071 mVLast
= 0; /* Previous output of voice */
1072 mNLast
= 0; /* Previous output of random number generator */
1073 mGlotLast
= 0; /* Previous value of glotout */