]> git.bts.cx Git - benzene.git/blob - third_party/soloud_speech/tts.cpp
Update git module URL
[benzene.git] / third_party / soloud_speech / tts.cpp
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "darray.h"
#include "tts.h"
7
/*
** Spoken names of the 128 seven-bit ASCII codes, indexed by character code.
** Control codes other than NUL have an empty name.  Upper and lower case
** letters share one list of names, which is either a phonetic spelling or,
** when ALPHA_IN_DICT is defined, the bare capital letter.  A NULL pointer
** follows the last entry.
*/
#ifndef ALPHA_IN_DICT
#define TTS_LETTER_NAMES \
    "ay", "bee", "see", "dee", "e", "eff", "gee", \
    "aych", "i", "jay", "kay", "ell", "em", "en", \
    "ohe", "pee", "kju", "are", "es", "tee", "you", \
    "vee", "double you", "eks", "why", "zed"
#else /* ALPHA_IN_DICT */
#define TTS_LETTER_NAMES \
    "A", "B", "C", "D", "E", "F", "G", \
    "H", "I", "J", "K", "L", "M", "N", \
    "O", "P", "Q", "R", "S", "T", "U", \
    "V", "W", "X", "Y", "Z"
#endif /* ALPHA_IN_DICT */

static const char *ASCII[] =
{
    /* 0x00 - 0x1F: control codes */
    "null", "", "", "", "", "", "", "",
    "", "", "", "", "", "", "", "",
    "", "", "", "", "", "", "", "",
    "", "", "", "", "", "", "", "",

    /* 0x20 - 0x2F: space and punctuation */
    "space", "exclamation mark", "double quote", "hash",
    "dollar", "percent", "ampersand", "quote",
    "open parenthesis", "close parenthesis", "asterisk", "plus",
    "comma", "minus", "full stop", "slash",

    /* 0x30 - 0x39: digits */
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine",

    /* 0x3A - 0x40 */
    "colon", "semi colon", "less than", "equals",
    "greater than", "question mark", "at",

    /* 0x41 - 0x5A: upper case letters */
    TTS_LETTER_NAMES,

    /* 0x5B - 0x60 */
    "open bracket", "back slash", "close bracket",
    "circumflex", "underscore", "back quote",

    /* 0x61 - 0x7A: lower case letters */
    TTS_LETTER_NAMES,

    /* 0x7B - 0x7F */
    "open brace", "vertical bar", "close brace", "tilde", "delete",

    NULL
};

#undef TTS_LETTER_NAMES
64
/* Context definitions used in the left and right parts of the rules */

/* No context requirement: leftmatch()/rightmatch() accept an empty pattern */
static const char Anything[] = "";

/* Context is beginning or end of word (a single blank) */
static const char Nothing[] = " ";

/* Output part that produces no phonemes */
static const char Silent[] = "";


/* Positions of the four parts inside a Rule */
#define LEFT_PART 0
#define MATCH_PART 1
#define RIGHT_PART 2
#define OUT_PART 3

/*
** Rule is an array of 4 character pointers:
** left context, text to match, right context, phoneme output.
*/
typedef const char *Rule[4];
83
84
85 /*0 = Punctuation */
86 /*
87 ** LEFT_PART MATCH_PART RIGHT_PART OUT_PART
88 */
89
90
91 static Rule punct_rules[] =
92 {
93 {Anything, " ", Anything, " "},
94 {Anything, "-", Anything, ""},
95 {".", "'S", Anything, "z"},
96 {"#:.E", "'S", Anything, "z"},
97 {"#", "'S", Anything, "z"},
98 {Anything, "'", Anything, ""},
99 {Anything, ",", Anything, " "},
100 {Anything, ".", Anything, " "},
101 {Anything, "?", Anything, " "},
102 {Anything, "!", Anything, " "},
103 {Anything, 0, Anything, Silent},
104 };
105
106 static Rule A_rules[] =
107 {
108 {Anything, "A", Nothing, "@"},
109 {Nothing, "ARE", Nothing, "0r"},
110 {Nothing, "AR", "O", "@r"},
111 {Anything, "AR", "#", "er"},
112 {"^", "AS", "#", "eIs"},
113 {Anything, "A", "WA", "@"},
114 {Anything, "AW", Anything, "O"},
115 {" :", "ANY", Anything, "eni"},
116 {Anything, "A", "^+#", "eI"},
117 {"#:", "ALLY", Anything, "@li"},
118 {Nothing, "AL", "#", "@l"},
119 {Anything, "AGAIN", Anything, "@gen"},
120 {"#:", "AG", "E", "IdZ"},
121 {Anything, "A", "^+:#", "&"},
122 {" :", "A", "^+ ", "eI"},
123 {Anything, "A", "^%", "eI"},
124 {Nothing, "ARR", Anything, "@r"},
125 {Anything, "ARR", Anything, "&r"},
126 {" :", "AR", Nothing, "0r"},
127 {Anything, "AR", Nothing, "3"},
128 {Anything, "AR", Anything, "0r"},
129 {Anything, "AIR", Anything, "er"},
130 {Anything, "AI", Anything, "eI"},
131 {Anything, "AY", Anything, "eI"},
132 {Anything, "AU", Anything, "O"},
133 {"#:", "AL", Nothing, "@l"},
134 {"#:", "ALS", Nothing, "@lz"},
135 {Anything, "ALK", Anything, "Ok"},
136 {Anything, "AL", "^", "Ol"},
137 {" :", "ABLE", Anything, "eIb@l"},
138 {Anything, "ABLE", Anything, "@b@l"},
139 {Anything, "ANG", "+", "eIndZ"},
140 {"^", "A", "^#", "eI"},
141 {Anything, "A", Anything, "&"},
142 {Anything, 0, Anything, Silent},
143 };
144
145 static Rule B_rules[] =
146 {
147 {Nothing, "BE", "^#", "bI"},
148 {Anything, "BEING", Anything, "biIN"},
149 {Nothing, "BOTH", Nothing, "b@UT"},
150 {Nothing, "BUS", "#", "bIz"},
151 {Anything, "BUIL", Anything, "bIl"},
152 {Anything, "B", Anything, "b"},
153 {Anything, 0, Anything, Silent},
154 };
155
156 static Rule C_rules[] =
157 {
158 {Nothing, "CH", "^", "k"},
159 {"^E", "CH", Anything, "k"},
160 {Anything, "CH", Anything, "tS"},
161 {" S", "CI", "#", "saI"},
162 {Anything, "CI", "A", "S"},
163 {Anything, "CI", "O", "S"},
164 {Anything, "CI", "EN", "S"},
165 {Anything, "C", "+", "s"},
166 {Anything, "CK", Anything, "k"},
167 {Anything, "COM", "%", "kVm"},
168 {Anything, "C", Anything, "k"},
169 {Anything, 0, Anything, Silent},
170 };
171
172 static Rule D_rules[] =
173 {
174 {"#:", "DED", Nothing, "dId"},
175 {".E", "D", Nothing, "d"},
176 {"#:^E", "D", Nothing, "t"},
177 {Nothing, "DE", "^#", "dI"},
178 {Nothing, "DO", Nothing, "mDU"},
179 {Nothing, "DOES", Anything, "dVz"},
180 {Nothing, "DOING", Anything, "duIN"},
181 {Nothing, "DOW", Anything, "daU"},
182 {Anything, "DU", "A", "dZu"},
183 {Anything, "D", Anything, "d"},
184 {Anything, 0, Anything, Silent},
185 };
186
187 static Rule E_rules[] =
188 {
189 {"#:", "E", Nothing, ""},
190 {"':^", "E", Nothing, ""},
191 {" :", "E", Nothing, "i"},
192 {"#", "ED", Nothing, "d"},
193 {"#:", "E", "D ", ""},
194 {Anything, "EV", "ER", "ev"},
195 {Anything, "E", "^%", "i"},
196 {Anything, "ERI", "#", "iri"},
197 {Anything, "ERI", Anything, "erI"},
198 {"#:", "ER", "#", "3"},
199 {Anything, "ER", "#", "er"},
200 {Anything, "ER", Anything, "3"},
201 {Nothing, "EVEN", Anything, "iven"},
202 {"#:", "E", "W", ""},
203 {"T", "EW", Anything, "u"},
204 {"S", "EW", Anything, "u"},
205 {"R", "EW", Anything, "u"},
206 {"D", "EW", Anything, "u"},
207 {"L", "EW", Anything, "u"},
208 {"Z", "EW", Anything, "u"},
209 {"N", "EW", Anything, "u"},
210 {"J", "EW", Anything, "u"},
211 {"TH", "EW", Anything, "u"},
212 {"CH", "EW", Anything, "u"},
213 {"SH", "EW", Anything, "u"},
214 {Anything, "EW", Anything, "ju"},
215 {Anything, "E", "O", "i"},
216 {"#:S", "ES", Nothing, "Iz"},
217 {"#:C", "ES", Nothing, "Iz"},
218 {"#:G", "ES", Nothing, "Iz"},
219 {"#:Z", "ES", Nothing, "Iz"},
220 {"#:X", "ES", Nothing, "Iz"},
221 {"#:J", "ES", Nothing, "Iz"},
222 {"#:CH", "ES", Nothing, "Iz"},
223 {"#:SH", "ES", Nothing, "Iz"},
224 {"#:", "E", "S ", ""},
225 {"#:", "ELY", Nothing, "li"},
226 {"#:", "EMENT", Anything, "ment"},
227 {Anything, "EFUL", Anything, "fUl"},
228 {Anything, "EE", Anything, "i"},
229 {Anything, "EARN", Anything, "3n"},
230 {Nothing, "EAR", "^", "3"},
231 {Anything, "EAD", Anything, "ed"},
232 {"#:", "EA", Nothing, "i@"},
233 {Anything, "EA", "SU", "e"},
234 {Anything, "EA", Anything, "i"},
235 {Anything, "EIGH", Anything, "eI"},
236 {Anything, "EI", Anything, "i"},
237 {Nothing, "EYE", Anything, "aI"},
238 {Anything, "EY", Anything, "i"},
239 {Anything, "EU", Anything, "ju"},
240 {Anything, "E", Anything, "e"},
241 {Anything, 0, Anything, Silent},
242 };
243
244 static Rule F_rules[] =
245 {
246 {Anything, "FUL", Anything, "fUl"},
247 {Anything, "F", Anything, "f"},
248 {Anything, 0, Anything, Silent},
249 };
250
251 static Rule G_rules[] =
252 {
253 {Anything, "GIV", Anything, "gIv"},
254 {Nothing, "G", "I^", "g"},
255 {Anything, "GE", "T", "ge"},
256 {"SU", "GGES", Anything, "gdZes"},
257 {Anything, "GG", Anything, "g"},
258 {" B#", "G", Anything, "g"},
259 {Anything, "G", "+", "dZ"},
260 {Anything, "GREAT", Anything, "greIt"},
261 {"#", "GH", Anything, ""},
262 {Anything, "G", Anything, "g"},
263 {Anything, 0, Anything, Silent},
264 };
265
266 static Rule H_rules[] =
267 {
268 {Nothing, "HAV", Anything, "h&v"},
269 {Nothing, "HERE", Anything, "hir"},
270 {Nothing, "HOUR", Anything, "aU3"},
271 {Anything, "HOW", Anything, "haU"},
272 {Anything, "H", "#", "h"},
273 {Anything, "H", Anything, ""},
274 {Anything, 0, Anything, Silent},
275 };
276
277 static Rule I_rules[] =
278 {
279 {Nothing, "IAIN", Nothing, "I@n"},
280 {Nothing, "ING", Nothing, "IN"},
281 {Nothing, "IN", Anything, "In"},
282 {Nothing, "I", Nothing, "aI"},
283 {Anything, "IN", "D", "aIn"},
284 {Anything, "IER", Anything, "i3"},
285 {"#:R", "IED", Anything, "id"},
286 {Anything, "IED", Nothing, "aId"},
287 {Anything, "IEN", Anything, "ien"},
288 {Anything, "IE", "T", "aIe"},
289 {" :", "I", "%", "aI"},
290 {Anything, "I", "%", "i"},
291 {Anything, "IE", Anything, "i"},
292 {Anything, "I", "^+:#", "I"},
293 {Anything, "IR", "#", "aIr"},
294 {Anything, "IZ", "%", "aIz"},
295 {Anything, "IS", "%", "aIz"},
296 {Anything, "I", "D%", "aI"},
297 {"+^", "I", "^+", "I"},
298 {Anything, "I", "T%", "aI"},
299 {"#:^", "I", "^+", "I"},
300 {Anything, "I", "^+", "aI"},
301 {Anything, "IR", Anything, "3"},
302 {Anything, "IGH", Anything, "aI"},
303 {Anything, "ILD", Anything, "aIld"},
304 {Anything, "IGN", Nothing, "aIn"},
305 {Anything, "IGN", "^", "aIn"},
306 {Anything, "IGN", "%", "aIn"},
307 {Anything, "IQUE", Anything, "ik"},
308 {"^", "I", "^#", "aI"},
309 {Anything, "I", Anything, "I"},
310 {Anything, 0, Anything, Silent},
311 };
312
313 static Rule J_rules[] =
314 {
315 {Anything, "J", Anything, "dZ"},
316 {Anything, 0, Anything, Silent},
317 };
318
319 static Rule K_rules[] =
320 {
321 {Nothing, "K", "N", ""},
322 {Anything, "K", Anything, "k"},
323 {Anything, 0, Anything, Silent},
324 };
325
326 static Rule L_rules[] =
327 {
328 {Anything, "LO", "C#", "l@U"},
329 {"L", "L", Anything, ""},
330 {"#:^", "L", "%", "@l"},
331 {Anything, "LEAD", Anything, "lid"},
332 {Anything, "L", Anything, "l"},
333 {Anything, 0, Anything, Silent},
334 };
335
336 static Rule M_rules[] =
337 {
338 {Anything, "MOV", Anything, "muv"},
339 {"#", "MM", "#", "m"},
340 {Anything, "M", Anything, "m"},
341 {Anything, 0, Anything, Silent},
342 };
343
344 static Rule N_rules[] =
345 {
346 {"E", "NG", "+", "ndZ"},
347 {Anything, "NG", "R", "Ng"},
348 {Anything, "NG", "#", "Ng"},
349 {Anything, "NGL", "%", "Ng@l"},
350 {Anything, "NG", Anything, "N"},
351 {Anything, "NK", Anything, "Nk"},
352 {Nothing, "NOW", Nothing, "naU"},
353 {"#", "NG", Nothing, "Ng"},
354 {Anything, "N", Anything, "n"},
355 {Anything, 0, Anything, Silent},
356 };
357
358 static Rule O_rules[] =
359 {
360 {Anything, "OF", Nothing, "@v"},
361 {Anything, "OROUGH", Anything, "3@U"},
362 {"#:", "OR", Nothing, "3"},
363 {"#:", "ORS", Nothing, "3z"},
364 {Anything, "OR", Anything, "Or"},
365 {Nothing, "ONE", Anything, "wVn"},
366 {Anything, "OW", Anything, "@U"},
367 {Nothing, "OVER", Anything, "@Uv3"},
368 {Anything, "OV", Anything, "Vv"},
369 {Anything, "O", "^%", "@U"},
370 {Anything, "O", "^EN", "@U"},
371 {Anything, "O", "^I#", "@U"},
372 {Anything, "OL", "D", "@Ul"},
373 {Anything, "OUGHT", Anything, "Ot"},
374 {Anything, "OUGH", Anything, "Vf"},
375 {Nothing, "OU", Anything, "aU"},
376 {"H", "OU", "S#", "aU"},
377 {Anything, "OUS", Anything, "@s"},
378 {Anything, "OUR", Anything, "Or"},
379 {Anything, "OULD", Anything, "Ud"},
380 {"^", "OU", "^L", "V"},
381 {Anything, "OUP", Anything, "up"},
382 {Anything, "OU", Anything, "aU"},
383 {Anything, "OY", Anything, "oI"},
384 {Anything, "OING", Anything, "@UIN"},
385 {Anything, "OI", Anything, "oI"},
386 {Anything, "OOR", Anything, "Or"},
387 {Anything, "OOK", Anything, "Uk"},
388 {Anything, "OOD", Anything, "Ud"},
389 {Anything, "OO", Anything, "u"},
390 {Anything, "O", "E", "@U"},
391 {Anything, "O", Nothing, "@U"},
392 {Anything, "OA", Anything, "@U"},
393 {Nothing, "ONLY", Anything, "@Unli"},
394 {Nothing, "ONCE", Anything, "wVns"},
395 {Anything, "ON'T", Anything, "@Unt"},
396 {"C", "O", "N", "0"},
397 {Anything, "O", "NG", "O"},
398 {" :^", "O", "N", "V"},
399 {"I", "ON", Anything, "@n"},
400 {"#:", "ON", Nothing, "@n"},
401 {"#^", "ON", Anything, "@n"},
402 {Anything, "O", "ST ", "@U"},
403 {Anything, "OF", "^", "Of"},
404 {Anything, "OTHER", Anything, "VD3"},
405 {Anything, "OSS", Nothing, "Os"},
406 {"#:^", "OM", Anything, "Vm"},
407 {Anything, "O", Anything, "0"},
408 {Anything, 0, Anything, Silent},
409 };
410
411 static Rule P_rules[] =
412 {
413 {Anything, "PH", Anything, "f"},
414 {Anything, "PEOP", Anything, "pip"},
415 {Anything, "POW", Anything, "paU"},
416 {Anything, "PUT", Nothing, "pUt"},
417 {Anything, "P", Anything, "p"},
418 {Anything, 0, Anything, Silent},
419 };
420
421 static Rule Q_rules[] =
422 {
423 {Anything, "QUAR", Anything, "kwOr"},
424 {Anything, "QU", Anything, "kw"},
425 {Anything, "Q", Anything, "k"},
426 {Anything, 0, Anything, Silent},
427 };
428
429 static Rule R_rules[] =
430 {
431 {Nothing, "RE", "^#", "ri"},
432 {Anything, "R", Anything, "r"},
433 {Anything, 0, Anything, Silent},
434 };
435
436 static Rule S_rules[] =
437 {
438 {Anything, "SH", Anything, "S"},
439 {"#", "SION", Anything, "Z@n"},
440 {Anything, "SOME", Anything, "sVm"},
441 {"#", "SUR", "#", "Z3"},
442 {Anything, "SUR", "#", "S3"},
443 {"#", "SU", "#", "Zu"},
444 {"#", "SSU", "#", "Su"},
445 {"#", "SED", Nothing, "zd"},
446 {"#", "S", "#", "z"},
447 {Anything, "SAID", Anything, "sed"},
448 {"^", "SION", Anything, "S@n"},
449 {Anything, "S", "S", ""},
450 {".", "S", Nothing, "z"},
451 {"#:.E", "S", Nothing, "z"},
452 {"#:^##", "S", Nothing, "z"},
453 {"#:^#", "S", Nothing, "s"},
454 {"U", "S", Nothing, "s"},
455 {" :#", "S", Nothing, "z"},
456 {Nothing, "SCH", Anything, "sk"},
457 {Anything, "S", "C+", ""},
458 {"#", "SM", Anything, "zm"},
459 {"#", "SN", "'", "z@n"},
460 {Anything, "S", Anything, "s"},
461 {Anything, 0, Anything, Silent},
462 };
463
464 static Rule T_rules[] =
465 {
466 {Nothing, "THE", Nothing, "D@"},
467 {Anything, "TO", Nothing, "tu"},
468 {Anything, "THAT", Nothing, "D&t"},
469 {Nothing, "THIS", Nothing, "DIs"},
470 {Nothing, "THEY", Anything, "DeI"},
471 {Nothing, "THERE", Anything, "Der"},
472 {Anything, "THER", Anything, "D3"},
473 {Anything, "THEIR", Anything, "Der"},
474 {Nothing, "THAN", Nothing, "D&n"},
475 {Nothing, "THEM", Nothing, "Dem"},
476 {Anything, "THESE", Nothing, "Diz"},
477 {Nothing, "THEN", Anything, "Den"},
478 {Anything, "THROUGH", Anything, "Tru"},
479 {Anything, "THOSE", Anything, "D@Uz"},
480 {Anything, "THOUGH", Nothing, "D@U"},
481 {Nothing, "THUS", Anything, "DVs"},
482 {Anything, "TH", Anything, "T"},
483 {"#:", "TED", Nothing, "tId"},
484 {"S", "TI", "#N", "tS"},
485 {Anything, "TI", "O", "S"},
486 {Anything, "TI", "A", "S"},
487 {Anything, "TIEN", Anything, "S@n"},
488 {Anything, "TUR", "#", "tS3"},
489 {Anything, "TU", "A", "tSu"},
490 {Nothing, "TWO", Anything, "tu"},
491 {Anything, "T", Anything, "t"},
492 {Anything, 0, Anything, Silent},
493 };
494
495 static Rule U_rules[] =
496 {
497 {Nothing, "UN", "I", "jun"},
498 {Nothing, "UN", Anything, "Vn"},
499 {Nothing, "UPON", Anything, "@pOn"},
500 {"T", "UR", "#", "Ur"},
501 {"S", "UR", "#", "Ur"},
502 {"R", "UR", "#", "Ur"},
503 {"D", "UR", "#", "Ur"},
504 {"L", "UR", "#", "Ur"},
505 {"Z", "UR", "#", "Ur"},
506 {"N", "UR", "#", "Ur"},
507 {"J", "UR", "#", "Ur"},
508 {"TH", "UR", "#", "Ur"},
509 {"CH", "UR", "#", "Ur"},
510 {"SH", "UR", "#", "Ur"},
511 {Anything, "UR", "#", "jUr"},
512 {Anything, "UR", Anything, "3"},
513 {Anything, "U", "^ ", "V"},
514 {Anything, "U", "^^", "V"},
515 {Anything, "UY", Anything, "aI"},
516 {" G", "U", "#", ""},
517 {"G", "U", "%", ""},
518 {"G", "U", "#", "w"},
519 {"#N", "U", Anything, "ju"},
520 {"T", "U", Anything, "u"},
521 {"S", "U", Anything, "u"},
522 {"R", "U", Anything, "u"},
523 {"D", "U", Anything, "u"},
524 {"L", "U", Anything, "u"},
525 {"Z", "U", Anything, "u"},
526 {"N", "U", Anything, "u"},
527 {"J", "U", Anything, "u"},
528 {"TH", "U", Anything, "u"},
529 {"CH", "U", Anything, "u"},
530 {"SH", "U", Anything, "u"},
531 {Anything, "U", Anything, "ju"},
532 {Anything, 0, Anything, Silent},
533 };
534
535 static Rule V_rules[] =
536 {
537 {Anything, "VIEW", Anything, "vju"},
538 {Anything, "V", Anything, "v"},
539 {Anything, 0, Anything, Silent},
540 };
541
542 static Rule W_rules[] =
543 {
544 {Nothing, "WERE", Anything, "w3"},
545 {Anything, "WA", "S", "w0"},
546 {Anything, "WA", "T", "w0"},
547 {Anything, "WHERE", Anything, "hwer"},
548 {Anything, "WHAT", Anything, "hw0t"},
549 {Anything, "WHOL", Anything, "h@Ul"},
550 {Anything, "WHO", Anything, "hu"},
551 {Anything, "WH", Anything, "hw"},
552 {Anything, "WAR", Anything, "wOr"},
553 {Anything, "WOR", "^", "w3"},
554 {Anything, "WR", Anything, "r"},
555 {Anything, "W", Anything, "w"},
556 {Anything, 0, Anything, Silent},
557 };
558
559 static Rule X_rules[] =
560 {
561 {Anything, "X", Anything, "ks"},
562 {Anything, 0, Anything, Silent},
563 };
564
565 static Rule Y_rules[] =
566 {
567 {Anything, "YOUNG", Anything, "jVN"},
568 {Nothing, "YOU", Anything, "ju"},
569 {Nothing, "YES", Anything, "jes"},
570 {Nothing, "Y", Anything, "j"},
571 {"#:^", "Y", Nothing, "i"},
572 {"#:^", "Y", "I", "i"},
573 {" :", "Y", Nothing, "aI"},
574 {" :", "Y", "#", "aI"},
575 {" :", "Y", "^+:#", "I"},
576 {" :", "Y", "^#", "aI"},
577 {Anything, "Y", Anything, "I"},
578 {Anything, 0, Anything, Silent},
579 };
580
581 static Rule Z_rules[] =
582 {
583 {Anything, "Z", Anything, "z"},
584 {Anything, 0, Anything, Silent},
585 };
586
587 static Rule *Rules[] =
588 {
589 punct_rules,
590 A_rules, B_rules, C_rules, D_rules, E_rules, F_rules, G_rules,
591 H_rules, I_rules, J_rules, K_rules, L_rules, M_rules, N_rules,
592 O_rules, P_rules, Q_rules, R_rules, S_rules, T_rules, U_rules,
593 V_rules, W_rules, X_rules, Y_rules, Z_rules
594 };
595
596
/* Cardinal number words for 0..19, indexed by value */
static const char *Cardinals[] =
{
    /*  0 ..  9 */
    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
    /* 10 .. 19 */
    "ten", "eleven", "twelve", "thirteen", "fourteen",
    "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"
};
604
605
/* Cardinal words for the tens 20, 30, ... 90; index is (tens digit - 2) */
static const char *Twenties[] =
{
    "twenty",
    "thirty",
    "forty",
    "fifty",
    "sixty",
    "seventy",
    "eighty",
    "ninety"
};
611
612
/* Ordinal number words for 0..19, indexed by value */
static const char *Ordinals[] =
{
    /*  0 ..  9 */
    "zeroth", "first", "second", "third", "fourth",
    "fifth", "sixth", "seventh", "eighth", "ninth",
    /* 10 .. 19 */
    "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth",
    "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth"
};
620
621
/* Ordinal words for the tens 20, 30, ... 90; index is (tens digit - 2) */
static const char *Ord_twenties[] =
{
    "twentieth",
    "thirtieth",
    "fortieth",
    "fiftieth",
    "sixtieth",
    "seventieth",
    "eightieth",
    "ninetieth"
};
627
628
629 /*
630 ** Translate a number to phonemes. This version is for CARDINAL numbers.
631 ** Note: this is recursive.
632 */
633 static int xlate_cardinal(int value, darray *phone)
634 {
635 int nph = 0;
636
637 if (value < 0)
638 {
639 nph += xlate_string("minus", phone);
640 value = (-value);
641
642 if (value < 0) /* Overflow! -32768 */
643 {
644 nph += xlate_string("a lot", phone);
645 return nph;
646 }
647 }
648
649 if (value >= 1000000000L)
650 /* Billions */
651 {
652 nph += xlate_cardinal(value / 1000000000L, phone);
653 nph += xlate_string("billion", phone);
654 value = value % 1000000000;
655
656 if (value == 0)
657 return nph; /* Even billion */
658
659 if (value < 100)
660 nph += xlate_string("and", phone);
661
662 /* as in THREE BILLION AND FIVE */
663 }
664
665 if (value >= 1000000L)
666 /* Millions */
667 {
668 nph += xlate_cardinal(value / 1000000L, phone);
669 nph += xlate_string("million", phone);
670 value = value % 1000000L;
671
672 if (value == 0)
673 return nph; /* Even million */
674
675 if (value < 100)
676 nph += xlate_string("and", phone);
677
678 /* as in THREE MILLION AND FIVE */
679 }
680
681 /* Thousands 1000..1099 2000..99999 */
682 /* 1100 to 1999 is eleven-hunderd to ninteen-hunderd */
683
684 if ((value >= 1000L && value <= 1099L) || value >= 2000L)
685 {
686 nph += xlate_cardinal(value / 1000L, phone);
687 nph += xlate_string("thousand", phone);
688 value = value % 1000L;
689
690 if (value == 0)
691 return nph; /* Even thousand */
692
693 if (value < 100)
694 nph += xlate_string("and", phone);
695
696 /* as in THREE THOUSAND AND FIVE */
697 }
698
699 if (value >= 100L)
700 {
701 nph += xlate_string(Cardinals[value / 100], phone);
702 nph += xlate_string("hundred", phone);
703 value = value % 100;
704
705 if (value == 0)
706 return nph; /* Even hundred */
707 }
708
709 if (value >= 20)
710 {
711 nph += xlate_string(Twenties[(value - 20) / 10], phone);
712 value = value % 10;
713
714 if (value == 0)
715 return nph; /* Even ten */
716 }
717
718 nph += xlate_string(Cardinals[value], phone);
719
720 return nph;
721 }
722
#if 0
/*
** Translate a number to phonemes.  This version is for ORDINAL numbers.
** Note: this is recursive.  (Currently compiled out.)
**
** value - the number to speak; negative numbers are prefixed with "minus"
** phone - output array that receives the phonemes (via xlate_string)
**
** Only the last spoken element takes the ordinal form ("three thousandth",
** "twenty first"); the leading groups are spoken with xlate_cardinal().
**
** INT_MIN is tested before negation because negating it is signed
** overflow (undefined behaviour).
*/
static int xlate_ordinal(int value, darray *phone)
{
    int nph = 0;

    if (value < 0)
    {
        nph += xlate_string("minus", phone);

        if (value == INT_MIN)
        {
            nph += xlate_string("a lot", phone);
            return nph;
        }

        value = -value;
    }

    if (value >= 1000000000)
    {
        /* Billions */
        nph += xlate_cardinal(value / 1000000000, phone);
        value = value % 1000000000;

        if (value == 0)
        {
            nph += xlate_string("billionth", phone);
            return nph;     /* Even billion */
        }

        nph += xlate_string("billion", phone);

        if (value < 100)
            nph += xlate_string("and", phone);  /* as in THREE BILLION AND FIVE */
    }

    if (value >= 1000000)
    {
        /* Millions */
        nph += xlate_cardinal(value / 1000000, phone);
        value = value % 1000000;

        if (value == 0)
        {
            nph += xlate_string("millionth", phone);
            return nph;     /* Even million */
        }

        nph += xlate_string("million", phone);

        if (value < 100)
            nph += xlate_string("and", phone);  /* as in THREE MILLION AND FIVE */
    }

    /* Thousands: 1000..1099 and 2000..999999.               */
    /* 1100 to 1999 is spoken as eleven-hundred to nineteen-hundred */
    if ((value >= 1000 && value <= 1099) || value >= 2000)
    {
        nph += xlate_cardinal(value / 1000, phone);
        value = value % 1000;

        if (value == 0)
        {
            nph += xlate_string("thousandth", phone);
            return nph;     /* Even thousand */
        }

        nph += xlate_string("thousand", phone);

        if (value < 100)
            nph += xlate_string("and", phone);  /* as in THREE THOUSAND AND FIVE */
    }

    if (value >= 100)
    {
        nph += xlate_string(Cardinals[value / 100], phone);
        value = value % 100;

        if (value == 0)
        {
            nph += xlate_string("hundredth", phone);
            return nph;     /* Even hundred */
        }

        nph += xlate_string("hundred", phone);
    }

    if (value >= 20)
    {
        if ((value % 10) == 0)
        {
            nph += xlate_string(Ord_twenties[(value - 20) / 10], phone);
            return nph;     /* Even ten */
        }

        nph += xlate_string(Twenties[(value - 20) / 10], phone);

        value = value % 10;
    }

    nph += xlate_string(Ordinals[value], phone);

    return nph;
}
#endif
838
/* Return 1 when chr is one of the upper case vowels A, E, I, O, U; otherwise 0 */
static int isvowel(int chr)
{
    switch (chr)
    {
    case 'A':
    case 'E':
    case 'I':
    case 'O':
    case 'U':
        return 1;

    default:
        return 0;
    }
}
844
845 static int isconsonant(int chr)
846 {
847 return (isupper(chr) && !isvowel(chr));
848 }
849
850 static int leftmatch(
851 const char *pattern, /* first char of pattern to match in text */
852 const char *context) /* last char of text to be matched */
853
854 {
855 const char *pat;
856 const char *text;
857 int count;
858
859 if (*pattern == '\0')
860 /* null string matches any context */
861 {
862 return 1;
863 }
864
865 /* point to last character in pattern string */
866 count = (int)strlen(pattern);
867
868 pat = pattern + (count - 1);
869
870 text = context;
871
872 for (; count > 0; pat--, count--)
873 {
874 /* First check for simple text or space */
875 if (isalpha(*pat) || *pat == '\'' || *pat == ' ')
876 {
877 if (*pat != *text)
878 {
879 return 0;
880 }
881 else
882 {
883 text--;
884 continue;
885 }
886 }
887
888 switch (*pat)
889 {
890
891 case '#': /* One or more vowels */
892
893 if (!isvowel(*text))
894 return 0;
895
896 text--;
897
898 while (isvowel(*text))
899 text--;
900
901 break;
902
903 case ':': /* Zero or more consonants */
904 while (isconsonant(*text))
905 text--;
906
907 break;
908
909 case '^': /* One consonant */
910 if (!isconsonant(*text))
911 return 0;
912
913 text--;
914
915 break;
916
917 case '.': /* B, D, V, G, J, L, M, N, R, W, Z */
918 if (*text != 'B' && *text != 'D' && *text != 'V'
919 && *text != 'G' && *text != 'J' && *text != 'L'
920 && *text != 'M' && *text != 'N' && *text != 'R'
921 && *text != 'W' && *text != 'Z')
922 return 0;
923
924 text--;
925
926 break;
927
928 case '+': /* E, I or Y (front vowel) */
929 if (*text != 'E' && *text != 'I' && *text != 'Y')
930 return 0;
931
932 text--;
933
934 break;
935
936 case '%':
937
938 default:
939 fprintf(stderr, "Bad char in left rule: '%c'\n", *pat);
940
941 return 0;
942 }
943 }
944
945 return 1;
946 }
947
948 static int rightmatch(
949 const char *pattern, /* first char of pattern to match in text */
950 const char *context) /* last char of text to be matched */
951 {
952 const char *pat;
953 const char *text;
954
955 if (*pattern == '\0')
956 /* null string matches any context */
957 return 1;
958
959 pat = pattern;
960
961 text = context;
962
963 for (pat = pattern; *pat != '\0'; pat++)
964 {
965 /* First check for simple text or space */
966 if (isalpha(*pat) || *pat == '\'' || *pat == ' ')
967 {
968 if (*pat != *text)
969 {
970 return 0;
971 }
972 else
973 {
974 text++;
975 continue;
976 }
977 }
978
979 switch (*pat)
980 {
981
982 case '#': /* One or more vowels */
983
984 if (!isvowel(*text))
985 return 0;
986
987 text++;
988
989 while (isvowel(*text))
990 text++;
991
992 break;
993
994 case ':': /* Zero or more consonants */
995 while (isconsonant(*text))
996 text++;
997
998 break;
999
1000 case '^': /* One consonant */
1001 if (!isconsonant(*text))
1002 return 0;
1003
1004 text++;
1005
1006 break;
1007
1008 case '.': /* B, D, V, G, J, L, M, N, R, W, Z */
1009 if (*text != 'B' && *text != 'D' && *text != 'V'
1010 && *text != 'G' && *text != 'J' && *text != 'L'
1011 && *text != 'M' && *text != 'N' && *text != 'R'
1012 && *text != 'W' && *text != 'Z')
1013 return 0;
1014
1015 text++;
1016
1017 break;
1018
1019 case '+': /* E, I or Y (front vowel) */
1020 if (*text != 'E' && *text != 'I' && *text != 'Y')
1021 return 0;
1022
1023 text++;
1024
1025 break;
1026
1027 case '%': /* ER, E, ES, ED, ING, ELY (a suffix) */
1028 if (*text == 'E')
1029 {
1030 text++;
1031
1032 if (*text == 'L')
1033 {
1034 text++;
1035
1036 if (*text == 'Y')
1037 {
1038 text++;
1039 break;
1040 }
1041
1042 else
1043 {
1044 text--; /* Don't gobble L */
1045 break;
1046 }
1047 }
1048
1049 else
1050 if (*text == 'R' || *text == 'S' || *text == 'D')
1051 text++;
1052
1053 break;
1054 }
1055
1056 else
1057 if (*text == 'I')
1058 {
1059 text++;
1060
1061 if (*text == 'N')
1062 {
1063 text++;
1064
1065 if (*text == 'G')
1066 {
1067 text++;
1068 break;
1069 }
1070 }
1071
1072 return 0;
1073 }
1074
1075 else
1076 return 0;
1077
1078 default:
1079 fprintf(stderr, "Bad char in right rule:'%c'\n", *pat);
1080
1081 return 0;
1082 }
1083 }
1084
1085 return 1;
1086 }
1087
1088 static void phone_cat(darray *arg, const char *s)
1089 {
1090 char ch;
1091
1092 while ((ch = *s++))
1093 arg->put(ch);
1094 }
1095
1096
1097 static int find_rule(darray *arg, char *word, int index, Rule *rules)
1098 {
1099 for (;;) /* Search for the rule */
1100 {
1101 Rule *rule;
1102 const char *left,
1103 *match,
1104 *right,
1105 *output;
1106 int remainder;
1107 rule = rules++;
1108 match = (*rule)[1];
1109
1110 if (match == 0)
1111 /* bad symbol! */
1112 {
1113 fprintf(stderr, "Error: Can't find rule for: '%c' in \"%s\"\n",
1114 word[index], word);
1115 return index + 1; /* Skip it! */
1116 }
1117
1118 for (remainder = index; *match != '\0'; match++, remainder++)
1119 {
1120 if (*match != word[remainder])
1121 break;
1122 }
1123
1124 if (*match != '\0')
1125 continue; /* found missmatch */
1126
1127 left = (*rule)[0];
1128
1129 right = (*rule)[2];
1130
1131 if (!leftmatch(left, &word[index - 1]))
1132 continue;
1133
1134 if (!rightmatch(right, &word[remainder]))
1135 continue;
1136
1137 output = (*rule)[3];
1138
1139 phone_cat(arg, output);
1140
1141 return remainder;
1142 }
1143 }
1144
1145 static void guess_word(darray *arg, char *word)
1146 {
1147 int index; /* Current position in word */
1148 int type; /* First letter of match part */
1149 index = 1; /* Skip the initial blank */
1150
1151 do
1152 {
1153 if (isupper(word[index]))
1154 type = word[index] - 'A' + 1;
1155 else
1156 type = 0;
1157
1158 index = find_rule(arg, word, index, Rules[type]);
1159 }
1160
1161 while (word[index] != '\0');
1162 }
1163
1164
1165 static int NRL(const char *s, int n, darray *phone)
1166 {
1167 int old = phone->getSize();
1168 char *word = (char *) malloc(n + 3); // TODO: may return null
1169 char *d = word;
1170 *d++ = ' ';
1171
1172 while (n-- > 0)
1173 {
1174 char ch = *s++;
1175
1176 if (islower(ch))
1177 ch = (char)toupper(ch);
1178
1179 *d++ = ch;
1180 }
1181
1182 *d++ = ' '; // kinda unnecessary
1183
1184 *d = '\0';
1185 guess_word(phone, word);
1186 free(word);
1187 return phone->getSize() - old;
1188 }
1189
1190
1191 static int spell_out(const char *word, int n, darray *phone)
1192 {
1193 int nph = 0;
1194
1195 while (n-- > 0)
1196 {
1197 nph += xlate_string(ASCII[*word++ & 0x7F], phone);
1198 }
1199
1200 return nph;
1201 }
1202
/*
** Decide whether a word looks unpronounceable and should be spelled out.
**
** s - first character of the word
** n - number of characters to examine
**
** Returns non-zero when the word
**   - contains no vowel (A, E, I, O, U or Y, in either case), or
**   - contains no lower case letter at all, or
**   - mixes lower case with an upper case letter that is neither the first
**     character nor directly after a '-'.
**
** Characters are read as unsigned char because the <ctype.h> functions are
** undefined for negative values, which plain char yields for bytes >= 0x80.
*/
static int suspect_word(const char *s, int n)
{
    int seen_lower = 0;
    int seen_upper = 0;
    int seen_vowel = 0;
    int last = 0;
    int i;

    for (i = 0; i < n; i++)
    {
        int ch = (unsigned char)s[i];

        if (i > 0 && last != '-' && isupper(ch))
            seen_upper = 1;

        if (islower(ch))
        {
            seen_lower = 1;
            ch = toupper(ch);
        }

        if (ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U' || ch == 'Y')
            seen_vowel = 1;

        last = ch;
    }

    return !seen_vowel || (seen_upper && seen_lower) || !seen_lower;
}
1232
1233 static int xlate_word(const char *word, int n, darray *phone)
1234 {
1235 int nph = 0;
1236
1237 if (*word != '[')
1238 {
1239 if (suspect_word(word, n))
1240 return spell_out(word, n, phone);
1241 else
1242 {
1243 nph += NRL(word, n, phone);
1244 }
1245 }
1246
1247 else
1248 {
1249 if ((++word)[(--n) - 1] == ']')
1250 n--;
1251
1252 while (n-- > 0)
1253 {
1254 phone->put(*word++);
1255 nph++;
1256 }
1257 }
1258
1259 phone->put(' ');
1260
1261 return nph + 1;
1262 }
1263
1264
1265 int xlate_string(const char *string, darray *phone)
1266 {
1267 int nph = 0;
1268 const char *s = string;
1269 char ch;
1270
1271 while (isspace(ch = *s))
1272 s++;
1273
1274 while (*s)
1275 {
1276 ch = *s;
1277 const char *word = s;
1278
1279 if (isalpha(ch))
1280 {
1281 while (isalpha(ch = *s) || ((ch == '\'' || ch == '-' || ch == '.') && isalpha(s[1])))
1282 {
1283 s++;
1284 }
1285
1286 if (!ch || isspace(ch) || ispunct(ch) || (isdigit(ch) && !suspect_word(word, (int)(s - word))))
1287 {
1288 nph += xlate_word(word, (int)(s - word), phone);
1289 }
1290 else
1291 {
1292 while (*s && !isspace(*s) && !ispunct(*s))
1293 {
1294 ch = *s;
1295 s++;
1296 }
1297
1298 nph += spell_out(word, (int)(s - word), phone);
1299 }
1300 }
1301 else
1302 {
1303 if (isdigit(ch) || (ch == '-' && isdigit(s[1])))
1304 {
1305 int sign = (ch == '-') ? -1 : 1;
1306 int value = 0;
1307
1308 if (sign < 0)
1309 {
1310 ch = *++s;
1311 }
1312
1313 while (isdigit(ch = *s))
1314 {
1315 value = value * 10 + ch - '0';
1316 s++;
1317 }
1318
1319 if (ch == '.' && isdigit(s[1]))
1320 {
1321 word = ++s;
1322 nph += xlate_cardinal(value * sign, phone);
1323 nph += xlate_string("point", phone);
1324
1325 while (isdigit(ch = *s))
1326 {
1327 s++;
1328 }
1329
1330 nph += spell_out(word, (int)(s - word), phone);
1331 }
1332 else
1333 {
1334 /* check for ordinals, date, time etc. can go in here */
1335 nph += xlate_cardinal(value * sign, phone);
1336 }
1337 }
1338 else
1339 {
1340 if (ch == '[' && strchr(s, ']'))
1341 {
1342 const char *thisword = s;
1343
1344 while (*s && *s++ != ']')
1345 /* nothing */
1346 ;
1347
1348 nph += xlate_word(thisword, (int)(s - thisword), phone);
1349 }
1350 else
1351 {
1352 if (ispunct(ch))
1353 {
1354 switch (ch)
1355 {
1356
1357 case '!':
1358
1359 case '?':
1360
1361 case '.':
1362 s++;
1363 phone->put('.');// (' ');
1364 break;
1365
1366 case '"': /* change pitch ? */
1367
1368 case ':':
1369
1370 case '-':
1371
1372 case ';':
1373
1374 case ',':
1375
1376 case '(':
1377
1378 case ')':
1379 s++;
1380 phone->put(' ');
1381 break;
1382
1383 case '[':
1384 {
1385 const char *e = strchr(s, ']');
1386
1387 if (e)
1388 {
1389 s++;
1390
1391 while (s < e)
1392 phone->put(*s++);
1393
1394 s = e + 1;
1395
1396 break;
1397 }
1398 }
1399 // fallthrough
1400 default:
1401 nph += spell_out(word, 1, phone);
1402 s++;
1403 break;
1404 }
1405 }
1406 else
1407 {
1408 while (*s && !isspace(*s))
1409 {
1410 ch = *s;
1411 s++;
1412 }
1413
1414 nph += spell_out(word, (int)(s - word), phone);
1415 }
1416 }
1417 }
1418
1419 while (isspace(ch = *s))
1420 s++;
1421 }
1422 }
1423
1424 return nph;
1425 }