]> git.bts.cx Git - benzene.git/blob - src/bz/audio/speech.cpp
Update git module URL
[benzene.git] / src / bz / audio / speech.cpp
1 #include <bz/audio/speech.h>
2
3 #include <bz/memory/allocator.h>
4 #include <bz/resources/resource.h>
5 #include <bz/types/identifier_internal.h>
6 #include <darray.h>
7 #include <klatt.h>
8 #include <tts.h>
9 #include <new>
10 #include <parson.h>
11 #include <string.h> // strcmp
12
13 struct BZAudioVoice {
14 darray element;
15 klatt synth;
16 size_t maxFrames;
17 // short *sample;
18 };
19
20 BZAudioVoice *bzAudioLoadSpeechVoice(BZAudioVoiceDetails *detailsOut, BZMemoryArenaID arena, const char *identifierFmt, ...) {
21 bzMakeIdentifier(identifier, identifierFmt);
22
23 int baseFrequency = 1330;
24 float baseSpeed = 10.0f;
25 float baseDeclination = 0.5f;
26 int aBaseWaveform = KW_SAW;
27
28 BZResourceID handle = bzResourcesOpenResource("voices", "assets/voices/%s.voice.json", identifier);
29 size_t length = bzResourcesFileLength(handle);
30
31 char *data = (char *)alloca(length); // FIXME, temporary memory
32 bzResourcesReadBytes(handle, data, length);
33 bzResourcesCloseResource(handle);
34
35 //json_set_allocation_functions
36 JSON_Value *voiceJson = json_parse_string(data);
37 JSON_Object *voiceJsonObject = json_object(voiceJson);
38
39 if (json_object_has_value_of_type(voiceJsonObject, "frequency", JSONNumber)) {
40 baseFrequency = json_object_get_number(voiceJsonObject, "frequency");
41 }
42
43 if (json_object_has_value_of_type(voiceJsonObject, "speed", JSONNumber)) {
44 baseSpeed = json_object_get_number(voiceJsonObject, "speed");
45 }
46
47 if (json_object_has_value_of_type(voiceJsonObject, "declination", JSONNumber)) {
48 baseDeclination = json_object_get_number(voiceJsonObject, "declination");
49 }
50
51 if (json_object_has_value_of_type(voiceJsonObject, "waveform", JSONString)) {
52 const char *waveform = json_object_get_string(voiceJsonObject, "waveform");
53 if (strcmp(waveform, "saw") == 0) {
54 aBaseWaveform = KW_SAW;
55 } else if (strcmp(waveform, "triangle") == 0) {
56 aBaseWaveform = KW_TRIANGLE;
57 } else if (strcmp(waveform, "sin") == 0) {
58 aBaseWaveform = KW_SIN;
59 } else if (strcmp(waveform, "square") == 0) {
60 aBaseWaveform = KW_SQUARE;
61 } else if (strcmp(waveform, "pulse") == 0) {
62 aBaseWaveform = KW_PULSE;
63 } else if (strcmp(waveform, "noise") == 0) {
64 aBaseWaveform = KW_NOISE;
65 } else if (strcmp(waveform, "warble") == 0) {
66 aBaseWaveform = KW_WARBLE;
67 }
68 }
69
70 json_value_free(voiceJson);
71
72 BZAudioVoice *voice = new(bzMemoryAlloc(arena, sizeof(BZAudioVoice))) BZAudioVoice();
73 voice->synth.init(baseFrequency, baseSpeed, baseDeclination, aBaseWaveform);
74 voice->maxFrames = 500;//4 * 1024;
75 // voice->sample = (short *)bzMemoryAlloc(arena, voice->synth.mNspFr * voice->maxFrames * sizeof(short));
76
77 detailsOut->channels = 1;
78 detailsOut->rate = 11025;
79 detailsOut->maxSamples = voice->synth.mNspFr * voice->maxFrames;
80
81 // *sizeOut = outputCount * sizeof(short);
82 // *samplingRateOut = 11025;
83 // *channelsOut = 1;
84 // *bitsPerSampleOut = 16;
85
86 return voice;
87 }
88
89 size_t bzAudioGenerateSpeech(BZAudioVoice *voice, short *dst, size_t dstSize, const char *speechFmt, ...) {
90 bzMakeIdentifier(speech, speechFmt);
91
92 voice->element.clear();
93
94 darray phone; // FIXME, darray should have a stable memory footprint
95 xlate_string(speech, &phone);
96 int frames = klatt::phone_to_elm(phone.getData(), phone.getSize(), &voice->element);
97 bzAssertMessage(frames <= voice->maxFrames, "Too many frames, %d", frames);
98
99 voice->synth.initsynth(voice->element.getSize(), (unsigned char *)voice->element.getData());
100
101 size_t sampleSize = voice->synth.mNspFr * frames;
102 size_t outputCount = 0;
103 while (outputCount < sampleSize) {
104 outputCount += voice->synth.synth(voice->synth.mNspFr /* This seems to be ignored... */, &dst[outputCount]);
105 if (outputCount == sampleSize) { break; }
106 }
107
108 return outputCount * sizeof(short);
109 }