mirror of
https://github.com/Mauler125/r5sdk.git
synced 2025-02-09 19:15:03 +01:00
DirtySDK (EA's Dirty Sockets library) will be used for the LiveAPI implementation, and depends on: EABase, EAThread.
1170 lines
44 KiB
C
1170 lines
44 KiB
C
/*H********************************************************************************/
|
|
/*!
|
|
\File voipnarrate.c
|
|
|
|
\Description
|
|
Voip narration API wrapping Cloud-based text-to-speech services, supporting
|
|
IBM Watson, Microsoft Speech Service, Google Speech, and Amazon Polly.
|
|
Narration requests may be up to 255 characters in length, and overlapping
|
|
requests are queued in order.
|
|
|
|
\Notes
|
|
References
|
|
|
|
IBM Watson:
|
|
Text-to Speech-API: https://www.ibm.com/watson/developercloud/text-to-speech/api/v1/curl.html
|
|
|
|
Microsoft Speech Service:
|
|
Text-to-Speech How-To: https://docs.microsoft.com/en-us/azure/cognitive-services/Speech-Service/how-to-text-to-speech
|
|
|
|
Google Text-to-Speech
|
|
Text-to-Speech API: https://cloud.google.com/text-to-speech/docs/reference/rest/
|
|
|
|
Amazon Polly
|
|
SynthesizeSpeech API: https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html
|
|
Amazon Endpoint Names: https://docs.aws.amazon.com/general/latest/gr/rande.html
|
|
VoiceId List: https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html#polly-SynthesizeSpeech-request-VoiceId
|
|
|
|
WAV:
|
|
WAVE file format: https://en.wikipedia.org/wiki/WAV#RIFF_WAVE
|
|
|
|
\Copyright
|
|
Copyright 2018 Electronic Arts
|
|
|
|
\Version 10/25/2018 (jbrookes) First Version
|
|
*/
|
|
/********************************************************************************H*/
|
|
|
|
/*** Include files ****************************************************************/
|
|
|
|
#include <string.h>
|
|
|
|
#include "DirtySDK/platform.h"
|
|
#include "DirtySDK/dirtysock.h"
|
|
#include "DirtySDK/dirtysock/dirtymem.h"
|
|
#include "DirtySDK/proto/protostream.h"
|
|
#include "DirtySDK/util/aws.h"
|
|
#include "DirtySDK/util/base64.h"
|
|
#include "DirtySDK/util/jsonformat.h"
|
|
#include "DirtySDK/util/jsonparse.h"
|
|
#include "DirtySDK/voip/voipdef.h"
|
|
|
|
#include "DirtySDK/voip/voipnarrate.h"
|
|
|
|
/*** Defines **********************************************************************/
|
|
|
|
//! protostream minimum data amount (for base64 decoding; four is the minimum amount but that produces one and a half samples, so we choose eight, BUT...
|
|
#define VOIPNARRATE_MINBUF (8)
|
|
|
|
//! how many ms of audio received should we treat as being empty audio (for metrics)
|
|
#define VOIPNARRATE_EMPTY_AUDIO_THRESHOLD_MS (300)
|
|
|
|
/*** Macros ***********************************************************************/
|
|
|
|
/*** Type Definitions *************************************************************/
|
|
|
|
typedef struct VoipNarrateConfigT
|
|
{
|
|
VoipNarrateProviderE eProvider; //!< configured provider
|
|
char strUrl[256]; //!< URL for text-to-speech request
|
|
char strKey[128]; //!< API key required for service authentication
|
|
} VoipNarrateConfigT;
|
|
|
|
//! narration request data
|
|
typedef struct VoipNarrateRequestT
|
|
{
|
|
struct VoipNarrateRequestT *pNext;
|
|
VoipNarrateGenderE eGender;
|
|
char strText[VOIPNARRATE_INPUT_MAX];
|
|
int8_t iUserIndex;
|
|
} VoipNarrateRequestT;
|
|
|
|
struct VoipNarrateRefT
|
|
{
|
|
int32_t iMemGroup;
|
|
void *pMemGroupUserData;
|
|
|
|
ProtoStreamRefT *pProtoStream; //!< stream transport module to handle buffered download of audio data
|
|
|
|
VoipNarrateVoiceDataCbT *pVoiceDataCb; //!< user callback used to provide voice data
|
|
void *pUserData; //!< user data for user callback
|
|
|
|
VoipNarrateRequestT *pRequest; //!< list of queued requests, if any
|
|
VoipNarrateConfigT Config; //!< module configuration (provider and credentials)
|
|
|
|
char strHead[256]; //!< http head for narration request
|
|
char strBody[512]; //!< http body for narration request
|
|
const char *pBody; //!< pointer to start of body (may not match buffer start)
|
|
|
|
VoipTextToSpeechMetricsT Metrics; //!< Usage metrics of the narration module
|
|
uint32_t uTtsStartTime; //!< time when we sent the request
|
|
|
|
uint8_t aVoiceBuf[160*3*2]; //!< base64 decode buffer, sized for one 30ms frame of 16khz 16bit voice audio, also a multiple of three bytes to accomodate base64 4->3 ratio
|
|
int32_t iVoiceOff; //!< read offset in buffered voice data
|
|
int32_t iVoiceLen; //!< end of buffered voice data
|
|
int32_t iSamplesInPhrase; //!< total number of samples received for this phrase
|
|
|
|
uint8_t bStart; //!< TRUE if start of stream download, else FALSE
|
|
uint8_t bActive; //!< TRUE if stream is active, else FALSE
|
|
int8_t iUserIndex; //!< index of local user current request is being made for
|
|
int8_t iVerbose; //!< verbose debug level (debug only)
|
|
};
|
|
|
|
/*** Variables ********************************************************************/
|
|
|
|
//! global config state
|
|
static VoipNarrateConfigT _VoipNarrate_Config = { VOIPNARRATE_PROVIDER_NONE, "", "" };
|
|
|
|
/*** Private Functions ************************************************************/
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateCustomHeaderCb
|
|
|
|
\Description
|
|
Custom header callback used to sign AWS requests
|
|
|
|
\Input *pState - http module state
|
|
\Input *pHeader - pointer to http header buffer
|
|
\Input uHeaderSize - size of http header buffer
|
|
\Input *pData - pointer to data (unused)
|
|
\Input iDataLen - data length (unused)
|
|
\Input *pUserRef - voipnarrate ref
|
|
|
|
\Output
|
|
int32_t - output header length
|
|
|
|
\Version 12/28/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static int32_t _VoipNarrateCustomHeaderCb(ProtoHttpRefT *pState, char *pHeader, uint32_t uHeaderSize, const char *pData, int64_t iDataLen, void *pUserRef)
|
|
{
|
|
VoipNarrateRefT *pVoipNarrate = (VoipNarrateRefT *)pUserRef;
|
|
int32_t iHdrLen = (int32_t)strlen(pHeader);
|
|
|
|
// if amazon and we have room, sign the request
|
|
if ((pVoipNarrate->Config.eProvider != VOIPNARRATE_PROVIDER_AMAZON) || (uHeaderSize < (unsigned)iHdrLen))
|
|
{
|
|
return(iHdrLen);
|
|
}
|
|
|
|
// sign the request and return the updated size
|
|
iHdrLen += AWSSignSigV4(pHeader, uHeaderSize, pVoipNarrate->pBody, pVoipNarrate->Config.strKey, "polly", NULL);
|
|
// return size to protohttp
|
|
return(iHdrLen);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateSkipWavHeader
|
|
|
|
\Description
|
|
Return offset past WAV header in input data
|
|
|
|
\Input *pData - pointer to wav header
|
|
\Input iDataLen - length of data
|
|
|
|
\Output
|
|
int32_t - offset past WAV header, or zero
|
|
|
|
\Version 11/06/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static int32_t _VoipNarrateSkipWavHeader(const uint8_t *pData, int32_t iDataLen)
|
|
{
|
|
int32_t iOffset = 0, iChkLen;
|
|
uint8_t bFoundData;
|
|
|
|
// validate and skip RIFF/WAVE header
|
|
if ((iDataLen < 12) || ds_strnicmp((const char *)pData, "RIFF", 4) || ds_strnicmp((const char *)pData+8, "WAVE", 4))
|
|
{
|
|
return(0);
|
|
}
|
|
iOffset += 12;
|
|
|
|
// process chunks
|
|
for (bFoundData = FALSE; iOffset < (iDataLen+12); iOffset += iChkLen+8)
|
|
{
|
|
// get chunk length
|
|
iChkLen = pData[iOffset+4];
|
|
iChkLen |= pData[iOffset+5]<<8;
|
|
iChkLen |= pData[iOffset+6]<<16;
|
|
iChkLen |= pData[iOffset+7]<<24;
|
|
|
|
// look for data chunk
|
|
if (!ds_strnicmp((const char *)pData+iOffset, "data", 4))
|
|
{
|
|
bFoundData = TRUE;
|
|
iOffset += 8;
|
|
break;
|
|
}
|
|
}
|
|
return(bFoundData ? iOffset : 0);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateBasicAuth
|
|
|
|
\Description
|
|
Encode Basic HTTP authorization header as per https://tools.ietf.org/html/rfc7617
|
|
|
|
\Input *pBuffer - [out] output buffer for encoded base64 string
|
|
\Input iBufSize - size of output buffer
|
|
\Input *pUser - user identifer
|
|
\Input *pPass - user password
|
|
|
|
\Output
|
|
const char * - pointer to output buffer
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static const char *_VoipNarrateBasicAuth(char *pBuffer, int32_t iBufSize, const char *pUser, const char *pPass)
|
|
{
|
|
char strAuth[128];
|
|
ds_snzprintf(strAuth, sizeof(strAuth), "%s:%s", pUser, pPass);
|
|
Base64Encode2(strAuth, (int32_t)strlen(strAuth), pBuffer, iBufSize);
|
|
return(pBuffer);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateBase64Decode
|
|
|
|
\Description
|
|
Decode Base64-encoded voice data
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pOutput - [out] buffer to hold decoded voice data
|
|
\Input *pOutSize - [in/out] output buffer length, size of output data
|
|
\Input *pInput - base64-encoded input data
|
|
\Input iInpSize - input buffer length
|
|
|
|
\Output
|
|
int32_t - negative=failure, else input bytes consumed
|
|
|
|
\Version 10/27/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static int32_t _VoipNarrateBase64Decode(VoipNarrateRefT *pVoipNarrate, char *pOutput, int32_t *pOutSize, const char *pInput, int32_t iInpSize)
|
|
{
|
|
static const char _strJson[] = "\"audioContent\":";
|
|
const char *pInput2, *pInpEnd = pInput + iInpSize;
|
|
int32_t iInpOff = 0;
|
|
|
|
// if we have the beginning of json envelope, skip it
|
|
if ((pInput2 = strstr(pInput, _strJson)) != NULL)
|
|
{
|
|
// skip json header
|
|
pInput2 += sizeof(_strJson);
|
|
// skip to base64 data
|
|
for (; (*pInput2 != '"') && (pInput2 < pInpEnd); pInput2 += 1)
|
|
;
|
|
if (*pInput2 != '"')
|
|
{
|
|
return(-1);
|
|
}
|
|
// skip quote
|
|
pInput2 += 1;
|
|
// remember to consume this in addition to base64 data
|
|
iInpOff = pInput2 - pInput;
|
|
pInput = pInput2;
|
|
}
|
|
// if we have end of json envelope, trim it
|
|
if ((pInput2 = strchr(pInput, '"')) != NULL)
|
|
{
|
|
// handle end of data
|
|
if (pInput2 == pInput)
|
|
{
|
|
iInpOff = iInpSize;
|
|
}
|
|
iInpSize = pInput2-pInput;
|
|
}
|
|
|
|
// constrain input size to what will fit in output buffer
|
|
if (iInpSize > Base64EncodedSize(*pOutSize))
|
|
{
|
|
iInpSize = Base64EncodedSize(*pOutSize);
|
|
}
|
|
|
|
// make sure input size is a multiple of four to produce an integral number of output bytes
|
|
iInpSize &= ~0x03;
|
|
|
|
// base64 decode and save output size
|
|
*pOutSize = Base64Decode3(pInput, iInpSize, pOutput, *pOutSize);
|
|
// return number of bytes of input consumed
|
|
return(iInpSize+iInpOff);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateRequestAdd
|
|
|
|
\Description
|
|
Queue request for later sending
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input iUserIndex - local user index of user who is requesting speech synthesis
|
|
\Input eGender - preferred gender for voice narration
|
|
\Input *pText - text to be converted
|
|
|
|
\Output
|
|
int32_t - negative=failure, else success
|
|
|
|
\Version 11/09/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static int32_t _VoipNarrateRequestAdd(VoipNarrateRefT *pVoipNarrate, int32_t iUserIndex, VoipNarrateGenderE eGender, const char *pText)
|
|
{
|
|
VoipNarrateRequestT *pRequest;
|
|
|
|
// allocate and clear the request
|
|
if ((pRequest = DirtyMemAlloc(sizeof(*pRequest), VOIPNARRATE_MEMID, pVoipNarrate->iMemGroup, pVoipNarrate->pMemGroupUserData)) == NULL)
|
|
{
|
|
NetPrintf(("voipnarrate: could not allocate request\n"));
|
|
pVoipNarrate->Metrics.uErrorCount += 1;
|
|
return(-1);
|
|
}
|
|
ds_memclr(pRequest, sizeof(*pRequest));
|
|
|
|
// copy the request data
|
|
ds_strnzcpy(pRequest->strText, pText, sizeof(pRequest->strText));
|
|
pRequest->iUserIndex = iUserIndex;
|
|
pRequest->eGender = eGender;
|
|
|
|
// add to queue
|
|
pRequest->pNext = pVoipNarrate->pRequest;
|
|
pVoipNarrate->pRequest = pRequest;
|
|
|
|
// return success
|
|
return(0);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateRequestGet
|
|
|
|
\Description
|
|
Get queued request
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pRequest - [out] storage for request (may be null)
|
|
|
|
\Version 11/09/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static void _VoipNarrateRequestGet(VoipNarrateRefT *pVoipNarrate, VoipNarrateRequestT *pRequest)
|
|
{
|
|
VoipNarrateRequestT **ppRequest;
|
|
// get oldest request (we add to head, so get from tail)
|
|
for (ppRequest = &pVoipNarrate->pRequest; (*ppRequest)->pNext != NULL; ppRequest = &((*ppRequest)->pNext))
|
|
;
|
|
// copy request
|
|
if (pRequest != NULL)
|
|
{
|
|
ds_memcpy_s(pRequest, sizeof(*pRequest), *ppRequest, sizeof(**ppRequest));
|
|
}
|
|
// free request
|
|
DirtyMemFree(*ppRequest, VOIPNARRATE_MEMID, pVoipNarrate->iMemGroup, pVoipNarrate->pMemGroupUserData);
|
|
// remove from list
|
|
*ppRequest = NULL;
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateFormatHeadWatson
|
|
|
|
\Description
|
|
Format connection header for IBM Watson Speech service
|
|
Ref: https://console.bluemix.net/docs/services/text-to-speech/http.html#usingHTTP
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pUrl - [out] buffer for formatted url
|
|
\Input iUrlLen - length of url buffer
|
|
\Input *pHead - [out] buffer for formatted request header
|
|
\Input iHeadLen - length of header buffer
|
|
\Input eGender - preferred gender for voice narration
|
|
|
|
\Output
|
|
int32_t - negative=failure, else success
|
|
|
|
\Version 11/07/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static const char *_VoipNarrateFormatHeadWatson(VoipNarrateRefT *pVoipNarrate, char *pUrl, int32_t iUrlLen, char *pHead, int32_t iHeadLen, VoipNarrateGenderE eGender)
|
|
{
|
|
char strAuth[128];
|
|
int32_t iOffset=0;
|
|
|
|
// encode Basic authorization string with string apikey:<key>
|
|
_VoipNarrateBasicAuth(strAuth, sizeof(strAuth), "apikey", pVoipNarrate->Config.strKey);
|
|
|
|
// format request header
|
|
iOffset += ds_snzprintf(pHead+iOffset, iHeadLen-iOffset, "Content-Type: application/json\r\n");
|
|
iOffset += ds_snzprintf(pHead+iOffset, iHeadLen-iOffset, "Accept: audio/wav; rate=%d\r\n", VOIPNARRATE_SAMPLERATE);
|
|
iOffset += ds_snzprintf(pHead+iOffset, iHeadLen-iOffset, "Authorization: Basic %s\r\n", strAuth);
|
|
|
|
// format url with voice based on preferred gender
|
|
ds_snzprintf(pUrl, iUrlLen, "%s?voice=%s", pVoipNarrate->Config.strUrl, (eGender == VOIPNARRATE_GENDER_FEMALE) ? "en-US_AllisonVoice" : "en-US_MichaelVoice");
|
|
return(pUrl);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateFormatBodyWatson
|
|
|
|
\Description
|
|
Format request body for IBM Watson Speech service
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pBody - [out] buffer to hold request body
|
|
\Input iBodyLen - buffer length
|
|
\Input *pText - pointer to text request
|
|
|
|
\Output
|
|
int32_t - negative=failure, else success
|
|
|
|
\Version 11/07/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static char *_VoipNarrateFormatBodyWatson(VoipNarrateRefT *pVoipNarrate, char *pBody, int32_t iBodyLen, const char *pText)
|
|
{
|
|
JsonInit(pBody, iBodyLen, JSON_FL_WHITESPACE);
|
|
JsonAddStr(pBody, "text", pText);
|
|
pBody = JsonFinish(pBody);
|
|
return(pBody);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateFormatHeadMicrosoft
|
|
|
|
\Description
|
|
Format connection header for Microsoft Speech service
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pUrl - [out] buffer for formatted url
|
|
\Input iUrlLen - length of url buffer
|
|
\Input *pHead - [out] buffer for formatted request header
|
|
\Input iHeadLen - length of header buffer
|
|
|
|
\Output
|
|
int32_t - negative=failure, else success
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static const char *_VoipNarrateFormatHeadMicrosoft(VoipNarrateRefT *pVoipNarrate, char *pUrl, int32_t iUrlLen, char *pHead, int32_t iHeadLen)
|
|
{
|
|
int32_t iOffset=0;
|
|
|
|
// format request header
|
|
iOffset += ds_snzprintf(pHead+iOffset, iHeadLen-iOffset, "Content-Type: application/ssml+xml\r\n");
|
|
iOffset += ds_snzprintf(pHead+iOffset, iHeadLen-iOffset, "X-Microsoft-OutputFormat: raw-%dkhz-16bit-mono-pcm\r\n", VOIPNARRATE_SAMPLERATE/1000);
|
|
iOffset += ds_snzprintf(pHead+iOffset, iHeadLen-iOffset, "Ocp-Apim-Subscription-Key: %s\r\n", pVoipNarrate->Config.strKey);
|
|
|
|
// copy url
|
|
ds_strnzcpy(pUrl, pVoipNarrate->Config.strUrl, iUrlLen);
|
|
return(pUrl);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateFormatBodyMicrosoft
|
|
|
|
\Description
|
|
Format request body for Microsoft Speech service
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pBody - [out] buffer to hold request body
|
|
\Input iBodyLen - buffer length
|
|
\Input eGender - preferred gender for voice narration
|
|
\Input *pText - pointer to text request
|
|
|
|
\Output
|
|
int32_t - negative=failure, else success
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static char *_VoipNarrateFormatBodyMicrosoft(VoipNarrateRefT *pVoipNarrate, char *pBody, int32_t iBodyLen, VoipNarrateGenderE eGender, const char *pText)
|
|
{
|
|
int32_t iOffset=0;
|
|
// format request body
|
|
iOffset += ds_snzprintf(pBody+iOffset, iBodyLen-iOffset, "<speak version='1.0' xmlns=\"http://www.w3.org/2001/10/synthesis\" xml:lang='en-US'>");
|
|
iOffset += ds_snzprintf(pBody+iOffset, iBodyLen-iOffset, "<voice name='Microsoft Server Speech Text to Speech Voice (en-US, %s)'>%s</voice>",
|
|
(eGender == VOIPNARRATE_GENDER_FEMALE) ? "JessaRUS" : "BenjaminRUS", pText);
|
|
iOffset += ds_snzprintf(pBody+iOffset, iBodyLen-iOffset, "</speak>");
|
|
// return pointer to body
|
|
return(pBody);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateFormatHeadGoogle
|
|
|
|
\Description
|
|
Format connection header for Google Text to Speech
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pUrl - [out] buffer for formatted url
|
|
\Input iUrlLen - length of url buffer
|
|
\Input *pHead - [out] buffer for formatted request header
|
|
\Input iHeadLen - length of header buffer
|
|
|
|
\Output
|
|
int32_t - negative=failure, else success
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static const char *_VoipNarrateFormatHeadGoogle(VoipNarrateRefT *pVoipNarrate, char *pUrl, int32_t iUrlLen, char *pHead, int32_t iHeadLen)
|
|
{
|
|
// format request header
|
|
*pHead = '\0';
|
|
// format request url
|
|
ds_snzprintf(pUrl, iUrlLen, "%s?key=%s", pVoipNarrate->Config.strUrl, pVoipNarrate->Config.strKey);
|
|
// return url
|
|
return(pUrl);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateFormatBodyGoogle
|
|
|
|
\Description
|
|
Format request body for Google text-to-speech request
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pBody - [out] buffer to hold request body
|
|
\Input iBodyLen - buffer length
|
|
\Input eGender - preferred gender for voice narration
|
|
\Input *pText - pointer to text request
|
|
|
|
\Output
|
|
int32_t - negative=failure, else success
|
|
|
|
\Notes
|
|
Ref: https://cloud.google.com/text-to-speech/docs/reference/rest/
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static char * _VoipNarrateFormatBodyGoogle(VoipNarrateRefT *pVoipNarrate, char *pBody, int32_t iBodyLen, VoipNarrateGenderE eGender, const char *pText)
|
|
{
|
|
static const char *_strGender[VOIPNARRATE_NUMGENDERS] = { "SSML_VOICE_GENDER_UNSPECIFIED", "FEMALE", "MALE", "NEUTRAL" };
|
|
static const char *_strVoice[VOIPNARRATE_NUMGENDERS] = { "en-US-Standard-D", "en-US-Standard-C", "en-US-Standard-B", "en-US-Standard-D" };
|
|
JsonInit(pBody, iBodyLen, JSON_FL_WHITESPACE);
|
|
|
|
JsonObjectStart(pBody, "input");
|
|
JsonAddStr(pBody, "text", pText);
|
|
JsonObjectEnd(pBody);
|
|
|
|
JsonObjectStart(pBody, "voice");
|
|
JsonAddStr(pBody, "languageCode", "en-US");
|
|
JsonAddStr(pBody, "name", _strVoice[eGender]);
|
|
JsonAddStr(pBody, "ssmlGender", _strGender[eGender]); // we specify gender here, but it is unclear if it does anything
|
|
JsonObjectEnd(pBody);
|
|
|
|
JsonObjectStart(pBody, "audioConfig");
|
|
JsonAddStr(pBody, "audioEncoding", "LINEAR16");
|
|
JsonAddInt(pBody, "sampleRateHertz", VOIPNARRATE_SAMPLERATE);
|
|
JsonObjectEnd(pBody);
|
|
|
|
pBody = JsonFinish(pBody);
|
|
return(pBody);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateFormatHeadAmazon
|
|
|
|
\Description
|
|
Format connection header for Amazon Polly
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pUrl - [out] buffer for formatted url
|
|
\Input iUrlLen - length of url buffer
|
|
\Input *pHead - [out] buffer for formatted request header
|
|
\Input iHeadLen - length of header buffer
|
|
|
|
\Output
|
|
int32_t - negative=failure, else success
|
|
|
|
\Version 11/21/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static const char *_VoipNarrateFormatHeadAmazon(VoipNarrateRefT *pVoipNarrate, char *pUrl, int32_t iUrlLen, char *pHead, int32_t iHeadLen)
|
|
{
|
|
int32_t iOffset=0;
|
|
// format request header
|
|
iOffset += ds_snzprintf(pHead+iOffset, iHeadLen-iOffset, "Content-Type: application/json\r\n");
|
|
iOffset += ds_snzprintf(pHead+iOffset, iHeadLen-iOffset, "Accept: audio/wav; rate=%d\r\n", VOIPNARRATE_SAMPLERATE);
|
|
// copy url
|
|
ds_strnzcpy(pUrl, pVoipNarrate->Config.strUrl, iUrlLen);
|
|
return(pUrl);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateFormatBodyAmazon
|
|
|
|
\Description
|
|
Format request body for Amazon Polly
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pBody - [out] buffer to hold request body
|
|
\Input iBodyLen - buffer length
|
|
\Input eGender - preferred gender for voice narration
|
|
\Input *pText - pointer to text request
|
|
|
|
\Output
|
|
int32_t - negative=failure, else success
|
|
|
|
\Version 12/21/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static char *_VoipNarrateFormatBodyAmazon(VoipNarrateRefT *pVoipNarrate, char *pBody, int32_t iBodyLen, VoipNarrateGenderE eGender, const char *pText)
|
|
{
|
|
JsonInit(pBody, iBodyLen, JSON_FL_WHITESPACE);
|
|
JsonAddStr(pBody, "OutputFormat", "pcm");
|
|
JsonAddStr(pBody, "Text", pText);
|
|
JsonAddStr(pBody, "VoiceId", (eGender == VOIPNARRATE_GENDER_FEMALE) ? "Joanna" : "Joey");
|
|
pBody = JsonFinish(pBody);
|
|
return(pBody);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateStreamCallbackGoogle
|
|
|
|
\Description
|
|
Decode Base64-encoded voice data
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input eStatus - ProtoStream status
|
|
\Input *pData - base64-encoded input data
|
|
\Input iDataSize - input buffer length
|
|
|
|
\Output
|
|
int32_t - negative=failure, else number of input bytes consumed
|
|
|
|
\Version 10/30/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static int32_t _VoipNarrateStreamCallbackGoogle(VoipNarrateRefT *pVoipNarrate, ProtoStreamStatusE eStatus, const uint8_t *pData, int32_t iDataSize)
|
|
{
|
|
int32_t iDataRead, iDataDecoded;
|
|
|
|
// submit any base64-decoded data we have first
|
|
if (pVoipNarrate->iVoiceLen > 0)
|
|
{
|
|
// if start of stream, see if we need to skip WAV header
|
|
if (pVoipNarrate->bStart)
|
|
{
|
|
pVoipNarrate->iVoiceOff = _VoipNarrateSkipWavHeader(pVoipNarrate->aVoiceBuf, pVoipNarrate->iVoiceLen);
|
|
pVoipNarrate->iVoiceLen -= pVoipNarrate->iVoiceOff;
|
|
pVoipNarrate->bStart = FALSE;
|
|
}
|
|
// pass data to user
|
|
iDataRead = pVoipNarrate->pVoiceDataCb(pVoipNarrate, pVoipNarrate->iUserIndex, (const int16_t *)(pVoipNarrate->aVoiceBuf+pVoipNarrate->iVoiceOff), pVoipNarrate->iVoiceLen, pVoipNarrate->pUserData);
|
|
// mark data as read
|
|
pVoipNarrate->iVoiceOff += iDataRead;
|
|
pVoipNarrate->iVoiceLen -= iDataRead;
|
|
}
|
|
// if we don't have data to decode, or we still have decoded voice data that hasn't been consumed yet, don't decode more
|
|
if ((pVoipNarrate->iVoiceLen > 0) || (iDataSize <= 0))
|
|
{
|
|
return(0);
|
|
}
|
|
pVoipNarrate->iVoiceOff = 0;
|
|
|
|
// base64-decode voice data
|
|
if ((iDataRead = _VoipNarrateBase64Decode(pVoipNarrate, (char *)pVoipNarrate->aVoiceBuf, (iDataDecoded = sizeof(pVoipNarrate->aVoiceBuf), &iDataDecoded), (const char *)pData, iDataSize)) >= 0)
|
|
{
|
|
pVoipNarrate->iVoiceLen = iDataDecoded;
|
|
pVoipNarrate->iSamplesInPhrase += iDataDecoded;
|
|
pData = pVoipNarrate->aVoiceBuf;
|
|
}
|
|
else
|
|
{
|
|
NetPrintf(("voipnarrate: error; could not base64 decode data\n"));
|
|
NetPrintMem(pData, iDataSize, "base64 data");
|
|
pVoipNarrate->Metrics.uErrorCount += 1;
|
|
}
|
|
return(iDataRead);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateStreamCallback
|
|
|
|
\Description
|
|
Receive streamed voice data and submit it to callback
|
|
|
|
\Input *pProtoStream - ProtoStream module state
|
|
\Input eStatus - ProtoStream status
|
|
\Input *pData - base64-encoded input data
|
|
\Input iDataSize - input buffer length
|
|
\Input *pUserData - callback user data (VoipNarrate module ref)
|
|
|
|
\Output
|
|
int32_t - negative=failure, else number of input bytes consumed
|
|
|
|
\Version 10/30/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static int32_t _VoipNarrateStreamCallback(ProtoStreamRefT *pProtoStream, ProtoStreamStatusE eStatus, const uint8_t *pData, int32_t iDataSize, void *pUserData)
|
|
{
|
|
VoipNarrateRefT *pVoipNarrate = (VoipNarrateRefT *)pUserData;
|
|
int32_t iDataRead, iResult;
|
|
char strError[256] = "";
|
|
|
|
// handle start callback notification
|
|
if (eStatus == PROTOSTREAM_STATUS_BEGIN)
|
|
{
|
|
pVoipNarrate->iSamplesInPhrase = 0;
|
|
pVoipNarrate->Metrics.uDelay += NetTickDiff(NetTick(), pVoipNarrate->uTtsStartTime);
|
|
pVoipNarrate->pVoiceDataCb(pVoipNarrate, pVoipNarrate->iUserIndex, (const int16_t *)pData, VOIPNARRATE_STREAM_START, pVoipNarrate->pUserData);
|
|
}
|
|
// handle end callback notification
|
|
if (eStatus == PROTOSTREAM_STATUS_DONE)
|
|
{
|
|
// save metrics
|
|
int32_t iPhraseDuration = ((pVoipNarrate->iSamplesInPhrase * 1000) / VOIPNARRATE_SAMPLERATE);
|
|
pVoipNarrate->Metrics.uDurationMsRecv += iPhraseDuration;
|
|
if (iPhraseDuration < VOIPNARRATE_EMPTY_AUDIO_THRESHOLD_MS)
|
|
{
|
|
pVoipNarrate->Metrics.uEmptyResultCount += 1;
|
|
}
|
|
|
|
// signal end of stream
|
|
pVoipNarrate->pVoiceDataCb(pVoipNarrate, pVoipNarrate->iUserIndex, (const int16_t *)pData, VOIPNARRATE_STREAM_END, pVoipNarrate->pUserData);
|
|
pVoipNarrate->bActive = FALSE;
|
|
|
|
// check for a completion result that is not successful, and log error response (if any) to debug output
|
|
if ((iResult = ProtoStreamStatus(pProtoStream, 'code', NULL, 0)) != PROTOHTTP_RESPONSE_SUCCESSFUL)
|
|
{
|
|
ProtoStreamStatus(pProtoStream, 'serr', strError, sizeof(strError));
|
|
NetPrintf(("voipnarrate: stream failed with http result %d:\n%s\n", iResult, strError));
|
|
pVoipNarrate->Metrics.uErrorCount += 1;
|
|
}
|
|
}
|
|
|
|
// read data and pass it to callback, processing as necessary
|
|
for (iDataRead = 0, iResult = 1; (iResult > 0) && (iDataSize > 0); )
|
|
{
|
|
if (pVoipNarrate->Config.eProvider != VOIPNARRATE_PROVIDER_GOOGLE)
|
|
{
|
|
// if start of stream, see if we need to skip WAV header
|
|
if ((pVoipNarrate->bStart) && (iDataSize > 0))
|
|
{
|
|
iDataRead = _VoipNarrateSkipWavHeader(pData, iDataSize);
|
|
pData += iDataRead;
|
|
iDataSize -= iDataRead;
|
|
pVoipNarrate->bStart = FALSE;
|
|
}
|
|
iResult = pVoipNarrate->pVoiceDataCb(pVoipNarrate, pVoipNarrate->iUserIndex, (const int16_t *)pData, iDataSize, pVoipNarrate->pUserData);
|
|
pVoipNarrate->iSamplesInPhrase += iResult;
|
|
}
|
|
else
|
|
{
|
|
// google-specific processing to deal with base64 encoded audio
|
|
iResult = _VoipNarrateStreamCallbackGoogle(pVoipNarrate, eStatus, pData, iDataSize);
|
|
}
|
|
|
|
iDataRead += iResult;
|
|
iDataSize -= iResult;
|
|
pData += iResult;
|
|
}
|
|
return(iDataRead);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateStart
|
|
|
|
\Description
|
|
Receive streamed voice data and submit it to callback
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input iUserIndex - local user index of user who is requesting speech synthesis
|
|
\Input eGender - preferred gender for voice for narration
|
|
\Input *pText - pointer to text request
|
|
|
|
\Output
|
|
int32_t - ProtoStream request result
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static int32_t _VoipNarrateStart(VoipNarrateRefT *pVoipNarrate, int32_t iUserIndex, VoipNarrateGenderE eGender, const char *pText)
|
|
{
|
|
const char *pUrl, *pReq;
|
|
char strUrl[256];
|
|
int32_t iResult;
|
|
|
|
// format header/url and request body
|
|
if (pVoipNarrate->Config.eProvider == VOIPNARRATE_PROVIDER_MICROSOFT)
|
|
{
|
|
pUrl = _VoipNarrateFormatHeadMicrosoft(pVoipNarrate, strUrl, sizeof(strUrl), pVoipNarrate->strHead, sizeof(pVoipNarrate->strHead));
|
|
pReq = _VoipNarrateFormatBodyMicrosoft(pVoipNarrate, pVoipNarrate->strBody, sizeof(pVoipNarrate->strBody), eGender, pText);
|
|
}
|
|
else if (pVoipNarrate->Config.eProvider == VOIPNARRATE_PROVIDER_GOOGLE)
|
|
{
|
|
pUrl = _VoipNarrateFormatHeadGoogle(pVoipNarrate, strUrl, sizeof(strUrl), pVoipNarrate->strHead, sizeof(pVoipNarrate->strHead));
|
|
pReq = _VoipNarrateFormatBodyGoogle(pVoipNarrate, pVoipNarrate->strBody, sizeof(pVoipNarrate->strBody), eGender, pText);
|
|
}
|
|
else if (pVoipNarrate->Config.eProvider == VOIPNARRATE_PROVIDER_IBMWATSON)
|
|
{
|
|
pUrl = _VoipNarrateFormatHeadWatson(pVoipNarrate, strUrl, sizeof(strUrl), pVoipNarrate->strHead, sizeof(pVoipNarrate->strHead), eGender);
|
|
pReq = _VoipNarrateFormatBodyWatson(pVoipNarrate, pVoipNarrate->strBody, sizeof(pVoipNarrate->strBody), pText);
|
|
}
|
|
else if (pVoipNarrate->Config.eProvider == VOIPNARRATE_PROVIDER_AMAZON)
|
|
{
|
|
pUrl = _VoipNarrateFormatHeadAmazon(pVoipNarrate, strUrl, sizeof(strUrl), pVoipNarrate->strHead, sizeof(pVoipNarrate->strHead));
|
|
pReq = _VoipNarrateFormatBodyAmazon(pVoipNarrate, pVoipNarrate->strBody, sizeof(pVoipNarrate->strBody), eGender, pText);
|
|
}
|
|
else
|
|
{
|
|
NetPrintf(("voipnarrate: undefined provider\n"));
|
|
return(-1);
|
|
}
|
|
NetPrintfVerbose((pVoipNarrate->iVerbose, 1, "voipnarrate: request body\n%s\n", pReq));
|
|
pVoipNarrate->pBody = pReq;
|
|
|
|
// set request header
|
|
ProtoStreamControl(pVoipNarrate->pProtoStream, 'apnd', 0, 0, pVoipNarrate->strHead);
|
|
|
|
pVoipNarrate->Metrics.uEventCount += 1;
|
|
pVoipNarrate->Metrics.uCharCountSent += (uint32_t)strlen(pText);
|
|
pVoipNarrate->uTtsStartTime = NetTick();
|
|
|
|
// make the request
|
|
if ((iResult = ProtoStreamOpen2(pVoipNarrate->pProtoStream, pUrl, pReq, PROTOSTREAM_FREQ_ONCE)) >= 0)
|
|
{
|
|
// mark as stream start and active
|
|
pVoipNarrate->bStart = pVoipNarrate->bActive = TRUE;
|
|
}
|
|
else
|
|
{
|
|
NetPrintf(("voipnarrate: failed to open stream\n"));
|
|
pVoipNarrate->Metrics.uErrorCount += 1;
|
|
}
|
|
|
|
// return to caller
|
|
return(iResult);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function _VoipNarrateConfig
|
|
|
|
\Description
|
|
Configure the VoipNarrate module
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input *pConfig - module configuration to set
|
|
|
|
\Output
|
|
uint32_t - TRUE if configured successfully
|
|
|
|
\Version 11/07/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
static uint32_t _VoipNarrateConfig(VoipNarrateRefT *pVoipNarrate, VoipNarrateConfigT *pConfig)
|
|
{
|
|
uint8_t uRet = TRUE;
|
|
NetCritEnter(NULL);
|
|
if (pConfig->eProvider != VOIPNARRATE_PROVIDER_NONE)
|
|
{
|
|
ds_memcpy_s(&pVoipNarrate->Config, sizeof(pVoipNarrate->Config), pConfig, sizeof(*pConfig));
|
|
}
|
|
else
|
|
{
|
|
NetPrintfVerbose((pVoipNarrate->iVerbose, 0, "voipnarrate: narration disabled\n"));
|
|
ds_memclr(&pVoipNarrate->Config, sizeof(pVoipNarrate->Config));
|
|
uRet = FALSE;
|
|
}
|
|
NetCritLeave(NULL);
|
|
return(uRet);
|
|
}
|
|
|
|
|
|
/*** Public functions *************************************************************/
|
|
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function VoipNarrateCreate
|
|
|
|
\Description
|
|
Create the narration module
|
|
|
|
\Input *pVoiceDataCb - callback used to provide voice data
|
|
\Input *pUserData - callback user data
|
|
|
|
\Output
|
|
VoipNarrateRefT * - new module state, or NULL
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
VoipNarrateRefT *VoipNarrateCreate(VoipNarrateVoiceDataCbT *pVoiceDataCb, void *pUserData)
|
|
{
|
|
VoipNarrateRefT *pVoipNarrate;
|
|
int32_t iMemGroup;
|
|
void *pMemGroupUserData;
|
|
|
|
// query current mem group data
|
|
DirtyMemGroupQuery(&iMemGroup, &pMemGroupUserData);
|
|
|
|
// validate callback
|
|
if (pVoiceDataCb == NULL)
|
|
{
|
|
NetPrintf(("voipnarrate: could not create module with null callback\n"));
|
|
return(NULL);
|
|
}
|
|
|
|
// allocate and init module state
|
|
if ((pVoipNarrate = DirtyMemAlloc(sizeof(*pVoipNarrate), VOIPNARRATE_MEMID, iMemGroup, pMemGroupUserData)) == NULL)
|
|
{
|
|
NetPrintf(("voipnarrate: could not allocate module state\n"));
|
|
return(NULL);
|
|
}
|
|
ds_memclr(pVoipNarrate, sizeof(*pVoipNarrate));
|
|
pVoipNarrate->iMemGroup = iMemGroup;
|
|
pVoipNarrate->pMemGroupUserData = pMemGroupUserData;
|
|
pVoipNarrate->pVoiceDataCb = pVoiceDataCb;
|
|
pVoipNarrate->pUserData = pUserData;
|
|
pVoipNarrate->iVerbose = 1;
|
|
|
|
// allocate streaming module with a buffer to hold up to 1s of 16khz 16bit streaming audio
|
|
if ((pVoipNarrate->pProtoStream = ProtoStreamCreate(16*2*1024)) == NULL)
|
|
{
|
|
VoipNarrateDestroy(pVoipNarrate);
|
|
return(NULL);
|
|
}
|
|
// set protostream callback with a 20ms call rate
|
|
ProtoStreamSetCallback(pVoipNarrate->pProtoStream, 20, _VoipNarrateStreamCallback, pVoipNarrate);
|
|
// set protostream minimum data amount (for base64 decoding; four is the minimum amount but that produces one and a half samples, so we choose eight)
|
|
ProtoStreamControl(pVoipNarrate->pProtoStream, 'minb', 8, 0, NULL);
|
|
// set protostream debug level
|
|
ProtoStreamControl(pVoipNarrate->pProtoStream, 'spam', 1, 0, NULL);
|
|
// set keepalive
|
|
ProtoStreamControl(pVoipNarrate->pProtoStream, 'keep', 1, 0, NULL);
|
|
// set protostream http custom header callback, used to sign AWS requests
|
|
ProtoStreamSetHttpCallback(pVoipNarrate->pProtoStream, _VoipNarrateCustomHeaderCb, NULL, pVoipNarrate);
|
|
|
|
// configure for particular provider
|
|
if (!_VoipNarrateConfig(pVoipNarrate, &_VoipNarrate_Config))
|
|
{
|
|
NetPrintf(("voipnarrate: could not configure for provider\n"));
|
|
VoipNarrateDestroy(pVoipNarrate);
|
|
return(NULL);
|
|
}
|
|
|
|
// return ref to caller
|
|
return(pVoipNarrate);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function VoipNarrateConfig
|
|
|
|
\Description
|
|
Set global state to configure the VoipNarrate modules
|
|
|
|
\Input eProvider - VOIPNARRATE_PROVIDER_* (VOIPNARRATE_PROVIDER_NONE to disable)
|
|
\Input *pUrl - pointer to url to use for tts requests
|
|
\Input *pKey - pointer to authentication key to use for tts requests
|
|
|
|
\Version 11/07/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
void VoipNarrateConfig(VoipNarrateProviderE eProvider, const char *pUrl, const char *pKey)
|
|
{
|
|
NetCritEnter(NULL);
|
|
_VoipNarrate_Config.eProvider = eProvider;
|
|
ds_strnzcpy(_VoipNarrate_Config.strUrl, pUrl, sizeof(_VoipNarrate_Config.strUrl));
|
|
ds_strnzcpy(_VoipNarrate_Config.strKey, pKey, sizeof(_VoipNarrate_Config.strKey));
|
|
NetCritLeave(NULL);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function VoipNarrateDestroy
|
|
|
|
\Description
|
|
Destroy the VoipNarrate module
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
void VoipNarrateDestroy(VoipNarrateRefT *pVoipNarrate)
|
|
{
|
|
// destroy protostream module, if allocated
|
|
if (pVoipNarrate->pProtoStream != NULL)
|
|
{
|
|
ProtoStreamDestroy(pVoipNarrate->pProtoStream);
|
|
}
|
|
// release any queued requests
|
|
while (pVoipNarrate->pRequest != NULL)
|
|
{
|
|
_VoipNarrateRequestGet(pVoipNarrate, NULL);
|
|
}
|
|
// dispose of module memory
|
|
DirtyMemFree(pVoipNarrate, VOIPNARRATE_MEMID, pVoipNarrate->iMemGroup, pVoipNarrate->pMemGroupUserData);
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function VoipNarrateInput
|
|
|
|
\Description
|
|
Input text to be convert to speech
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input iUserIndex - local user index of user who is requesting speech synthesis
|
|
\Input eGender - preferred gender for voice narration
|
|
\Input *pText - text to be converted
|
|
|
|
\Output
|
|
int32_t - zero=success, otherwise=failure
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
int32_t VoipNarrateInput(VoipNarrateRefT *pVoipNarrate, int32_t iUserIndex, VoipNarrateGenderE eGender, const char *pText)
|
|
{
|
|
// make sure a provider is configured
|
|
if (pVoipNarrate->Config.eProvider == VOIPNARRATE_PROVIDER_NONE)
|
|
{
|
|
NetPrintfVerbose((pVoipNarrate->iVerbose, 0, "voipnarrate: no provider configured\n"));
|
|
return(-1);
|
|
}
|
|
// handle if there is already narration ongoing
|
|
if (pVoipNarrate->bActive)
|
|
{
|
|
NetPrintfVerbose((pVoipNarrate->iVerbose, 1, "voipnarrate: queueing request '%s'\n", pText));
|
|
return(_VoipNarrateRequestAdd(pVoipNarrate, iUserIndex, eGender, pText));
|
|
}
|
|
// if ready, start the request
|
|
return(_VoipNarrateStart(pVoipNarrate, iUserIndex, eGender, pText));
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function VoipNarrateStatus
|
|
|
|
\Description
|
|
Get module status.
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input iStatus - status selector
|
|
\Input iValue - selector specific
|
|
\Input *pBuffer - selector specific
|
|
\Input iBufSize - selector specific
|
|
|
|
\Output
|
|
int32_t - selector specific
|
|
|
|
\Notes
|
|
Other status codes are passed down to the stream transport handler.
|
|
|
|
\verbatim
|
|
'ttsm' - get the VoipTextToSpeechMetricsT via pBuffer
|
|
\endverbatim
|
|
|
|
\Version 11/15/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
int32_t VoipNarrateStatus(VoipNarrateRefT *pVoipNarrate, int32_t iStatus, int32_t iValue, void *pBuffer, int32_t iBufSize)
|
|
{
|
|
if (iStatus == 'ttsm')
|
|
{
|
|
if ((pBuffer != NULL) && (iBufSize >= (int32_t)sizeof(VoipTextToSpeechMetricsT)))
|
|
{
|
|
ds_memcpy_s(pBuffer, iBufSize, &pVoipNarrate->Metrics, sizeof(VoipTextToSpeechMetricsT));
|
|
return(0);
|
|
}
|
|
return(-1);
|
|
}
|
|
return(ProtoStreamStatus(pVoipNarrate->pProtoStream, iStatus, pBuffer, iBufSize));
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function VoipNarrateControl
|
|
|
|
\Description
|
|
Set control options
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
\Input iControl - control selector
|
|
\Input iValue - selector specific
|
|
\Input iValue2 - selector specific
|
|
\Input *pValue - selector specific
|
|
|
|
\Output
|
|
int32_t - selector specific
|
|
|
|
\Notes
|
|
iStatus can be one of the following:
|
|
|
|
\verbatim
|
|
'ctsm' - clear text to speech metrics in VoipTextToSpeechMetricsT
|
|
'spam' - set verbose debug level (debug only)
|
|
\endverbatim
|
|
|
|
Unhandled codes are passed through to the stream transport handler
|
|
|
|
\Version 08/30/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
int32_t VoipNarrateControl(VoipNarrateRefT *pVoipNarrate, int32_t iControl, int32_t iValue, int32_t iValue2, void *pValue)
|
|
{
|
|
if (iControl == 'ctsm')
|
|
{
|
|
ds_memclr(&(pVoipNarrate->Metrics), sizeof(pVoipNarrate->Metrics));
|
|
return(0);
|
|
}
|
|
#if DIRTYCODE_LOGGING
|
|
// set verbosity for us and pass through to stream transport handler
|
|
if (iControl == 'spam')
|
|
{
|
|
pVoipNarrate->iVerbose = iValue;
|
|
}
|
|
#endif
|
|
// if not handled, let stream transport handler take a stab at it
|
|
return(ProtoStreamControl(pVoipNarrate->pProtoStream, iControl, iValue, iValue2, pValue));
|
|
}
|
|
|
|
/*F********************************************************************************/
|
|
/*!
|
|
\Function VoipNarrateUpdate
|
|
|
|
\Description
|
|
Update the narration module
|
|
|
|
\Input *pVoipNarrate - pointer to module state
|
|
|
|
\Version 10/25/2018 (jbrookes)
|
|
*/
|
|
/********************************************************************************F*/
|
|
void VoipNarrateUpdate(VoipNarrateRefT *pVoipNarrate)
|
|
{
|
|
// see if we need to start a queued narration request
|
|
if ((pVoipNarrate->pRequest != NULL) && !pVoipNarrate->bActive)
|
|
{
|
|
VoipNarrateRequestT Request;
|
|
_VoipNarrateRequestGet(pVoipNarrate, &Request);
|
|
_VoipNarrateStart(pVoipNarrate, Request.iUserIndex, Request.eGender, Request.strText);
|
|
}
|
|
// give life to stream module
|
|
ProtoStreamUpdate(pVoipNarrate->pProtoStream);
|
|
}
|