-
Notifications
You must be signed in to change notification settings - Fork 12
/
deno_tts_api.ts
107 lines (90 loc) · 3.49 KB
/
deno_tts_api.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import { serve } from "https://deno.land/std/http/server.ts";
import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";
// Optional shared secret read from the AUTH_TOKEN environment variable.
// When it is unset (or empty) the `unauthorized` check lets every request through.
const AUTH_TOKEN = Deno.env.get("AUTH_TOKEN");
// Bing speech-platform endpoint queried by `fetchVoiceList`; its response is parsed as JSON.
// The `trustedclienttoken` query parameter is hard-coded in the URL.
const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
/** Shape of one voice in the grouped list returned by `fetchVoiceList`. */
type VoiceListEntry = {
  model: string;
  name: string;
  friendlyName: string;
  locale: string;
};

/**
 * Downloads the voice catalogue from `VOICES_URL` and groups it by locale.
 *
 * Each upstream item is read for `ShortName`, `FriendlyName` and `Locale`;
 * `ShortName` is exposed as both `model` and `name`. Items that are not
 * objects, or that lack a string `ShortName`/`Locale`, are skipped instead of
 * being filed under an `"undefined"` key.
 *
 * @returns A map from locale to the voices available for that locale.
 * @throws Error when the upstream request does not succeed or the body is not
 *   a JSON array.
 */
async function fetchVoiceList(): Promise<Record<string, VoiceListEntry[]>> {
  const response = await fetch(VOICES_URL);
  if (!response.ok) {
    // Release the unread body so the connection is not left dangling.
    await response.body?.cancel();
    throw new Error(
      `Failed to fetch voice list: ${response.status} ${response.statusText}`,
    );
  }

  const payload: unknown = await response.json();
  if (!Array.isArray(payload)) {
    throw new Error("Unexpected voice list format: expected a JSON array");
  }

  const grouped: Record<string, VoiceListEntry[]> = {};
  for (const item of payload as unknown[]) {
    if (typeof item !== "object" || item === null) continue;
    const { ShortName, FriendlyName, Locale } = item as Record<string, unknown>;
    if (typeof ShortName !== "string" || typeof Locale !== "string") continue;

    // Fall back to the short name when the upstream entry has no friendly name.
    const friendlyName = typeof FriendlyName === "string" ? FriendlyName : ShortName;

    let bucket = grouped[Locale];
    if (!bucket) {
      bucket = [];
      grouped[Locale] = bucket;
    }
    bucket.push({ model: ShortName, name: ShortName, friendlyName, locale: Locale });
  }
  return grouped;
}
/**
 * Parses a pipe-separated list of `key:value` settings into a lookup table.
 * Keys and values are trimmed; segments with an empty key are ignored.
 */
function parseVoiceParams(voice: string): Record<string, string> {
  const params: Record<string, string> = {};
  for (const segment of voice.split("|")) {
    // Only the first two ":"-separated parts are used, as before.
    const [rawKey = "", rawValue = ""] = segment.split(":");
    const key = rawKey.trim();
    if (key) params[key] = rawValue.trim();
  }
  return params;
}

/** Converts a setting to a number, falling back to 0 for missing or non-numeric input. */
function toFiniteNumberOrZero(value: string | undefined): number {
  const parsed = Number(value || 0);
  return Number.isFinite(parsed) ? parsed : 0;
}

/**
 * Synthesizes `text` with `EdgeSpeechTTS` and wraps the audio in an HTTP response.
 *
 * @param model Voice identifier; forwarded to the TTS engine as `options.voice`.
 * @param voice Pipe-separated `key:value` settings. Only `rate` and `pitch` are
 *   read; anything missing or non-numeric is sent as 0 rather than `NaN`.
 * @param text The text to speak.
 * @returns A response with `Content-Type: audio/mpeg` containing the synthesized bytes.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  const params = parseVoiceParams(voice);
  const rate = toFiniteNumberOrZero(params["rate"]);
  const pitch = toFiniteNumberOrZero(params["pitch"]);

  const tts = new EdgeSpeechTTS();
  const payload = {
    input: text,
    options: {
      rate,
      pitch,
      voice: model,
    },
  };

  const response = await tts.create(payload);
  // Buffer the whole clip so it can be returned as a single body.
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());
  console.log(`Successfully synthesized speech, returning audio/mpeg response`);
  return new Response(mp3Buffer, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}
/**
 * Tells whether a request must be rejected for lacking valid credentials.
 *
 * Authentication is only enforced when `AUTH_TOKEN` is set to a non-empty
 * value; in that case the `Authorization` header must equal
 * `Bearer <AUTH_TOKEN>` exactly.
 *
 * @param req Incoming request whose `Authorization` header is inspected.
 * @returns `true` when the request should be answered with 401, else `false`.
 */
function unauthorized(req: Request): boolean {
  // No token configured: the service is open, nothing to verify.
  if (!AUTH_TOKEN) return false;
  return req.headers.get("Authorization") !== `Bearer ${AUTH_TOKEN}`;
}
/**
 * Validates a speech request and delegates to `synthesizeSpeech`.
 *
 * Checks are applied in this order: authorization (401), HTTP method (405,
 * only GET is accepted), then presence of the `model`, `input` and `voice`
 * query parameters (400).
 *
 * @param req Incoming request; parameters are read from its query string.
 * @returns The synthesized audio response, or an error response describing
 *   which check failed.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (unauthorized(req)) {
    console.log("Unauthorized request");
    // A 401 must advertise the expected authentication scheme.
    return new Response("Unauthorized", {
      status: 401,
      headers: { "WWW-Authenticate": "Bearer" },
    });
  }

  if (req.method !== "GET") {
    console.log(`Invalid method ${req.method}, expected GET`);
    // A 405 must list the methods the resource supports.
    return new Response("Method Not Allowed", {
      status: 405,
      headers: { "Allow": "GET, OPTIONS" },
    });
  }

  const { searchParams } = new URL(req.url);
  const model = searchParams.get("model");
  const input = searchParams.get("input");
  const voice = searchParams.get("voice");

  // Empty strings are rejected as well as absent parameters.
  if (!model || !input || !voice) {
    console.log("Missing required parameters");
    return new Response("Bad Request", { status: 400 });
  }

  console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);
  return synthesizeSpeech(model, voice, input);
}
/**
 * CORS headers attached to every response of this server.
 * NOTE(review): POST is advertised here although the synthesis handler only
 * accepts GET — confirm whether POST support is intended.
 */
const CORS_HEADERS: Record<string, string> = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Methods": "POST, GET, OPTIONS",
  "Access-Control-Allow-Headers": "Content-Type, Authorization",
};

/** Adds the CORS headers to `response` in place and returns it. */
function withCors(response: Response): Response {
  for (const [name, value] of Object.entries(CORS_HEADERS)) {
    response.headers.set(name, value);
  }
  return response;
}

// Entry point: serves `/v1/audio/speech`, answers CORS preflight requests and
// converts any thrown error into a 500 response.
serve(async (req) => {
  try {
    const { pathname } = new URL(req.url);
    if (pathname !== "/v1/audio/speech") {
      console.log(`Unhandled path ${pathname}`);
      return withCors(new Response("Not Found", { status: 404 }));
    }

    // Preflight: reply with the CORS headers only, no body.
    if (req.method === "OPTIONS") {
      return withCors(new Response(null, { status: 204 }));
    }

    return withCors(await handleSynthesisRequest(req));
  } catch (err: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before use.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return withCors(
      new Response(`Internal Server Error\n${message}`, { status: 500 }),
    );
  }
});