feat: tts service for switching out providers

denolfe · denolfe · commit 0b28a4bd2158 · 2025-01-18T22:20:45.000-05:00
diff --git a/src/index.ts b/src/index.ts
@@ -4,6 +4,7 @@ import { generatePodcastIntro, summarize } from './lib/ai'
 import { generateAudioFromText, joinAudioFiles } from './lib/audio'
 import { OUTPUT_DIR, podcastOutro as outro } from './lib/constants'
 import { fetchTopStories } from './lib/hn'
+import { getTtsService } from './lib/services'
 import { generateShowNotes } from './lib/show-notes'
 import { initCacheDir } from './utils/cache'
 import { loadEnvIfExists } from './utils/env'
@@ -17,14 +18,18 @@ const args = process.argv.slice(2)
 async function main() {
   await initOutputDir()
   await initCacheDir()
+  const ttsService = getTtsService()
   const storyData = await fetchTopStories(args[0] ? parseInt(args[0]) : 10)
   const intro = await generatePodcastIntro(storyData)
   const summaries = await summarize(storyData)
-  const audioFilenames = await generateAudioFromText([
-    { summary: intro.text, storyId: intro.cacheKey },
-    ...summaries,
-    { summary: outro, storyId: 'outro' },
-  ])
+  const audioFilenames = await generateAudioFromText(
+    [
+      { summary: intro.text, storyId: intro.cacheKey },
+      ...summaries,
+      { summary: outro, storyId: 'outro' },
+    ],
+    ttsService,
+  )
   await joinAudioFiles(audioFilenames, path.resolve(OUTPUT_DIR, 'output.mp3'))
   await generateShowNotes({ stories: summaries, introText: intro.text })
   log.info('Done!')
diff --git a/src/lib/audio.ts b/src/lib/audio.ts
@@ -1,7 +1,7 @@
 import ffmpeg from 'fluent-ffmpeg'
 import path from 'path'
 
-import type { StoryDataAggregate } from '../types'
+import type { StoryDataAggregate, TtsService } from '../types'
 
 import { CACHE_DIR } from '../lib/constants'
 import { readFromCache, writeToCache } from '../utils/cache'
@@ -18,6 +18,7 @@ type PodcastSegment = {
 
 export async function generateAudioFromText(
   storyData: (PodcastSegment | StoryDataAggregate)[],
+  ttsService: TtsService,
 ): Promise<string[]> {
   const audioFilenames: string[] = []
 
@@ -33,30 +34,9 @@ export async function generateAudioFromText(
 
     logger.info(`[${i + 1}/${storyData.length}] Generating audio: ${story.storyId}...`)
     try {
-      if (process.env.VOICE_SERVICE === 'elevenlabs') {
-        const audioStream = await getElevenLabsClient().textToSpeech.convert(
-          '56AoDkrOh6qfVPDXZ7Pt', // Cassidy
-          {
-            text: story.summary as string,
-            model_id: 'eleven_turbo_v2',
-          },
-        )
-        logger.info('Received back audio stream')
-        const buffer = await streamToBuffer(audioStream)
-        logger.info(`Audio file generated: ${filename}`)
-        await writeToCache(filename, buffer)
-      } else {
-        const mp3 = await getOpenAIClient().audio.speech.create({
-          model: 'tts-1-hd',
-          voice: 'nova',
-          input: story.summary as string,
-        })
-
-        const buffer = Buffer.from(await mp3.arrayBuffer())
-        logger.info(`Audio file generated: ${filename}`)
-        await writeToCache(filename, buffer)
-      }
-
+      const buffer = await ttsService.convert(story.summary as string)
+      logger.info(`Audio file generated: ${filename}`)
+      await writeToCache(filename, buffer)
       audioFilenames.push(filename)
     } catch (error) {
       logger.error(`Error generating audio for story: ${story.storyId}\nsummary: ${story.summary}`)
@@ -107,11 +87,3 @@ function insertBetween(array: string[], itemToInsert: string): string[] {
     return acc
   }, [] as string[])
 }
-
-async function streamToBuffer(stream: NodeJS.ReadableStream) {
-  const chunks: (Buffer | string)[] = []
-  for await (const chunk of stream) {
-    chunks.push(chunk)
-  }
-  return Buffer.concat(chunks as Buffer[])
-}
diff --git a/src/lib/services.ts b/src/lib/services.ts
@@ -0,0 +1,44 @@
+import type { TtsService } from '../types'
+
+import { log } from '../utils/log'
+import { getElevenLabsClient, getOpenAI } from './clients'
+
+export const getTtsService: () => TtsService = () => {
+  log.info(`Using voice service: ${process.env.VOICE_SERVICE || 'openai'}`)
+  if (process.env.VOICE_SERVICE === 'elevenlabs') {
+    const client = getElevenLabsClient()
+    return {
+      convert: async (text: string) => {
+        const audioStream = await client.textToSpeech.convert(
+          '56AoDkrOh6qfVPDXZ7Pt', // Cassidy
+          {
+            text,
+            model_id: 'eleven_turbo_v2',
+          },
+        )
+        return await streamToBuffer(audioStream)
+      },
+    }
+  } else {
+    const client = getOpenAI()
+    return {
+      convert: async (text: string) => {
+        const mp3 = await client.audio.speech.create({
+          model: 'tts-1-hd',
+          voice: 'nova',
+          input: text,
+        })
+
+        return Buffer.from(await mp3.arrayBuffer())
+      },
+    }
+  }
+}
+
+async function streamToBuffer(stream: NodeJS.ReadableStream) {
+  const chunks: (Buffer | string)[] = []
+  for await (const chunk of stream) {
+    chunks.push(chunk)
+  }
+  return Buffer.concat(chunks as Buffer[])
+}
diff --git a/src/types.ts b/src/types.ts
@@ -104,3 +104,7 @@ export type ResponseData = {
   query: string
   serverTimeMS: number
 }
+
+export type TtsService = {
+  convert: (text: string) => Promise<Buffer>
+}

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -104,3 +104,7 @@ export type ResponseData = {` |
| `104` | `104` | `query: string` |
| `105` | `105` | `serverTimeMS: number` |
| `106` | `106` | `}` |
| | `107` | `+` |
| | `108` | `+export type TtsService = {` |
| | `109` | `+ convert: (text: string) => Promise<Buffer>` |
| | `110` | `+}` |