44 changes: 43 additions & 1 deletion .github/workflows/generate-podcast.yml
@@ -2,17 +2,25 @@ name: generate-podcast

on:
workflow_dispatch:
inputs:
debug:
description: 'Run in debug mode'
required: false
default: 'false'
type: boolean

# every morning at 4am EST
schedule:
- cron: '0 9 * * *' # 4am EST

permissions:
contents: read
actions: write # for caching

env:
NODE_VERSION: 23.6.0
PNPM_VERSION: 9.15.4
COVERED_STORIES_CACHE_KEY: ${{ github.ref }}-covered-stories

jobs:
podcast:
@@ -30,12 +38,46 @@ jobs:
pnpm-version: ${{ env.PNPM_VERSION }}
pnpm-install-cache-key: pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}

- name: Create cache directory
run: mkdir ./cache

# Restore cache from cache/covered-stories
- name: Restore covered-stories
id: cache-restore
uses: actions/cache/restore@v4
with:
path: ./cache/covered-stories
key: ${{ env.COVERED_STORIES_CACHE_KEY }}

- run: pnpm start
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
DEBUG: ${{ github.event.inputs.debug }}

- name: Delete Previous Cache
if: ${{ steps.cache-restore.outputs.cache-hit }}
continue-on-error: true
run: |
gh extension install actions/gh-actions-cache
gh actions-cache delete "${{ env.COVERED_STORIES_CACHE_KEY }}" --confirm
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Cache cache/covered-stories for next run
- name: Cache covered-stories
uses: actions/cache/save@v4
with:
path: ./cache/covered-stories
key: ${{ env.COVERED_STORIES_CACHE_KEY }}

- uses: actions/upload-artifact@v4
with:
-          name: output-$(date +'%Y-%m-%d')
+          name: output
path: output
retention-days: 1

- uses: actions/upload-artifact@v4
with:
name: covered-stories
path: ./cache/covered-stories
retention-days: 7
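
Because GitHub Actions cache entries are immutable once written for a given key, the workflow refreshes covered-stories by deleting the previous entry with the gh-actions-cache extension and saving a fresh one under the same key; the delete step only runs after a cache hit, and continue-on-error keeps a failed delete from breaking the job. The readFromCache and writeToCache helpers that the application relies on are not part of this diff; the following is only a minimal sketch of what src/lib/cache.ts might look like, assuming plain file-backed storage under ./cache/ so that the workflow's ./cache/covered-stories path lines up.

// Hypothetical sketch, not from the repo: file-backed cache helpers under ./cache/.
import { mkdir, readFile, writeFile } from 'node:fs/promises'
import { join } from 'node:path'

const CACHE_DIR = './cache'

export async function readFromCache(key: string): Promise<string | null> {
  try {
    return await readFile(join(CACHE_DIR, key), 'utf8')
  } catch {
    // A missing file is simply a cache miss
    return null
  }
}

export async function writeToCache(key: string, value: string): Promise<void> {
  await mkdir(CACHE_DIR, { recursive: true })
  await writeFile(join(CACHE_DIR, key), value, 'utf8')
}
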
91 changes: 73 additions & 18 deletions src/lib/hn.ts
@@ -10,29 +10,88 @@ const logger = childLogger('HN')

export async function fetchTopStories(count: number = 10): Promise<StoryOutput[]> {
logger.info(`Fetching top ${count} stories...`)

// Fetch additional stories to account for stories covered in previous episodes
const response = await fetch(
-    `https://hn.algolia.com/api/v1/search?tags=front_page&hitsPerPage=${count}`,
+    `https://hn.algolia.com/api/v1/search?tags=front_page&hitsPerPage=${count + 10}`,
)
const data = (await response.json()) as ResponseData

// Extract only the data we need
-  const slim = data.hits.map(s => ({
-    title: s.title,
-    url: s.url,
-    storyId: s.story_id,
-  }))
+  const slim = data.hits.map(s => {
+    return {
+      title: s.title,
+      url: s.url,
+      storyId: s.story_id,
+      story_text: s.story_text,
+    }
+  })

// Check if we have already covered the story
const covered = await readFromCache('covered-stories')
const coveredStories: number[] = covered ? JSON.parse(covered) : []

logger.info(`Found ${coveredStories.length} covered stories`, {
coveredStories,
})

// Filter out stories we have already covered
const filtered = slim
.filter(s => {
const wasCovered = coveredStories.includes(s.storyId)
logger.info({ storyId: s.storyId, wasCovered })
return !wasCovered
})
.slice(0, count)

logger.debug({ filtered })

if (filtered.length < count) {
const msg = `Not enough stories to cover. Found ${filtered.length}, expected ${count}`
logger.error(msg)
throw new Error(msg)
}

const newCovered = [...coveredStories, ...filtered.map(s => s.storyId)]
logger.debug({ newCovered })

// Save the covered stories
await writeToCache('covered-stories', JSON.stringify(newCovered))

// Fetch the content and comments for each story
const output: StoryOutput[] = []
-  for (const hit of slim) {
-    logger.info(`Fetching [${hit.storyId}] - ${hit.title} - ${hit.url}`)
-    const cacheKey = 'story-' + hit.storyId.toString()
+  for (const story of filtered) {
+    const comments = await fetchStoryDataById(story.storyId)
+    logger.info(`Fetching [${story.storyId}] - ${story.title} - ${story.url}`)
+    const cacheKey = 'story-' + story.storyId.toString()

let htmlString = await readFromCache(cacheKey)

const baseStoryOutput: Pick<StoryOutput, 'comments' | 'hnUrl' | 'storyId' | 'title'> = {
title: story.title,
storyId: story.storyId,
comments,
hnUrl: `https://news.ycombinator.com/item?id=${story.storyId}`,
}

// Ask HN posts don't have a url, but have a story_text
if (!story.url && story.story_text) {
output.push({
content: story.story_text,
source: 'Hacker News',
...baseStoryOutput,
      })
      continue
    }

if (!story.url) {
logger.error(`No url or story text found for story ${story.storyId}`)
continue
}

if (!htmlString) {
-      htmlString = await fetch(hit.url).then(res => res.text())
+      htmlString = await fetch(story.url).then(res => res.text())
if (!htmlString) {
-        logger.info(`No content found for ${hit.url}`)
+        logger.info(`No content found for ${story.url}`)
continue
}
await writeToCache(cacheKey, htmlString)
@@ -54,16 +113,12 @@ export async function fetchTopStories(count: number = 10): Promise<StoryOutput[]

logger.debug({ byline, excerpt, siteName })

-    const comments = await fetchStoryDataById(hit.storyId)
     output.push({
       content: textContent,
-      comments,
-      title: hit.title,
-      url: hit.url,
-      storyId: hit.storyId,
+      url: story.url,
       // strip http(s), parse just the domain from the url
-      source: siteName ?? byline ?? new URL(hit.url).hostname.replace('www.', ''),
-      hnUrl: `https://news.ycombinator.com/item?id=${hit.storyId}`,
+      source: siteName ?? byline ?? new URL(story.url).hostname.replace('www.', ''),
+      ...baseStoryOutput,
})
}

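
The step that turns htmlString into textContent, byline, excerpt, and siteName is collapsed in this diff. Assuming the project pairs @mozilla/readability with jsdom for that extraction, which the diff itself does not confirm, the parsing step would look roughly like the sketch below.

// Hypothetical sketch of the collapsed parsing step; the real implementation may differ.
import { Readability } from '@mozilla/readability'
import { JSDOM } from 'jsdom'

function parseArticle(htmlString: string, url: string) {
  // Build a DOM from the fetched HTML, then run Readability over it
  const dom = new JSDOM(htmlString, { url })
  const article = new Readability(dom.window.document).parse()
  if (!article) return null
  const { textContent, byline, excerpt, siteName } = article
  return { textContent, byline, excerpt, siteName }
}
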
5 changes: 3 additions & 2 deletions src/types.ts
@@ -11,7 +11,7 @@ export type StoryOutput = {
content: string
comments: SlimComment[]
title: string
-  url: string
+  url?: string
storyId: number
/** Sitename, byline, or readable hostname */
source: string
@@ -73,7 +73,8 @@ export type Hit = {
story_id: number
title: string
updated_at: string
-  url: string
+  url?: string
+  story_text?: string
}

export type ProcessingTimingsMS = {
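
Making url optional on both Hit and StoryOutput means downstream consumers can no longer assume every story links to an external article. The helper below is hypothetical and only illustrates the narrowing callers now need, using fields that appear in this diff: fall back to the Hacker News discussion link when a story (such as an Ask HN post) has no url.

// Hypothetical consumer-side helper, not part of this change.
import type { StoryOutput } from './types'

function storyLink(story: StoryOutput): string {
  // Ask HN posts have no external url, so link to the HN discussion instead
  return story.url ?? story.hnUrl
}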