implement some features in transcript search

This commit is contained in:
2024-12-26 13:37:33 -06:00
parent f705cca038
commit 0dcb7634ae
8 changed files with 101 additions and 30 deletions

View File

@@ -29,9 +29,9 @@ module.exports = {
async function podcastData (data) {
var file_stem
if (data.season === 1) {
if (data.season == 1) {
file_stem = `ep${data.episode}`
} if (data.season === 2) {
} else if (data.season == 2) {
file_stem = `${seasonEpisodeFormat(null, {...data, episodePrefix: "ep"}).toLowerCase()}`
} else {
file_stem = `${seasonEpisodeFormat(null, data).toLowerCase()}`

View File

@@ -23,14 +23,14 @@ override:eleventyComputed: []
let search_index_promise = fetch('../transcript-index')
.then((res)=>res.json())
.then((documents)=>{
docs = documents.map(({title, episode, season, url, cues})=>cues.map(({startTime, text})=>({title, episode, season, url, startTime, text}))).flat()
console.log(documents)
docs = documents.map(({title, episode, season, url, transcriptPageUrl, segments})=>segments.map(({start, text, id})=>({segment_id:id, title, episode, season, transcriptPageUrl, url, start, text}))).flat()
console.log(docs)
idx = lunr(function(){
this.ref('id')
this.ref('index')
this.field('text')
this.metadataWhitelist = ['position']
docs.forEach(function (doc, idx) {
doc.id = idx;
doc.index = idx;
this.add(doc);
}, this)
})
@@ -44,14 +44,15 @@ override:eleventyComputed: []
results.forEach(r => {
r.title = docs[r.ref].title,
r.url = docs[r.ref].url
r.id = docs[r.ref].id
})
console.log('Form submitted!', results)
console.log('Form submitted!', searchQuery, results)
const results_ol = document.getElementById("results").querySelector('ol')
results_ol.innerHTML = ""
results.forEach(r => {
const el = document.createElement('li')
const {url, title, text, season, episode, startTime} = docs[r.ref]
el.innerHTML = `<a href="${url}?startAt=${startTime}">${title} (Season ${season}, episode ${episode})</a><p>${Duration.fromObject({seconds:startTime}).toFormat("hh:mm:ss")}</p><p>${text}</p>`
const {segment_id, url, title, text, season, episode, start, transcriptPageUrl} = docs[r.ref]
el.innerHTML = `<a href="${transcriptPageUrl}#${segment_id}">${title} (Season ${season}, episode ${episode})</a><p>${Duration.fromObject({seconds:start/1000}).toFormat("hh:mm:ss")}</p><p>${text}</p>`
results_ol.appendChild(el)
})
}

View File

@@ -8,23 +8,20 @@ class SearchIndex {
}
async render (data) {
const episodesWithTranscript = data.collections.episode.filter(e=>e.data.podcast?.transcriptUrl)
const promises = episodesWithTranscript.map((episode)=>{
const {transcriptUrl} = episode.data.podcast
return Fetch(transcriptUrl, {type:'text', duration: "1d"})
.then(srt_buffer=>parseText(srt_buffer.toString(), {type:'srt'}))
.then(({cues})=>cues)
.then((cues)=>({
name: path.basename(transcriptUrl,".srt"),
const episodesWithTranscript = data.collections.episode.filter(e=>e.data.podcast?.transcriptPage)
const result = episodesWithTranscript.map((episode)=>{
const transcriptPage = data.collections.transcript.find(t=>t.data.episode==episode.data.episode && t.data.season == episode.data.season)
if (!transcriptPage) {return null}
return {
transcriptPageUrl: this.url(transcriptPage.url),
episode: episode.data.episode,
season: episode.data.season,
title: episode.data.title,
url: `${this.url(episode.url)}`,
cues: cues.map(({id, startTime, text})=>({id,startTime,text}))
}))
segments: transcriptPage.data.segments
}
})
const result = await Promise.all(promises)
return JSON.stringify(result)
return JSON.stringify(result.filter(r=>r))
}
}

View File

@@ -0,0 +1,9 @@
const {extractSeasonEpisode, episodeNumber} = require('../../utils/filters');
module.exports = {
"tags":["transcript"],
"eleventyComputed": {
"episode": (data) => extractSeasonEpisode(data.page.fileSlug).episode,
"season": (data) => extractSeasonEpisode(data.page.fileSlug).season || 1,
}
}

View File

@@ -1,3 +0,0 @@
{
"tags":["transcript"]
}