implement some features in transcript search

This commit is contained in:
2024-12-26 13:37:33 -06:00
parent f705cca038
commit 0dcb7634ae
8 changed files with 101 additions and 30 deletions

View File

@@ -30,10 +30,10 @@ module.exports = function(eleventyConfig) {
{compile: (inputContent, inputPath)=>{
const parsed = parseSync(inputContent.toString(), {type:'srt'})
return async (data) => {
data.transcript = parsed
data.segments = parsed.map((i,idx)=>({id:idx, ...i.data}))
data.layout = "transcript"
// data.tags = ['transcript']
return JSON.stringify(parsed)
return JSON.stringify(parsed.map((i,idx)=>({id:idx, ...i.data})))
}
}})

View File

@@ -29,9 +29,9 @@ module.exports = {
async function podcastData (data) {
var file_stem
if (data.season === 1) {
if (data.season == 1) {
file_stem = `ep${data.episode}`
} if (data.season === 2) {
} else if (data.season == 2) {
file_stem = `${seasonEpisodeFormat(null, {...data, episodePrefix: "ep"}).toLowerCase()}`
} else {
file_stem = `${seasonEpisodeFormat(null, data).toLowerCase()}`

View File

@@ -23,14 +23,14 @@ override:eleventyComputed: []
let search_index_promise = fetch('../transcript-index')
.then((res)=>res.json())
.then((documents)=>{
docs = documents.map(({title, episode, season, url, cues})=>cues.map(({startTime, text})=>({title, episode, season, url, startTime, text}))).flat()
console.log(documents)
docs = documents.map(({title, episode, season, url, transcriptPageUrl, segments})=>segments.map(({start, text, id})=>({segment_id:id, title, episode, season, transcriptPageUrl, url, start, text}))).flat()
console.log(docs)
idx = lunr(function(){
this.ref('id')
this.ref('index')
this.field('text')
this.metadataWhitelist = ['position']
docs.forEach(function (doc, idx) {
doc.id = idx;
doc.index = idx;
this.add(doc);
}, this)
})
@@ -44,14 +44,15 @@ override:eleventyComputed: []
results.forEach(r => {
r.title = docs[r.ref].title,
r.url = docs[r.ref].url
r.id = docs[r.ref].id
})
console.log('Form submitted!', results)
console.log('Form submitted!', searchQuery, results)
const results_ol = document.getElementById("results").querySelector('ol')
results_ol.innerHTML = ""
results.forEach(r => {
const el = document.createElement('li')
const {url, title, text, season, episode, startTime} = docs[r.ref]
el.innerHTML = `<a href="${url}?startAt=${startTime}">${title} (Season ${season}, episode ${episode})</a><p>${Duration.fromObject({seconds:startTime}).toFormat("hh:mm:ss")}</p><p>${text}</p>`
const {segment_id, url, title, text, season, episode, start, transcriptPageUrl} = docs[r.ref]
el.innerHTML = `<a href="${transcriptPageUrl}#${segment_id}">${title} (Season ${season}, episode ${episode})</a><p>${Duration.fromObject({seconds:start/1000}).toFormat("hh:mm:ss")}</p><p>${text}</p>`
results_ol.appendChild(el)
})
}

View File

@@ -8,23 +8,20 @@ class SearchIndex {
}
async render (data) {
const episodesWithTranscript = data.collections.episode.filter(e=>e.data.podcast?.transcriptUrl)
const promises = episodesWithTranscript.map((episode)=>{
const {transcriptUrl} = episode.data.podcast
return Fetch(transcriptUrl, {type:'text', duration: "1d"})
.then(srt_buffer=>parseText(srt_buffer.toString(), {type:'srt'}))
.then(({cues})=>cues)
.then((cues)=>({
name: path.basename(transcriptUrl,".srt"),
const episodesWithTranscript = data.collections.episode.filter(e=>e.data.podcast?.transcriptPage)
const result = episodesWithTranscript.map((episode)=>{
const transcriptPage = data.collections.transcript.find(t=>t.data.episode==episode.data.episode && t.data.season == episode.data.season)
if (!transcriptPage) {return null}
return {
transcriptPageUrl: this.url(transcriptPage.url),
episode: episode.data.episode,
season: episode.data.season,
title: episode.data.title,
url: `${this.url(episode.url)}`,
cues: cues.map(({id, startTime, text})=>({id,startTime,text}))
}))
segments: transcriptPage.data.segments
}
})
const result = await Promise.all(promises)
return JSON.stringify(result)
return JSON.stringify(result.filter(r=>r))
}
}

View File

@@ -0,0 +1,9 @@
const {extractSeasonEpisode, episodeNumber} = require('../../utils/filters');
module.exports = {
"tags":["transcript"],
"eleventyComputed": {
"episode": (data) => extractSeasonEpisode(data.page.fileSlug).episode,
"season": (data) => extractSeasonEpisode(data.page.fileSlug).season || 1,
}
}

View File

@@ -1,3 +0,0 @@
{
"tags":["transcript"]
}

View File

@@ -2,10 +2,60 @@
layout: base
---
<script>
const transcript = {{{content}}}
</script>
{{#each transcript}}
<div>
{{formatDuration this.data.start "hh:mm:ss"}}: {{this.data.text}}
<script>
/**
 * Add up `offsetTop` for a node and every node reachable through its
 * `offsetParent` chain.
 *
 * @param {object} obj - Node to measure (reads `offsetTop` / `offsetParent`).
 * @returns {number[]|undefined} A one-element array holding the total, or
 *   `undefined` when `obj` has no `offsetParent`.
 */
function findPos(obj) {
  if (!obj.offsetParent) {
    return undefined;
  }
  let total = 0;
  for (let node = obj; node; node = node.offsetParent) {
    total += node.offsetTop;
  }
  return [total];
}
// Shared audio element. Kept as a global `var` because the inline onclick
// handlers on each segment's pause/play buttons refer to `vid` by name.
var vid
document.addEventListener('DOMContentLoaded', ()=>{
  const highlightClasses = ['fw-bolder', 'active']
  vid = document.getElementById('audio-player')
  const segmentElements = Array.from(document.querySelectorAll('.segment'))

  // A URL hash of "#<id>" selects the element whose id is "segment<id>".
  const startingOffset = window.location.hash.substring(1)
  const startingSegment = startingOffset ? document.getElementById(`segment${startingOffset}`) : null
  // Guard: the hash may not correspond to any segment on this page.
  if (startingSegment) {
    window.scroll(0, findPos(startingSegment))
    startingSegment.classList.add(...highlightClasses)
    // The player is only rendered when the episode has an enclosure URL.
    if (vid) { vid.currentTime = startingSegment.dataset.timestart }
  }
  // Without a player there is no playback position to follow.
  if (!vid) { return }

  // Read data-timestart / data-timeend once, as numbers.
  const segmentsTimeList = segmentElements.map((element)=>({
    element,
    timestart: Number(element.dataset.timestart),
    timeend: Number(element.dataset.timeend),
  }))
  vid.addEventListener('timeupdate', ()=>{
    const now = vid.currentTime
    // Inclusive start bound: seeking to exactly a segment's start time
    // (as the hash handling above does) must keep that segment highlighted.
    const currentSegment = segmentsTimeList.find(s=>s.timestart<=now && s.timeend>now)?.element
    segmentElements.forEach(s=>s.classList.remove(...highlightClasses))
    if (currentSegment) { currentSegment.classList.add(...highlightClasses) }
  })
})
</script>
{{#with (getEpisodeData collections.episode season episode) as |episode_data|}}
<h1>{{#if episode_data.title}}{{{episode_data.title}}}{{else}}Episode {{episode_data.episode}}{{/if}}</h1>
Season {{episode_data.season}}, Episode {{episode_data.episode}} <br>
<date>{{formatDate episode_data.date "MMMM d, yyyy"}}</date><br>
<hr>
{{#if episode_data.podcast}}
{{#if episode_data.podcast.enclosureUrl}}
<audio controls id="audio-player" >
<source src="{{{episode_data.podcast.enclosureUrl}}}" type="audio/mpeg">
Your browser does not support the audio element.
</audio>
{{/if}}
{{/if}}
{{/with}}
{{!-- One block per transcript segment. The data-timestart / data-timeend attributes (seconds) are read by the page script. --}}
{{!-- NOTE(review): a <div> nested in a <p> is not valid HTML nesting; confirm the rendered DOM is what is intended. --}}
{{#each segments as |segment|}}
<div class="segment" id="segment{{segment.id}}" data-segment-id="{{segment.id}}" data-timestart="{{divide segment.start 1000}}" data-timeend="{{divide segment.end 1000}}" >
<p id="{{segment.id}}">
<div class="btn-group">
<span class="time btn btn-sm btn-outline-dark p-0 px-1 disabled">{{formatDuration segment.start "hh:mm:ss"}}</span>
<a class="btn btn-sm btn-outline-dark p-0" onclick="vid.pause()"><i class="bi bi-pause-fill"></i></a>
<a class="btn btn-sm btn-outline-dark p-0 " onclick="vid.currentTime={{divide segment.start 1000}};vid.play()"><i class="bi bi-play-fill"></i></a>
</div>
<span>{{segment.text}}</span>
</p>
</div>
{{/each}}

View File

@@ -66,5 +66,22 @@ module.exports = {
},
episodeNumber: (s, episode) => {
return episode ? Number(episode) : Number(s.replace(/[^0-9]/,''))
},
extractSeasonEpisode: (input, property) => {
const regex = /(?:[Ss](?<season>\d{1,2}))?(?:[eE][Pp]?)?(?<episode>\d{1,2})/;
const match = input.match(regex);
if (match) {
const season = match.groups?.season ? parseInt(match.groups.season, 10) : null;
const episode = match.groups?.episode ? parseInt(match.groups.episode, 10) : null;
const result = { season, episode };
return property ? result[property] : result; // Return specific property if requested
}
return property ? null : { season: null, episode: null }; // Return null or full object
},
/**
 * Find the `data` object of the first entry matching a season/episode pair.
 *
 * @param {Array<{data: object}>} episodes - Entries to search.
 * @param {number|string} season - Season to match.
 * @param {number|string} episode - Episode to match.
 * @returns {object|undefined} The matching entry's `data`, or undefined when none matches.
 */
getEpisodeData: (episodes, season, episode) => {
  // Loose equality (`==`) is kept so that values like "2" and 2 compare equal.
  const isRequested = (entry) => entry.data.episode == episode && entry.data.season == season
  const hit = episodes.find(isRequested)
  return hit?.data
}
}