feat: improve Markdown support and add transcript search functionality

- Add `.cache` to `.gitignore` for temporary build files.
- Introduce Markdown snippets for creating Season 1 and Season 5 show notes in `.vscode/markdown.code-snippets`.
- Update `.vscode/settings.json`:
  - Exclude `node_modules` directory.
  - Adjust quick suggestions to disable them for comments, strings, and other text.
- Remove unnecessary `console.log` statements from `episodes.11tydata.js`.
- Add an image file `content/episodes/image.jpg`.
- Implement a transcript search feature:
  - Add `search-transcripts.hbs` to enable searching transcript cues with time markers.
  - Add `transcript-index.11ty.js` to generate a searchable transcript index.
- Update `search-index.11ty.js` to skip processing `<hr>` and `<img>` tags.
- Enhance episode layout with `startAt` query parameter to allow audio playback from a specific time.
- Add new dependencies:
  - `@11ty/eleventy-fetch` for fetching transcripts.
  - `media-captions` for parsing and handling transcript files.
- Update package-lock.json and package.json to include new dependencies.
This commit is contained in:
2024-12-24 12:07:32 -06:00
parent 4350690071
commit 27eb1e634c
13 changed files with 345 additions and 13 deletions

View File

@@ -39,7 +39,6 @@ async function podcastData (data) {
resolve(data.podcast.enclosureUrl)
} else {
const url = `${data.site.cdn}/${file_stem}.mp3`
console.log(`Inferring URL @ ${url} for ${data.page.url}`)
fetch(url, { method: "HEAD" })
.then((res)=>{
if (res.ok) {
@@ -56,7 +55,6 @@ async function podcastData (data) {
resolve(data.podcast.transcriptUrl)
} else {
const url = `${data.site.cdn}/${file_stem}.srt`
console.log(`Inferring URL @ ${url}`)
fetch(url, { method: "HEAD" })
.then((res)=>{
if (res.ok) {

BIN
content/episodes/image.jpg Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 498 KiB

View File

@@ -2,7 +2,6 @@
title: I'm A Little Mindfreak
date: 2024-12-16
---
{% prologue %}
Mindflayer Monday
===

View File

@@ -1,6 +1,4 @@
import markdownit from 'markdown-it';
import { convert } from 'html-to-text';
const md = markdownit({html: true})
const { convert } = require('html-to-text');
class SearchIndex {
data() {
@@ -12,7 +10,12 @@ class SearchIndex {
return {
url:`${this.url(episode.url)}`,
title: episode.data.title,
text: convert (episode.content),
text: convert (episode.content, {
selectors: [
{ selector: 'hr', format: 'skip' },
{ selector: 'img', format: 'skip'}
]
}),
season: episode.data.season,
episode: episode.data.episode
}})
@@ -20,4 +23,4 @@ class SearchIndex {
}
}
export default SearchIndex
module.exports = SearchIndex

View File

@@ -0,0 +1,60 @@
---
layout: base-with-heading
title: Search Transcripts
eleventyExcludeFromCollections: ["episode"]
override:tags: []
override:eleventyComputed: []
---
<script src="https://unpkg.com/lunr/lunr.js"></script>
<div>
<form id="search-form">
<input type="text" class="form-control mb-3" name="searchQuery" id="searchQuery">
<button class="btn btn-primary mb-3" type="submit">Search</button>
</form>
<div id="results">
<ol></ol>
</div>
</div>
<script type="module">
// Search-page bootstrap: fetch the transcript index JSON, flatten each
// episode's cues into one searchable document per cue, then build a lunr
// index over the cue text. `idx`/`docs` stay undefined until the fetch
// resolves; handleSubmit must tolerate that window.
import { Duration } from 'https://cdn.jsdelivr.net/npm/luxon@3.5.0/+esm';
let idx;  // lunr index, set once the transcript index has loaded
let docs; // flat array of {title, episode, season, url, startTime, text}, one per cue
let search_index_promise = fetch('../transcript-index')
  .then((res) => {
    if (!res.ok) throw new Error(`transcript index fetch failed: ${res.status}`);
    return res.json();
  })
  .then((documents) => {
    // One searchable document per cue so each hit carries a start time.
    docs = documents.flatMap(({ title, episode, season, url, cues }) =>
      cues.map(({ startTime, text }) => ({ title, episode, season, url, startTime, text }))
    );
    idx = lunr(function () {
      this.ref('id');
      this.field('text');
      this.metadataWhitelist = ['position'];
      docs.forEach(function (doc, i) {
        doc.id = i; // lunr refs come back as strings; array lookup still works
        this.add(doc);
      }, this);
    });
  })
  .catch((err) => {
    // Surface load failures instead of leaving a silently-rejected promise.
    console.error('Failed to build transcript search index', err);
  });
// Handle a search submit: run the lunr query over transcript cues and
// render each hit as a link that deep-links into the episode at the cue's
// start time via the ?startAt= query parameter.
function handleSubmit(evt) {
  evt.preventDefault();
  // Index may still be loading (or have failed); searching now would throw.
  if (!idx) return;
  const formData = new FormData(evt.target);
  const { searchQuery } = Object.fromEntries(formData);
  const results = idx.search(searchQuery);
  const results_ol = document.getElementById('results').querySelector('ol');
  results_ol.innerHTML = '';
  results.forEach((r) => {
    const { url, title, text, season, episode, startTime } = docs[r.ref];
    const el = document.createElement('li');
    const link = document.createElement('a');
    link.href = `${url}?startAt=${startTime}`;
    link.textContent = `${title} (Season ${season}, episode ${episode})`;
    const time = document.createElement('p');
    time.textContent = Duration.fromObject({ seconds: startTime }).toFormat('hh:mm:ss');
    // Use textContent so transcript text cannot inject markup.
    const cue = document.createElement('p');
    cue.textContent = text;
    el.append(link, time, cue);
    results_ol.appendChild(el);
  });
}
document.getElementById('search-form').addEventListener('submit', handleSubmit)
</script>

View File

@@ -0,0 +1,31 @@
import {parseText} from 'media-captions';
import Fetch from "@11ty/eleventy-fetch";
import path from 'path';
/**
 * Eleventy template class that emits a JSON index of transcript cues for
 * every episode that declares a transcript URL. Consumed by the
 * client-side transcript search page.
 */
class SearchIndex {
  data() {
    // Keep the generated JSON page out of the episode collection and
    // render it without any layout wrapper.
    return { eleventyExcludeFromCollections: ["episode"], layout: null };
  }
  async render(data) {
    const withTranscripts = data.collections.episode.filter(
      (episode) => episode.data.podcast?.transcriptUrl
    );
    const entries = await Promise.all(
      withTranscripts.map(async (episode) => {
        const { transcriptUrl } = episode.data.podcast;
        // Cached fetch (1 day) so rebuilds don't re-download every transcript.
        const srtText = await Fetch(transcriptUrl, { type: "text", duration: "1d" });
        const { cues } = await parseText(srtText.toString(), { type: "srt" });
        return {
          name: path.basename(transcriptUrl, ".srt"),
          episode: episode.data.episode,
          season: episode.data.season,
          title: episode.data.title,
          url: `${this.url(episode.url)}`,
          // Keep only the fields the search page needs.
          cues: cues.map(({ id, startTime, text }) => ({ id, startTime, text })),
        };
      })
    );
    return JSON.stringify(entries);
  }
}
export default SearchIndex

View File

@@ -8,6 +8,9 @@ links:
- name: Discord
url: discord
iconClasses: bi bi-discord
- name: Search Episodes (Beta)
url: /episodes/search
iconClasses: bi bi-binoculars
- name: GM Tools
url: gm-tools
iconClasses: bi bi-journal-bookmark