Skip to content

Instantly share code, notes, and snippets.

@yuuki7
Last active November 28, 2025 04:19
  • Select an option

Select an option

Convert a GitHub Wiki to static HTML files
{
"indent_size": 2,
"end_with_newline": true,
"html": {
"indent_inner_html": true,
"preserve_newlines": false,
"extra_liners": [],
"js": {
"end_with_newline": false
}
}
}
##
# Constants for the main script
#
# The standard libraries used below are required here so that this file can be
# loaded on its own, independent of what the loading script required first.
#
require 'pathname'
require 'uri'

##
# Constants for the GitHub Wiki
#

# Path to the locally cloned wiki repository
WIKI_REPO = Pathname('../wikinder.wiki')

# URL of the wiki (ends with a slash so that page slugs can be joined onto it)
WIKI_URL = URI('https://github.com/wikinder/wikinder/wiki/')

##
# Constants for the generated site
#

# Path to the output directory
OUTPUT_DIRECTORY = Pathname('./out')

# Path to the HTML template file
HTML_TEMPLATE_FILE = Pathname('./template.html.liquid')

# Title of the site
SITE_NAME = 'Wikinder'.freeze

# URL of the site
SITE_URL = URI('https://wikinder.org/')

# Base path for internal links (the path component of SITE_URL)
BASE_PATH = Pathname(SITE_URL.path)

# URL of the site logo
LOGO_URL = URI.join(SITE_URL, '/assets/images/icon.jpg')

# Path to the stylesheet file
STYLESHEET_FILE = Pathname('/assets/css/style.css')

# Path to the MathJax configuration script
MATHJAX_CONFIG_SCRIPT = Pathname('/assets/js/mathjax-config.js')

# Article date format for display (strftime syntax, e.g. "January 5, 2025")
DATE_FORMAT = '%B %-d, %-Y'.freeze
require 'cgi'
require 'pathname'
# Time#iso8601 is used below. On Rubies where it is not built in it is provided
# by the `time` standard library, so load it explicitly instead of relying on
# another gem having already done so.
require 'time'
require 'uri'

require 'commonmarker'
require 'gollum-lib'
require 'liquid'
require 'nokogiri'

# Load constants
require_relative './config'

# Load Gollum configuration
require_relative './gollum-config'

# Load methods
require_relative './utils'

# Make sure the output directory exists before anything is written into it
OUTPUT_DIRECTORY.mkpath

# Load the HTML template
html_template = Liquid::Template.parse(HTML_TEMPLATE_FILE.read, error_mode: :strict)

# Load the wiki
wiki = Gollum::Wiki.new(WIKI_REPO.to_s, GOLLUM_OPTIONS)
home_page = wiki.page('Home')
page_footer_html = home_page.footer.formatted_data

# Pages that do not become articles: Home is rendered separately as index.html
special_slugs = %w[Home LICENSE README]

# Pages to list on the home page and sitemap
all_pages = []

# Generate individual article pages and add them to the list
wiki.pages.each do |page|
  # Skip non-Markdown pages
  next unless page.format == :markdown

  slug = page.filename_stripped

  # Skip Home and special pages
  next if special_slugs.include?(slug)

  article_title = slug.tr('-', ' ')
  encoded_slug = URI.encode_uri_component(slug)

  # URL of the page on the generated site
  canonical_url = URI.join(SITE_URL, encoded_slug)

  # URL of the page on the wiki
  wiki_page_url = URI.join(WIKI_URL, encoded_slug)

  # Get the first commit of the page (following renames). The last entry of
  # the returned list is taken as the oldest one; the large per_page value is
  # meant to fetch the whole history in a single call.
  first_commit = page.versions({
    follow: true,
    per_page: 10_000,
  }).last
  last_commit = page.last_version
  is_modified = last_commit.id != first_commit.id

  # Published date in UTC
  published_date = first_commit.authored_date.getutc
  published_date_iso = published_date.iso8601
  published_date_display = published_date.strftime(DATE_FORMAT)

  # Last modified date in UTC
  modified_date = last_commit.authored_date.getutc
  modified_date_iso = modified_date.iso8601
  modified_date_display = modified_date.strftime(DATE_FORMAT)

  author_name = first_commit.author.name

  # Generate the HTML file
  generate_html_file("#{slug}.html", page.formatted_data, html_template, {
    'is_home' => false,
    'canonical_url' => canonical_url.to_s,
    'wiki_page_url' => wiki_page_url.to_s,
    'article_title' => article_title,
    'page_footer' => page_footer_html,
    'is_modified' => is_modified,
    'published_date_display' => published_date_display,
    'published_date_iso' => published_date_iso,
    'modified_date_display' => modified_date_display,
    'modified_date_iso' => modified_date_iso,
    'author_name' => author_name,
  })

  # Add the page to the list
  all_pages << {
    encoded_slug: encoded_slug,
    canonical_url: canonical_url,
    title: article_title,
    escaped_title: CGI.escapeHTML(article_title),
    published_date: published_date,
    modified_date: modified_date,
    modified_date_iso: modified_date_iso,
  }
end

# Pages listed on the home page, sorted by published date (newest first)
home_page_list = all_pages
  .reject { |page| page[:title].start_with?(SITE_NAME) } # Exclude About pages
  .sort_by { |page| page[:published_date] }
  .reverse
  .map { |page| page.transform_keys(&:to_s) } # Stringify keys because Liquid doesn't support symbols

# Generate the home page
generate_html_file('index.html', home_page.formatted_data, html_template, {
  'is_home' => true,
  'canonical_url' => SITE_URL.to_s,
  'wiki_page_url' => WIKI_URL.to_s.delete_suffix('/'), # Remove the trailing slash
  'article_title' => SITE_NAME,
  'page_footer' => page_footer_html,
  'all_pages' => home_page_list,
})

# Format string for one sitemap entry; filled in from a page hash
sitemap_entry_template = <<~XML
  <url>
    <loc>%{canonical_url}</loc>
    <lastmod>%{modified_date_iso}</lastmod>
  </url>
XML

# Generate the sitemap sorted by modified date (newest first)
sitemap_urls_xml = all_pages
  .sort_by { |page| page[:modified_date] }
  .reverse
  .map { |page| format(sitemap_entry_template, page) }
  .join

sitemap_xml = <<~XML
  <?xml version="1.0" encoding="UTF-8"?>
  <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
    <url>
      <loc>#{SITE_URL}</loc>
    </url>
  #{sitemap_urls_xml}
  </urlset>
XML

sitemap_file = OUTPUT_DIRECTORY.join('sitemap.xml')
sitemap_file.write(sitemap_xml)
#!/bin/bash
# Build the static site, then pretty-print the generated HTML and XML files.

# Exit on error, on use of an unset variable, and when any stage of a
# pipeline fails
set -euo pipefail

# Generate the site
ruby ./github-wiki-to-html.rb

# Beautify every HTML and XML file below the current directory in place.
# `-exec ... {} +` batches file names the way xargs does, but html-beautify is
# not run at all when nothing matches, and a failing run makes find (and thus
# this script) exit with a non-zero status.
find . -type f \( -name '*.html' -o -name '*.xml' \) \
  -exec html-beautify --replace --quiet {} +
##
# Configuration for Gollum
#

# Options for the Gollum::Wiki constructor
GOLLUM_OPTIONS = {
  # Base path for internal links
  base_path: BASE_PATH.to_s,

  # Convert spaces to hyphens in internal links
  hyphened_tag_lookup: true,

  # Do not add class="editable" to section headings
  allow_editing: false,

  # Keep Gollum's filter chain minimal
  #   :Tags   - Convert internal links to standard Markdown links
  #   :Render - Render Markdown to HTML
  filter_chain: %i[Tags Render],
}

# Use Commonmarker as the Markdown renderer: drop every registered renderer
# and register a single Commonmarker-backed one.
markdown_gems = GitHub::Markup::Markdown::MARKDOWN_GEMS
markdown_gems.clear

# NOTE(review): kept as a proc (not a lambda) so that any extra arguments the
# caller may pass are ignored - confirm against github-markup's call site.
markdown_gems['commonmarker'] = proc { |source|
  render_options = {
    # Allow raw HTML tags to support <details> tags etc.
    unsafe: true,
  }

  extension_options = {
    # Remove blacklisted HTML tags (GFM)
    tagfilter: true,

    # Enable footnote syntax (GFM)
    footnotes: true,
  }

  ::Commonmarker.to_html(source, options: {
    render: render_options,
    extension: extension_options,
  })
}
<!DOCTYPE html>
<html lang="en">
<head>
  {% comment %}
    article_title is derived from the wiki page name and is passed in without
    HTML escaping, so it is escaped wherever it is written into HTML text or
    an attribute value.
    NOTE(review): values inside the JSON-LD blocks below are inserted verbatim
    and are not JSON-escaped; a title or author name containing a double quote
    or a backslash would produce invalid JSON.
  {% endcomment %}
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="color-scheme" content="light dark">
  <meta name="format-detection" content="telephone=no">

  <!-- OG meta tags -->
  {% if is_home %}
    <meta property="og:type" content="website">
  {% else %}
    <meta property="og:type" content="article">
  {% endif %}
  <meta property="og:title" content="{{ article_title | escape }}">
  <meta property="og:url" content="{{ canonical_url }}">
  <meta property="og:image" content="{{ logo_url }}">
  <!-- /OG meta tags -->

  <!-- Twitter Card meta tags -->
  <meta name="twitter:card" content="summary">
  <!-- /Twitter Card meta tags -->

  <title>{{ article_title | escape }}</title>

  <link rel="canonical" href="{{ canonical_url }}">
  <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/">
  <link rel="stylesheet" href="{{ stylesheet_file }}">

  <!-- MathJax -->
  <script defer src="{{ mathjax_config_script }}"></script>
  <script
    defer
    src="https://cdn.jsdelivr.net/npm/mathjax@4.0.0/startup.js"
    integrity="sha384-V8Uc+jzQMe7n4tFx1oAuCOiBj0WFbXurxmgghjXXzYHRKbpk8D/aCD16BkdE/MDh"
    crossorigin="anonymous"
  ></script>
  <!-- /MathJax -->

  <!-- Structured data -->
  {% if is_home %}
    <script type="application/ld+json">
      {
        "@context": "https://schema.org/",
        "@type": "WebSite",
        "name": "{{ site_name }}",
        "url": "{{ canonical_url }}",
        "publisher": {
          "@type": "Organization",
          "name": "{{ site_name }}",
          "url": "{{ site_url }}",
          "logo": "{{ logo_url }}"
        }
      }
    </script>
  {% else %}
    <script type="application/ld+json">
      {
        "@context": "https://schema.org/",
        "@type": "Article",
        "headline": "{{ article_title }}",
        "mainEntityOfPage": {
          "@type": "WebPage",
          "@id": "{{ canonical_url }}"
        },
        "image": "{{ logo_url }}",
        "datePublished": "{{ published_date_iso }}",
        "dateModified": "{{ modified_date_iso }}",
        "author": {
          "@type": "Person",
          "name": "{{ author_name }}"
        },
        "publisher": {
          "@type": "Organization",
          "name": "{{ site_name }}",
          "url": "{{ site_url }}",
          "logo": "{{ logo_url }}"
        }
      }
    </script>
  {% endif %}
  <!-- /Structured data -->
</head>
<body>
  {% comment %}
    article_body and page_footer are rendered HTML and are inserted as is.
    article_title is plain text that arrives unescaped, so it is escaped in
    the heading; the titles in the navigation list arrive already escaped
    as escaped_title.
  {% endcomment %}

  <!-- Page header -->
  <header>
    <nav>
      <p>
        {% if is_home %}
          {% for page in all_pages %}
            <a href="{{ page.encoded_slug }}">{{ page.escaped_title }}</a>
            {% unless forloop.last %}
              ·
            {% endunless %}
          {% endfor %}
        {% else %}
          <a href="{{ base_path }}">{{ site_name }}</a>
        {% endif %}
      </p>
    </nav>
  </header>
  <!-- /Page header -->

  <!-- Page body -->
  <main>
    <article>
      <!-- Article header -->
      <header>
        <h1>{{ article_title | escape }}</h1>
        {% unless is_home %}
          <p>
            <time datetime="{{ published_date_iso }}">{{ published_date_display }}</time>
            {% if is_modified %}
              · 🕒
              <time datetime="{{ modified_date_iso }}">{{ modified_date_display }}</time>
            {% endif %}
          </p>
        {% endunless %}
      </header>
      <!-- /Article header -->

      <!-- Article body -->
      <div>
        {{ article_body }}
      </div>
      <!-- /Article body -->
    </article>
  </main>
  <!-- /Page body -->

  <!-- Page footer -->
  <footer>
    {{ page_footer }}
    <p>
      <a href="{{ wiki_page_url }}">Edit this page</a>
    </p>
  </footer>
  <!-- /Page footer -->
</body>
</html>
##
# Methods for the main script
#
# Tweak HTML converted from Markdown
#
# Rewrites the href of every <a class="internal"> element: the file extension
# is removed from the path and the path is made relative to BASE_PATH. Only
# those links are modified here.
#
# @param html [String] HTML fragment to process
# @return [String] the fragment serialized back to HTML
def postprocess_html(html)
  fragment = Nokogiri::HTML5.fragment(html)

  fragment.css('a.internal').each do |link|
    target = URI(link['href'])

    # Strip the extension, then express the path relative to the base path
    target.path = Pathname(target.path)
      .sub_ext('')
      .relative_path_from(BASE_PATH)
      .to_s

    link['href'] = target.to_s
  end

  fragment.to_html
end
# Generate an HTML file
#
# Post-processes the article body, renders it into the page template together
# with the site-wide variables, and writes the result below OUTPUT_DIRECTORY.
# The target directory is created first if it does not exist yet.
#
# @param output_filename [String] file name relative to OUTPUT_DIRECTORY
# @param article_body_html [String] HTML of the article body
# @param html_template [#render!] template; called with the variables hash and
#   the render options
# @param options [Hash{String => Object}] page-specific template variables;
#   they take precedence over the site-wide ones on key collisions
# @return [Integer] number of bytes written
def generate_html_file(output_filename, article_body_html, html_template, options)
  # Site-wide variables first, so that page-specific options can override them
  variables = {
    'site_url' => SITE_URL.to_s,
    'site_name' => SITE_NAME,
    'base_path' => BASE_PATH.to_s,
    'logo_url' => LOGO_URL.to_s,
    'stylesheet_file' => STYLESHEET_FILE.to_s,
    'mathjax_config_script' => MATHJAX_CONFIG_SCRIPT.to_s,
    'article_body' => postprocess_html(article_body_html),
  }.merge(options)

  # Render full HTML; fail loudly on unknown variables or filters
  full_html = html_template.render!(variables, {
    strict_variables: true,
    strict_filters: true,
  })

  # Write HTML to a file, creating the output directory on a fresh checkout
  output_file = OUTPUT_DIRECTORY.join(output_filename)
  output_file.dirname.mkpath
  output_file.write(full_html)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment