my eye

Bootstrap

Committed b82773

index 0000000..f9f0d23
--- /dev/null

+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry]
+name = "webint-cache"
+version = "0.0.1"
+description = "manage resource caching on your website"
+keywords = ["micropub"]
+homepage = "https://ragt.ag/code/projects/webint-cache"
+repository = "https://ragt.ag/code/projects/webint-cache.git"
+documentation = "https://ragt.ag/code/projects/webint-cache/api"
+authors = ["Angelo Gladding <angelo@ragt.ag>"]
+license = "AGPL-3.0-or-later"
+packages = [{include="webint_cache"}]
+
+[tool.pyright]
+reportGeneralTypeIssues = false
+reportOptionalMemberAccess = false
+
+[tool.poetry.plugins."webapps"]
+cache = "webint_cache:app"
+
+[[tool.poetry.source]]
+name = "main"
+url = "https://ragt.ag/code/pypi"
+
+[tool.poetry.dependencies]
+python = ">=3.10,<3.11"
+webint = ">=0.0"
+svglib = "^1.5.1"
+python-whois = "^0.8.0"
+feedparser = "^6.0.11"
+phonenumbers = "^8.13.27"
+
+[tool.poetry.group.dev.dependencies]
+gmpg = {path="../gmpg", develop=true}
+bgq = {path="../bgq", develop=true}
+newmath = {path="../newmath", develop=true}
+sqlyte = {path="../sqlyte", develop=true}
+webagt = {path="../webagt", develop=true}
+webint = {path="../webint", develop=true}

index 0000000..4f4711a
--- /dev/null

+""""""
+
+import collections
+import hashlib
+import logging
+import os
+import pathlib
+import subprocess
+import time
+
+import PIL
+import requests
+import web
+import webagt
+import whois
+from reportlab.graphics import renderPM
+from svglib.svglib import svg2rlg
+from web import tx
+
+from .silos import silos
+
+logging.basicConfig(level=logging.DEBUG, filename="crawl.log", filemode="w", force=True)
+
+app = web.application(
+    __name__,
+    prefix="cache",
+    args={
+        "site": r"[a-z\d.-]+\.[a-z]+",
+        "page": r".*",
+    },
+    model={
+        "cache": {
+            "url": "TEXT UNIQUE NOT NULL",
+            "crawled": "DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP",
+            "details": "JSON NOT NULL",
+        },
+        "cache_redirects": {
+            "incoming": "TEXT UNIQUE NOT NULL",
+            "outgoing": "TEXT NOT NULL",
+        },
+    },
+)
+
+sites_path = pathlib.Path("sites")
+sites_path.mkdir(exist_ok=True)
+
+agent = webagt.Agent("webint-cache")
+blocklist = ["accounts.google.com"]
+ignored_rels = [
+    "author",
+    "bookmark",
+    "canonical",
+    "category",
+    "contents",
+    "home",
+    "nofollow",
+    "noreferrer",
+    "noopener",
+    "pingback",
+    "profile",
+    "shortcut",
+    "shortlink",
+    "syndication",
+    "tag",
+    "ugc",
+]
+social_network_rels = ["acquaintance", "colleague", "friend", "met"]
+
+# def refresh_page(url):
+#     try:
+#         response = agent.get(domain)
+#     except (requests.ConnectionError, requests.Timeout) as err:
+#         return {"status": "not responding", "error": str(err)}
+#     try:
+#         tx.db.insert(
+#             "cache",
+#             url=url,
+#             details={
+#                 "metaverse":
+#                   hashlib.sha256(domain.encode("utf-8")).hexdigest().upper(),
+#                 "domain": {
+#                     "name": domain,
+#                     "suffix": domain_details.suffix,
+#                     "hsts": domain_details.in_hsts,
+#                 },
+#             },
+#         )
+#         web.enqueue(query_whois, domain)
+#     except tx.db.IntegrityError:
+#         pass
+#     return
+
+
def refresh_site(domain):
    """Fetch `domain` and store site details and related media.

    Seeds a `cache` row for the domain, records response/microformat/feed
    details incrementally via `get_updater`, enqueues background jobs for
    media, metadata, and audits, and finally computes an IndieMark score.

    NOTE(review): returns a status dict when the site is unreachable but
    None on every other path -- confirm callers don't rely on the return.
    """
    if domain in blocklist or not webagt.uri(domain).suffix:
        logging.debug(f"skipping {domain}")
        return
    # TODO logging.debug("getting previous details..")  # for etag
    start = time.time()
    logging.debug("downloading HTML..")
    try:
        response = agent.get(domain)
    except (requests.ConnectionError, requests.Timeout) as err:
        return {"status": "not responding", "error": str(err)}
    # If the fetch landed on a different host, record the redirect and
    # restart the crawl against the final host instead.
    if domain != response.url.host:
        try:
            tx.db.insert("cache_redirects", incoming=domain, outgoing=response.url.host)
        except tx.db.IntegrityError:
            tx.db.update(
                "cache_redirects",
                outgoing=response.url.host,
                where="incoming = ?",
                vals=[domain],
            )
        refresh_site(response.url.host)
        return
    domain_details = webagt.uri(domain)
    # Seed the cache row; IntegrityError means the domain is already known,
    # in which case the existing row is simply updated below.
    try:
        tx.db.insert(
            "cache",
            url=domain,
            details={
                "metaverse": hashlib.sha256(domain.encode("utf-8")).hexdigest().upper(),
                "domain": {
                    "name": domain,
                    "suffix": domain_details.suffix,
                    "hsts": domain_details.in_hsts,
                },
            },
        )
        web.enqueue(query_whois, domain)
    except tx.db.IntegrityError:
        pass
    site_path = sites_path / domain
    site_path.mkdir(parents=True, exist_ok=True)

    # Background audits run independently of the rest of the crawl.
    web.enqueue(run_lighthouse, domain)
    web.enqueue(run_pa11y, domain)

    update_details = get_updater(domain)
    update_details(
        accessed=web.now().to_iso8601_string(),
        response={
            "status": response.status,
            "time": time.time() - start,
            "headers": dict(response.headers),
            "length": round(len(response.text) / 1000),
        },
    )
    logging.debug("parsing Microformats..")
    mf2json = response.mf2json
    rels = dict(mf2json["rels"])

    # IndieAuth/IndieWeb endpoint discovery; each consumed rel is popped so
    # it doesn't get stored again with the leftover rels below.
    if authorization_endpoint := rels.pop("authorization_endpoint", None):
        indieauth_details = {"authorization_endpoint": authorization_endpoint}
        if token_endpoint := rels.pop("token_endpoint", None):
            indieauth_details["token_endpoint"] = token_endpoint
        update_details(indieauth=indieauth_details)
    if indieauth_metadata_endpoint := rels.pop("indieauth-metadata", None):
        web.enqueue(get_indieauth_metadata, domain, indieauth_metadata_endpoint[0])

    if search := rels.pop("search", None):
        web.enqueue(get_search_description, domain, search[0])

    if manifest := rels.pop("manifest", None):
        web.enqueue(get_manifest, domain, manifest[0])

    if hub_endpoint := rels.pop("hub", None):
        web.enqueue(
            get_websub_hub, domain, hub_endpoint[0], rels.pop("self", [domain])[0]
        )

    web.enqueue(get_activitypub, domain)

    card = response.card
    update_details(mf2json=mf2json, card=card, rels=rels)
    # NOTE(review): rel values are lists, so this `photo_url` is a list when
    # taken from apple-touch-icon but a string when taken from the h-card
    # below; `get_media` treats it as a string -- confirm intended.
    photo_url = rels.pop("apple-touch-icon", None)
    card_type = None
    if card:
        card_type = "person"
        if card_org := card.get("org"):
            if card["name"][0] == card_org[0]:
                card_type = "organization"
        if emails := card.get("email"):
            gravatars = {}
            for email in emails:
                email = email.removeprefix("mailto:")
                gravatars[email] = hashlib.md5(
                    email.strip().lower().encode("utf-8")
                ).hexdigest()
            # TODO SET `gravatars`
        if photo_urls := card.get("photo"):  # TODO move to on-demand like icon?
            if isinstance(photo_urls[0], dict):
                photo_url = photo_urls[0]["value"]
            else:
                photo_url = photo_urls[0]
    try:
        icon_url = rels.pop("icon")[0]
    except KeyError:
        icon_url = f"{domain}/favicon.ico"
    web.enqueue(get_media, domain, photo_url, icon_url)

    # Capture inline and external scripts/styles for later analysis.
    scripts = []
    for script in response.dom.select("script"):
        script_details = dict(script.element.attrib)
        script_details["content_length"] = len(script.text)
        script_details["text"] = script.text
        scripts.append(script_details)
    stylesheets = rels.pop("stylesheet", [])
    for stylesheet in response.dom.select("style"):
        stylesheets.append(
            {
                "content_length": len(stylesheet.text),
                "text": stylesheet.text,
            }
        )
    whostyle = rels.pop("whostyle", None)
    try:
        title = response.dom.select("title")[0].text
    except IndexError:
        title = ""
    update_details(
        scripts=scripts, stylesheets=stylesheets, whostyle=whostyle, title=title
    )

    for ignored_rel in ignored_rels:
        rels.pop(ignored_rel, None)
    social_network = {}
    for social_network_rel in social_network_rels:
        if people_rels := rels.pop(social_network_rel, None):
            social_network[social_network_rel] = people_rels
    logging.debug("determining reciprocal rel=me..")
    reciprocals = set()
    # Silos whose entry in `silos` has a third element support reverse
    # rel=me verification.
    rel_me_silos = []
    for silo, silo_details in silos.items():
        if len(silo_details) == 3:
            rel_me_silos.append(silo_details[0])
    rel_mes = rels.pop("me", [])
    url = webagt.uri(domain)  # TODO XXX
    for me_url in rel_mes:
        if not me_url.startswith(("http", "https")):
            continue
        me_url = webagt.uri(me_url)
        logging.debug(f"  rel=me {me_url}")
        # XXX if (me_url.domain, me_url.suffix) == ("twitter", "com"):
        # XXX     if "/" in me_url.path:
        # XXX         continue
        # XXX     twitter_id = me_url.path.split("/")[0]
        # XXX     twitter_bearer = app.cfg.get("TWITTER")
        # XXX     print(
        # XXX         agent.get(
        # XXX             f"https://api.twitter.com/2/users"
        # XXX             f"/by/username/{twitter_id}?user.fields=url",
        # XXX             headers={"Authorization": f"Bearer {twitter_bearer}"},
        # XXX         ).json
        # XXX     )
        # XXX     twitter_profile = agent.get(
        # XXX         f"https://api.twitter.com/2/users"
        # XXX         f"/by/username/{twitter_id}?user.fields=url",
        # XXX         headers={"Authorization": f"Bearer {twitter_bearer}"},
        # XXX     ).json["data"]
        # XXX     if twitter_profile_url := twitter_profile.get("url", None):
        # XXX         try:
        # XXX             recip_url = agent.get(twitter_profile_url).url
        # XXX         except requests.Timeout:
        # XXX             continue
        # XXX         if recip_url == url:
        # XXX             reciprocals.add(me_url.minimized)
        # Wikipedia user pages expose the subject's URL in microformats.
        if (me_url.subdomain, me_url.domain, me_url.suffix) == (
            "en",
            "wikipedia",
            "org",
        ):
            wp_props = agent.get(me_url).mf2json["items"][0]["properties"]
            if wp_url := wp_props.get("url"):
                if wp_url[0] == url:
                    reciprocals.add(me_url.minimized)
        if me_url.host not in rel_me_silos:
            continue
        try:
            reverse_rel_mes = agent.get(me_url).mf2json["rels"]["me"]
        except KeyError:
            continue
        for reverse_rel_me in reverse_rel_mes:
            if webagt.uri(reverse_rel_me).minimized == url.minimized:
                reciprocals.add(me_url.minimized)
    update_details(
        social_network=social_network, reciprocals=list(reciprocals), rel_me=rel_mes
    )

    # Fall back to alternate representations when the page itself has no feed.
    feed = response.feed
    alt_feed_urls = set()
    # NOTE(review): `alt_feed_urls` is computed but never read below --
    # possibly meant to join `alternate_reprs + alternate_feeds`.
    if not feed["items"]:
        try:
            alt_feed_urls = set(rels["home"]) & set(rels["alternate"])
        except KeyError:
            pass
    alternate_reprs = rels.pop("alternate", [])
    alternate_feeds = rels.pop("feed", [])
    if not feed["items"]:
        for alt_feed_url in alternate_reprs + alternate_feeds:
            try:
                feed = agent.get(alt_feed_url).feed
            except ValueError:  # XML feed
                pass
            finally:
                print("using", alt_feed_url)
    # rels.pop("alternate", None)
    for entry in feed["items"]:
        try:
            published = entry["published"]
            permalink = entry["url"]
            entry.pop("published-str")
        except KeyError:
            # Entries without a published date and URL are left untouched.
            continue
        entry.pop("uid", None)
        # TODO refresh_page(permalink)
    update_details(feed=feed)

    # logging.debug("archiving to WARC..")
    # warc_file = site_path / "warc_output"
    # subprocess.run(
    #     [
    #         "wget",
    #         "-EHkpq",
    #         site,
    #         f"--warc-file={warc_file}",
    #         "--no-warc-compression",
    #         "--delete-after",
    #     ]
    # )

    logging.debug("calculating IndieMark score..")
    # 5 levels x 10 criteria; each cell is (score, reason) where score 0 is
    # best, 2 is worst, and the initial 3 means "not (yet) evaluated".
    scores = [
        [(3, None)] * 10,
        [(3, None)] * 10,
        [(3, None)] * 10,
        [(3, None)] * 10,
        [(3, None)] * 10,
    ]

    # L1 Identity
    if card:
        if "icon" in rels:
            scores[0][0] = (0, "contact info and icon on home page")
        else:
            scores[0][0] = (1, "contact info but no icon on home page")
    else:
        scores[0][0] = (2, "no contact info on home page")

    # L1 Authentication
    if rel_mes:
        scores[0][1] = (
            1,
            "<code>rel=me</code>s found but none for GitHub or Twitter",
        )
        for rel_me in rel_mes:
            if rel_me.startswith(("https://github.com", "https://twitter.com/")):
                scores[0][1] = (
                    0,
                    "<code>rel=me</code>s found for GitHub and/or Twitter",
                )
                break
    else:
        scores[0][1] = (2, "no <code>rel=me</code>s found")

    # L1 Posts
    if feed["items"]:
        if len(feed["items"]) > 1:
            scores[0][2] = (0, "more than one post")
        else:
            scores[0][2] = (1, "only one post")
    else:
        scores[0][2] = (2, "no posts")

    # L1 Search
    # XXX if details["ddg"]:
    # XXX     scores[0][6] = (0, "your content was found on DuckDuckgo")
    # XXX else:
    # XXX     scores[0][6] = (
    # XXX         1,
    # XXX         "your content was <strong>not</strong> found on DuckDuckgo",
    # XXX     )

    # L1 Interactivity
    scores[0][8] = (0, "content is accessible (select/copy text/permalinks)")

    # L2 Identity
    scores[1][0] = (0, "you've linked to silo profiles")

    # L3 'h-card contact info and icon on homepage'
    # L3 'multiple post types'
    # L3 'POSSE'
    # L3 'Posting UI'
    # L3 'Next/Previous Navigation between posts'
    # L3 'Search box on your site'
    # L3 'Embeds/aggregation'
    # L3 'Web Actions'

    # L4 'Send Webmentions'
    # L4 'PubSubHubbub support'
    # L4 'Display Search Results on your site'
    # L4 'Display Reply Context'

    # L5 'Automatic Webmentions'
    # L5 'Handle Webmentions'
    # L5 'Display full content rich reply-contexts'
    # L5 'Search on your own search backend'
    # L5 'Multiple Reply Types'
    # L5 'Display Backfeed of Comments'

    update_details(scores=scores)
    # logging.debug("dumping details..")
    # details["stored"] = web.now().to_iso8601_string()
    web.dump(scores, path=site_path / "scores.json")
    logging.debug("generating scoreboard..")
    subprocess.run(["node", "../index.js", domain])
+
+
def get_updater(url):
    """Return an update function catered to `domain`."""

    def update_details(**kwargs):
        """Atomically update the resource's details with `kwargs`."""
        # Build one json_set() call covering every key, so all fields are
        # written in a single UPDATE statement.
        placeholders = ", ".join(f"'$.{key}', json(?)" for key in kwargs)
        serialized = [web.dump(value) for value in kwargs.values()]
        tx.db.update(
            "cache",
            what=f"details = json_set(details, {placeholders})",
            where="url = ?",
            vals=serialized + [url],
        )

    return update_details
+
+
def query_whois(domain):
    """Update the creation date for the domain."""
    logging.debug("querying WHOIS")
    created = whois.whois(domain)["creation_date"]
    # Some registrars report multiple creation dates; keep the first.
    if isinstance(created, list):
        created = created[0]
    try:
        created = created.isoformat()
    except AttributeError:
        # Already a plain string (or missing) -- store as-is.
        pass
    get_updater(domain)(**{"domain.created": created})
+
+
def get_media(domain, photo_url, icon_url):
    """Download the representative photo for the domain.

    Saves the photo (if any) and the site icon under sites/<domain>/,
    normalizing both to PNG where possible.
    """
    site_path = sites_path / domain
    if photo_url:
        logging.debug("downloading representative photo..")
        # Guess the file type from the URL's final path segment.
        filename = photo_url.rpartition("/")[2]
        suffix = filename.rpartition(".")[2]
        if not suffix:
            suffix = "jpg"
        original = site_path / f"photo.{suffix}"
        webagt.download(photo_url, original)
        final = site_path / "photo.png"
        if suffix != "png":
            if suffix == "svg":
                # SVG needs rasterizing; PIL can't open it directly.
                drawing = svg2rlg(original)
                renderPM.drawToFile(drawing, final, fmt="PNG")
            else:
                try:
                    image = PIL.Image.open(original)
                except PIL.UnidentifiedImageError:
                    # Unrecognized image data: leave only the original file.
                    pass
                else:
                    image.save(final)
    logging.debug("downloading iconography..")
    final = site_path / "icon.png"
    filename = icon_url.rpartition("/")[2]
    suffix = filename.rpartition(".")[2]
    original = site_path / f"icon.{suffix}"
    try:
        download = webagt.download(icon_url, original)
    except web.ConnectionError:
        pass
    else:
        # Only convert when the fetch succeeded and conversion is needed.
        # NOTE(review): unlike the photo branch, SVG icons are not handled.
        if download.status == 200 and suffix != "png":
            try:
                image = PIL.Image.open(original)
            except PIL.UnidentifiedImageError:
                pass
            else:
                image.save(final)
+
+
def get_indieauth_metadata(domain, indieauth_metadata_endpoint):
    """Download IndieAuth metadata for the domain."""
    logging.debug("downloading IndieAuth metadata..")
    response = agent.get(indieauth_metadata_endpoint)
    update = get_updater(domain)
    update(indieauth={"metadata": response.json})
+
+
def get_search_description(domain, search_url):
    """Download OpenSearch description document at `search_url`."""
    logging.debug("downloading OpenSearch description..")
    search_xml = agent.get(search_url).xml
    # The OpenSearch <Url> element's `template` attribute carries the search
    # endpoint URL with substitution placeholders.
    search_url = webagt.uri(search_xml.find("Url", search_xml.nsmap).attrib["template"])
    search_endpoint = f"//{search_url.host}/{search_url.path}"
    name = None
    # Find the query parameter whose value is the placeholder.
    # NOTE(review): the OpenSearch spec uses "{searchTerms}" -- confirm
    # "{template}" is intended.  Also, if no parameter matches, `name` is
    # left as the *last* key iterated rather than None.
    for name, values in search_url.query.items():
        if values[0] == "{template}":
            break
    get_updater(domain)(**{"search_url": [search_endpoint, name]})
+
+
def get_manifest(domain, manifest_url):
    """Download site manifest at `manifest_url`."""
    logging.debug("downloading site manifest..")
    # if "patches" in web.get(manifest_url).headers:
    #     get_updater(domain)(**{"manifest": "hot"})
    destination = sites_path / domain / "manifest.json"
    webagt.download(manifest_url, destination)
+
+
def get_websub_hub(domain, endpoint, self):
    """Subscribe to site via WebSub `endpoint`."""
    # TODO subscribe if not already
    logging.debug("subscribing to WebSub hub..")
    update = get_updater(domain)
    update(hub=[endpoint, self])
+
+
def run_lighthouse(domain):
    """Run lighthouse for the domain.

    Writes the audit results to sites/<domain>/audits.json and blocks until
    the lighthouse process exits.
    """
    logging.debug("running lighthouse..")
    # subprocess.run waits for and reaps the child; the previous
    # Popen(...).stdout.read() never called wait(), leaving a zombie
    # process behind after each audit.
    subprocess.run(
        [
            "lighthouse",
            f"https://{domain}",
            "--output=json",
            f"--output-path={sites_path}/{domain}/audits.json",
            "--only-audits=total-byte-weight",
            # NOTE(review): no shell is involved, so the embedded quotes are
            # passed literally to lighthouse; --chrome-flags=--headless may
            # be what's intended -- confirm lighthouse tolerates the quotes.
            '--chrome-flags="--headless"',
            "--quiet",
        ],
        stdout=subprocess.PIPE,
    )
+
+
def run_pa11y(domain):
    """Run pa11y for the domain.

    Stores the accessibility report as a11y.json, captures a screenshot,
    and derives placeholder icon/photo images from the page's dominant
    color when real ones are missing.
    """
    site_path = sites_path / domain
    logging.debug("running pa11y..")
    # pa11y emits JSON on stdout; round-trip it through web.load/web.dump
    # to persist it as a11y.json.
    web.dump(
        web.load(
            subprocess.Popen(
                [
                    "pa11y",
                    domain,
                    "--reporter",
                    "json",
                    "--screen-capture",
                    site_path / "site.png",
                ],
                stdout=subprocess.PIPE,
            ).stdout.read()
        ),
        path=site_path / "a11y.json",
    )

    found_icon = True  # TODO XXX
    logging.debug("finding most used color, generating images..")
    try:
        screenshot = PIL.Image.open(site_path / "site.png")
    except FileNotFoundError:
        # pa11y failed to produce a screenshot; nothing to derive.
        pass
    else:
        screenshot.crop((0, 0, 1280, 1024)).save(site_path / "screenshot.png")
        # Tally every pixel to find the page's dominant color.
        # NOTE(review): this is a per-pixel Python loop; Image.getcolors()
        # would do the same tally in C if the palette fits.
        colors = collections.Counter()
        for x in range(screenshot.width):
            for y in range(screenshot.height):
                colors[screenshot.getpixel((x, y))] += 1
        most_used_color = colors.most_common()[0][0]
        # A 1x1 image of the dominant color serves as a fallback icon/photo.
        icon = PIL.Image.new("RGB", (1, 1), color=most_used_color)
        if not found_icon:
            icon.save(site_path / "icon.png")
        if not (site_path / "photo.png").exists():
            icon.save(site_path / "photo.png")
+
+
def get_activitypub(domain):
    """Fetch the domain's WebFinger document (ActivityPub discovery).

    NOTE(review): the response is currently unused and nothing is stored --
    this looks like a placeholder for future ActivityPub support.
    """
    webfinger = agent.get(f"https://{domain}/.well-known/webfinger")
+
+
@app.query
def get_posts(db):
    """Return cached posts (not yet implemented; always empty)."""
    return []
+
+
@app.query
def get_people(db):
    """Return a mapping of cached site URL to its h-card details.

    Rows whose crawl hasn't stored a card yet are skipped rather than
    raising `KeyError`.
    """
    # Use the `db` handle provided by @app.query, consistent with the other
    # query functions in this module (the original reached for global tx.db).
    return {
        url: details["card"]
        for url, details in db.select("cache", what="url, details", order="url ASC")
        if "card" in details
    }
+
+
@app.query
def get_people_details(db):
    """Return all rows of the `people` table ordered by URL."""
    # Use the `db` handle provided by @app.query, consistent with the other
    # query functions in this module (the original reached for global tx.db).
    return db.select("people", order="url ASC")
+
+
@app.query
def get_categories(db):
    """Tally the categories used across all cached resources."""
    tally = collections.Counter()
    query = (
        "select json_extract(cache.details, '$.category') "
        "AS categories from cache"
    )
    with db.transaction as cur:
        for row in cur.cur.execute(query):
            raw_categories = row["categories"]
            if not raw_categories:
                continue
            loaded = web.load(raw_categories)
            if not loaded:
                continue
            for category in loaded:
                tally[category] += 1
    return tally
+
+
@app.query
def get_resources(db):
    """Return cache rows crawled within the past week, newest first."""
    one_week_ago = web.now().subtract(days=7)
    return db.select(
        "cache",
        where="crawled > ?",
        vals=[one_week_ago],
        order="crawled DESC",
    )
+
+
+@app.control("")
+class Cache:
+    """All cached resources."""
+
+    def get(self):
+        """Return a list of all cached resources."""
+        return app.view.index()
+
+    def post(self):
+        address = web.form("address").address
+        details = get_resource(address)
+        raise web.SeeOther(f"/cache/{address}")
+
+        # TODO if no-flash-header or use form argument:
+        # TODO     raise web.SeeOther(); flash user's session with message to insert as CSS
+        # TODO elif flash-header:
+        # TODO     return just message as JSON
+        # TODO
+        # TODO raise web.flash("crawl enqueued")
+
+
+@app.control("resource")
+class PreviewResource:
+    """"""
+
+    def get(self):
+        url = web.form(url=None).url
+        web.header("Content-Type", "application/json")
+        if not url:
+            return {}
+        resource = web.get(url)
+        if resource.entry:
+            return resource.entry
+        if resource.event:
+            return resource.event
+        if resource.feed:
+            return resource.feed
+        return {}
+
+        # XXX data = cache.parse(url)
+        # XXX if "license" in data["data"]["rels"]:
+        # XXX     data["license"] = data["data"]["rels"]["license"][0]
+        # XXX try:
+        # XXX     edit_page = data["html"].cssselect("#ca-viewsource a")[0]
+        # XXX except IndexError:
+        # XXX     # h = html2text.HTML2Text()
+        # XXX     # try:
+        # XXX     #     data["content"] = h.handle(data["entry"]["content"]).strip()
+        # XXX     # except KeyError:
+        # XXX     #     pass
+        # XXX     try:
+        # XXX         markdown_input = ("html", data["entry"]["content"])
+        # XXX     except (KeyError, TypeError):
+        # XXX         markdown_input = None
+        # XXX else:
+        # XXX     edit_url = web.uri.parse(str(data["url"]))
+        # XXX     edit_url.path = edit_page.attrib["href"]
+        # XXX     edit_page = fromstring(requests.get(edit_url).text)
+        # XXX     data["mediawiki"] = edit_page.cssselect("#wpTextbox1")[0].value
+        # XXX     data["mediawiki"] = (
+        # XXX         data["mediawiki"].replace("{{", r"{!{").replace("}}", r"}!}")
+        # XXX     )
+        # XXX     markdown_input = ("mediawiki", data["mediawiki"])
+
+        # XXX if markdown_input:
+        # XXX     markdown = str(
+        # XXX         sh.pandoc(
+        # XXX         "-f", markdown_input[0], "-t", "markdown", _in=markdown_input[1]
+        # XXX         )
+        # XXX     )
+        # XXX     for n in range(1, 5):
+        # XXX         indent = "    " * n
+        # XXX         markdown = markdown.replace(f"\n{indent}-",
+        # XXX                                     f"\n{indent}\n{indent}-")
+        # XXX     markdown = re.sub(r'\[(\w+)\]\(\w+ "wikilink"\)', r"[[\1]]", markdown)
+        # XXX     markdown = markdown.replace("–", "--")
+        # XXX     markdown = markdown.replace("—", "---")
+        # XXX     data["content"] = markdown
+
+        # XXX data.pop("html")
+        # XXX # XXX data["category"] = list(set(data["entry"].get("category", [])))
+        # XXX web.header("Content-Type", "application/json")
+        # XXX return dump_json(data)
+
+
+@app.control("details/{site}(/{page})?")
+class SiteDetails:
+    """A web resource."""
+
+    def get(self, site, page=None):
+        web.header("Content-Type", "application/json")
+        return tx.db.select("cache", where="url = ?", vals=[site])[0]["details"]
+
+
+@app.control("a11y/{site}(/{page})?")
+class Accessibility:
+    """A web resource."""
+
+    def get(self, site, page=None):
+        try:
+            a11y = web.load(path=sites_path / site / "a11y.json")
+        except FileNotFoundError:
+            a11y = None
+        return app.view.a11y(site, a11y)
+
+
+@app.control("sites")
+class Sites:
+    """Index of sites as HTML."""
+
+    def get(self):
+        """Return a list of indexed sites."""
+        # TODO # accept a
+        # TODO tx.db.select(
+        # TODO     tx.db.subquery(
+        # TODO         "crawls", where="url not like '%/%'", order="crawled desc"
+        # TODO     ),
+        # TODO     group="url",
+        # TODO )
+        with tx.db.transaction as cur:
+            urls = cur.cur.execute(
+                " select * from ("
+                + "select * from cache where url not like '%/%' order by crawled desc"
+                + ") group by url"
+            )
+        return app.view.sites(urls)
+
+
+@app.control("sites/{site}/screenshot.png")
+class SiteScreenshot:
+    """A site's screenshot."""
+
+    def get(self, site):
+        """Return a PNG document rendering given site's screenshot."""
+        if os.getenv("WEBCTX") == "dev":
+            return sites_path / site / "screenshot.png"
+        web.header("Content-Type", "image/png")
+        web.header("X-Accel-Redirect", f"/X/sites/{site}/screenshot.png")
+
+
+@app.control("sites/{site}/scoreboard.svg")
+class SiteScoreboard:
+    """A site's scoreboard."""
+
+    def get(self, site):
+        """Return an SVG document rendering given site's scoreboard."""
+        if os.getenv("WEBCTX") == "dev":
+            return sites_path / site / "scoreboard.svg"
+        web.header("Content-Type", "image/svg+xml")
+        web.header("X-Accel-Redirect", f"/X/sites/{site}/scoreboard.svg")
+
+
+@app.control("{site}")
+class Site:
+    """A website."""
+
+    def get(self, site):
+        """Return a site analysis."""
+        # TODO if site in [s[0] for s in silos.values()]:
+        # TODO     return app.view.silo(site, details)
+        return app.view.site(*get_site(site))
+
+
+@app.control("{site}/{page}")
+class Page:
+    """A webpage."""
+
+    def get(self, site, page):
+        return app.view.page(*get_page(f"{site}/{page}"))
+
+
def get_resource(url):
    """Return `(uri, details)` for a cached resource.

    Raises `web.SeeOther` when a stored redirect exists for the URL, and
    `web.Accepted` when the resource isn't cached yet (a background crawl
    is enqueued in that case).
    """
    url = webagt.uri(str(url))
    min_url = url.minimized
    redirect = tx.db.select(
        "cache_redirects", what="outgoing", where="incoming = ?", vals=[min_url]
    )
    try:
        # If a redirect row exists, follow it; the IndexError from an empty
        # result set means no redirect is stored and we fall through.
        raise web.SeeOther(redirect[0]["outgoing"])
    except IndexError:
        pass
    try:
        details = tx.db.select("cache", where="url = ?", vals=[min_url])[0]["details"]
    except IndexError:
        # Not cached yet: crawl in the background and tell the client.
        web.enqueue(refresh_site, min_url)
        raise web.Accepted(app.view.crawl_enqueued(min_url))
    return url, details
+
+
def _load_site_report(site, filename):
    """Load a per-site JSON report, returning None if it doesn't exist yet."""
    try:
        return web.load(path=sites_path / site / filename)
    except FileNotFoundError:
        return None


def get_site(site):
    """Return the URL, crawl details, and auxiliary reports for `site`.

    The lighthouse, pa11y, and manifest reports are produced by background
    jobs, so any of them may still be None.
    """
    url, details = get_resource(site)
    # The three reports share identical load-or-None handling.
    audits = _load_site_report(site, "audits.json")
    a11y = _load_site_report(site, "a11y.json")
    manifest = _load_site_report(site, "manifest.json")
    return url, details, audits, a11y, manifest
+
+
def get_page(page_url):
    """Return the URL and crawl details for the page at `page_url`."""
    # `get_resource` already returns the (url, details) pair.
    return get_resource(page_url)

index 0000000..bfcfcc0
--- /dev/null

# Known profile-hosting silos, keyed by display name.  Each value is a tuple:
#   (domain, [regexes matching a profile path, capturing the username])
# A third element of True marks silos whose profile pages can carry reverse
# rel=me links (consumers collect those entries for reciprocal rel=me
# verification -- see `refresh_site`'s `rel_me_silos`).
silos = {
    "IndieWeb.rocks": ("indieweb.rocks", [r"([\w\.]+)"], True),
    "GitHub": ("github.com", [r"(\w+)"], True),
    "Keybase": ("keybase.io", [r"(\w+)"], True),
    "sourcehut": ("sr.ht", [r"~(\w+)"], True),
    "IndieWeb": ("indieweb.org", [r"User:([\w.]+)"]),
    "PyPI": ("pypi.org", [r"user/([\w.]+)"]),
    "Micro.blog": ("micro.blog", [r"(\w+)"]),
    "Twitter": ("twitter.com", [r"(\w+)"]),
    "Reddit": ("reddit.com", [r"u/(\w+)", r"user/(\w+)"]),
    "Facebook": ("facebook.com", [r"(\w+)"]),
    "Instagram": ("instagram.com", [r"(\w+)"]),
    "LinkedIn": ("linkedin.com", [r"in/(\w+)"]),
    "Foursquare": ("foursquare.com", [r"user/(\d+)", r"(\w+)"]),
    "Last.fm": ("last.fm", [r"user/(\w+)"]),
    "Flickr": ("flickr.com", [r"people/(\w+)", r"(\w+)"]),
    "Amazon": ("amazon.com", [r"shop/(\w+)"]),
    "Dribbble": ("dribbble.com", [r"(\w+)"]),
    "Gravatar": ("gravatar.com", [r"(\w+)"]),
    "Pinboard": ("pinboard.in", [r"u:(\w+)"]),
    "Wordpress": ("profiles.wordpress.org", [r"(\w+)"]),
    "Gumroad": ("gumroad.com", [r"(\w+)"]),
    "Ko-fi": ("ko-fi.com", [r"(\w+)"]),
    "Twitch": ("twitch.tv", [r"(\w+)"]),
    "Soundcloud": ("soundcloud.com", [r"(\w+)"]),
    "Asmodee": ("account.asmodee.net", [r"en/profile/(\d+)"]),
    "Wikipedia (EN) User": ("en.wikipedia.org", [r"wiki/User:(\w+)"]),
    "Wikipedia (EN) Notable Person": ("en.wikipedia.org", [r"wiki/([\w\(\)_]+)"]),
    "Cash App": ("cash.me", [r"\$(\w+)"]),
    "Kit": ("kit.co", [r"(\w+)"]),
    "PayPal": ("paypal.me", [r"(\w+)"]),
    "Speaker Deck": ("speakerdeck.com", [r"(\w+)"]),
    "WeChat": ("u.wechat.com", [r"([\w\W]+)"]),
    "Venmo": ("venmo.com", [r"(\w+)"]),
    "Duolingo": ("duolingo.com", [r"profile/(\w+)"]),
    "SlideShare": ("slideshare.net", [r"(\w+)"]),
    "W3": ("w3.org", [r"users/(\w+)"]),
    "YouTube": ("youtube.com", [r"(\w+)"]),
    "Vimeo": ("vimeo.com", [r"(\w+)"]),
    "500px": ("500px.com", [r"(\w+)"]),
    "Findery": ("findery.com", [r"(\w+)"]),
    "Untappd": ("untappd.com", [r"user/(\w+)"]),
    "del.icio.us": ("del.icio.us", [r"(\w+)"]),
    "Pocket": ("getpocket.com", [r"@(\w+)"]),
    "Huffduffer": ("huffduffer.com", [r"(\w+)"]),
    "Hypothesis": ("hypothes.is", [r"users/(\w+)"]),
    "Lobsters": ("lobste.rs", [r"u/(\w+)"]),
    "Medium": ("medium.com", [r"@(\w+)"]),
    "Myspace": ("myspace.com", [r"(\d+)"]),
    "Hacker News": ("news.ycombinator.com", [r"user\?id=(\w+)"]),
    "Nextdoor": ("nextdoor.com", [r"profile/(\d+)"]),
    "Spotify": ("open.spotify.com", [r"user/(\w+)"]),
    "Pinterest": ("pinterest.com", [r"(\w+)"]),
    "Pnut": ("pnut.io", [r"@(\w+)"]),
    "Upcoming": ("upcoming.org", [r"@(\w+)"]),
    "Diggo": ("diigo.com", [r"profile/(\w+)"]),
    "Goodreads": ("goodreads.com", [r"user/show/(\d+)"]),
    "Notist": ("noti.st", [r"(\w+)"]),
    "Kickstarter": ("kickstarter.com", [r"profile/([\w-]+)"]),
    "CodePen": ("codepen.io", [r"([\w-]+)"]),
    "Listen Notes": ("listennotes.com", [r"@(\w+)"]),
    "Meetup": ("meetup.com", [r"members/(\d+)"]),
    "Patreon": ("patreon.com", [r"(\w+)"]),
    "Periscope": ("periscope.tv", [r"(\w+)"]),
    "Quora": ("quora.com", [r"([\W\w]+)"]),
    "eBird": ("ebird.org", [r"profile/([\W\w]+)"]),
    "Stack Overflow": ("stackoverflow.com", [r"users/(\d+/\w+)"]),
    "npm": ("npmjs.com", [r"~(\w+)"]),
    "Trakt": ("trakt.tv", [r"users/(\w+)"]),
    "ORCID": ("orcid.org", [r"([\d-]+)"]),
    "Wishlistr": ("wishlistr.com", [r"(\w+)"]),
    "GitLab": ("gitlab.com", [r"(\w+)"]),
    "AngelList": ("angel.co", [r"(\w+)"]),
    "OpenStreetMap": ("openstreetmap.org", [r"user/(\w+)"]),
    "Google+": ("plus.google.com", [r"\+(\w+)"]),
}

index 0000000..951d7d9
--- /dev/null

+import math
+import re
+from collections import defaultdict
+from hashlib import sha256
+from pprint import pformat
+
+import pendulum
+import pendulum.parser
+import phonenumbers
+import webagt
+from web import now, tx
+from webagt import uri
+
+from ..silos import silos
+
+# Helper names exported from this module; the accompanying web.py templates
+# reference many of them directly (e.g. get_silo, get_human_size, pendulum).
+__all__ = [
+    "re",
+    "get_dt",
+    "tx",
+    "uri",
+    "silos",
+    "pformat",
+    "get_silo",
+    "get_human_size",
+    "now",
+    "sha256",
+    "format_phonenumber",
+    "defaultdict",
+    "pendulum",
+    "math",
+]
+
+
def format_phonenumber(tel):
    """Return *tel* rendered in international format.

    Numbers without a country code are parsed with a "US" default region.
    """
    parsed = phonenumbers.parse(tel, "US")
    international = phonenumbers.PhoneNumberFormat.INTERNATIONAL
    return phonenumbers.format_number(parsed, international)
+
+
+def get_dt(dt):
+    """Return *dt* as a Pendulum datetime.
+
+    ``pendulum.instance`` accepts datetime objects and raises ValueError
+    for anything else, in which case we fall back to parsing *dt* as a
+    date/time string.
+    """
+    try:
+        return pendulum.instance(dt)
+    except ValueError:
+        return pendulum.parser.parse(dt)
+
+
def get_silo(url):
    """Identify the known silo (social-media site) that hosts *url*.

    Returns a ``(silo_name, host, profile_pattern, username)`` tuple for
    the first silo whose domain and profile pattern match *url*, or
    ``None`` when no silo matches.
    """
    for silo, details in silos.items():
        # Silo entries are (domain, patterns) pairs, with an optional
        # third element ignored here.
        try:
            domain, profile_patterns, _ = details
        except ValueError:
            domain, profile_patterns = details
        for profile_pattern in profile_patterns:
            # re.escape keeps dots in the domain literal so e.g.
            # "last.fm" cannot match "lastxfm/...".
            if match := re.match(
                f"{re.escape(domain)}/{profile_pattern}", url.removeprefix("www.")
            ):
                return silo, webagt.uri(url).host, profile_pattern, match.groups()[0]
    return None
+
+
suffixes = ["B", "KB", "MB", "GB", "TB", "PB"]


def get_human_size(nbytes):
    """Format a byte count as a human-readable size, e.g. ``"1.5 KB"``.

    Values are divided by 1024 until they fall below it (capped at PB);
    trailing zeros and a dangling decimal point are trimmed.
    """
    index = 0
    while nbytes >= 1024 and index < len(suffixes) - 1:
        nbytes /= 1024.0
        index += 1
    amount = f"{nbytes:.2f}".rstrip("0").rstrip(".")
    return f"{amount} {suffixes[index]}"

index 0000000..287ea40
--- /dev/null

+$def with (address)
+$var title: Crawl enqueued
+
+<p>Crawl enqueued for: <code>$address</code></p>

index 0000000..833695b
--- /dev/null

+$def with ()
+$var title: Cache
+
+<form method=post style="margin:3em 0">
+<label style=font-size:.9em><strong>Address</strong>
+<small><em>i.e. website URL or fediverse handle</em></small><br>
+<input style="border:.2em inset #ccc;border-radius:.2em;font-size:1.1em;margin:.25em 0 .5em 0;padding:.2em;width:calc(100% - .8em)"
+name=address type=text autofocus placeholder="e.g. example.com or alice@example.com"></label>
+<div style=text-align:right><button
+style="font-size:.8em;padding:0 .2em;text-transform:uppercase">Validate</button></div>
+</form>
+
+<h2 id=standards>Supported Standards</h2>
+
+<h3>IndieWeb</h3>
+
+<ul style=columns:3>
+<li><a href=https://www.w3.org/TR/webmention>Webmention</a></li>
+<li><a href=https://www.w3.org/TR/websub>WebSub</a></li>
+<li><a href=https://www.w3.org/TR/indieauth>IndieAuth</a></li>
+<li><a href=https://indieweb.org/Ticketing_for_IndieAuth>Ticketing</a></li>
+<li><a href=https://www.w3.org/TR/micropub>Micropub</a></li>
+<li><a href=https://indieweb.org/Microsub>Microsub</a></li>
+<li><a href=https://microformats.org/wiki/microformats>microformats</a></li>
+<li><a href=https://microformats.org/wiki/metaformats>metaformats</a></li>
+<li><a href=https://www.w3.org/TR/post-type-discovery><abbr title="Post Type Discovery">PTD</abbr></a></li>
+<li><a href=https://gmpg.org/xfn/><abbr title="XHTML Friends Network">XFN</abbr></a></li>
+<li><a href=https://microformats.org/wiki/rel-me>rel=me</a></li>
+<li><a href=https://microformats.org/wiki/web-sign-in>web sign in</a></li>
+<li><a href=https://www.w3.org/TR/jf2>JF2</a></li>
+</ul>
+
+<h3>Complementary</h3>
+
+<ul style=columns:3>
+<li><a href=https://cyber.harvard.edu/rss/rss.html>RSS</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/rfc4287>Atom Syndication Format</a></li>
+<li><a href=https://www.jsonfeed.org/version/1.1>JSON Feed</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/rfc6415>Web Host Metadata</a></li>
+<li><a href=http://nodeinfo.diaspora.software/protocol.html>NodeInfo</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/rfc7033>WebFinger</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-message-signatures>HTTP Signatures</a></li>
+<li><a href=https://www.w3.org/TR/activitypub>ActivityPub</a></li>
+<li><a href=https://www.w3.org/TR/activitystreams-vocabulary>Activity Vocabulary</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/rfc6797><abbr title="HTTP Strict Transport Security">HSTS</abbr></a></li>
+</ul>

index 0000000..e107f92
--- /dev/null

+$def with (url, details, audits, a11y, manifest)
+$ short_title = str(url).removeprefix("@").removeprefix("https://")
+$# var title = short_title
+
+$ axes = ["identity", "authentication", "posts", "syndication", "posting UI",
+$         "navigation", "search", "aggregation", "interactivity", "security",
+$         "responses"]
+$ statuses = ["pass", "maybe", "fail"]
+
+$def render_uninterpreted(title, object, type):
+    <div class=uninterpreted>
+    <a href=//microformats.org/wiki/$title.rstrip("=")><img src=/static/specs/microformats.png
+    style=float:right;height:2em alt="microformats logo"></a>
+    <p><em>Uninterpreted <code>$title</code> $type</em>:</p>
+    <dl>
+    $for key, values in sorted(object.items()):
+        <dt>$key</dt>
+        $if not isinstance(values, list):
+            $ values = [values]
+        $for value in values:
+            <dd>
+            $if type == "links":
+                $uri(value).minimized
+            $elif type == "properties":
+                $value
+            </dd>
+    </dl>
+    </div>
+
+$ card = details["card"]
+
+$if featured := card.pop("featured", [None])[0]:
+    <img src=$featured style=width:100%>
+
+$ all_urls = []
+$ rels = details["mf2json"]["rels"]
+
+$ meta_props = {}
+$ items = details["mf2json"]["items"]
+$if items and items[-1]["source"] == "metaformats":
+    $ meta_props = items[-1]["properties"]
+
+<header>
+$ name = card.pop("name", [None])[0]
+$if name:
+    $var title = name
+$elif meta_name := meta_props.get("name"):
+    $var title = meta_name[0]
+<p>
+<a href=$url class=urlbox rel=me>
+<span><img src=/sites/$(url.minimized)/icon.png style=height:1em;width:1em title="\
+$if page_title := details.get("title"):
+    $page_title\
+">
+$if details["domain"]["hsts"]:
+    <span class=pass><strong>https://</strong></span>
+$elif url.scheme == "https":
+    <span class=pass>https://</span>
+$else:
+    <span class=fail>http://</span>
+<small
+$if whois_created := details.get("whois_created", None):
+    title="$whois_created"
+    $ years_held = (pendulum.now() - pendulum.parse(whois_created)).years
+    $if years_held < 1:
+        $ whois_color = "red"
+    $elif years_held < 5:
+        $ whois_color = "orange"
+    $elif years_held < 10:
+        $ whois_color = "yellow"
+    $elif years_held < 15:
+        $ whois_color = "green"
+    $elif years_held < 20:
+        $ whois_color = "blue"
+    $elif years_held < 25:
+        $ whois_color = "purple"
+    style="color:$whois_color"
+>$details["domain"]["name"]</small></span></a>
+$if "metaverse" in details:
+    $ hash = details["metaverse"][:5]
+    <small><a href=/the-street#$hash><code>$hash</code></a></small>
+</p>
+
+<p>Accessed $details["accessed"]</p>
+
+<p>
+$ response = details["response"]
+<strong title="initial weight">$response["length"] KB</strong>
+$if audits:
+    / <strong title="total weight">$get_human_size(audits["audits"]["total-byte-weight"]["numericValue"])</strong>
+/ <strong title="response time">
+$if response["time"] < 1:
+    $round(response["time"] * 1000) ms
+$else:
+    $round(response["time"], 2) s
+</strong>
+</p>
+</header>
+
+<div style=font-size:.9em>
+
+$ urls = []
+$ web_sign_in = []
+$if card:
+    <div class=h-card>
+    $# XXX $var title: $:name
+    $ card.pop("family-name", None)
+    $ card.pop("given-name", None)
+    $ nicknames = card.pop("nickname", [])
+    $ orgs = card.pop("org", None)
+    $if photo := card.pop("photo", None):
+        <img src=/sites/$url.minimized/photo.png style=width:100% alt="$name's profile picture">
+    $# XXX <h1 style=margin-bottom:0>$name</h1>
+    $ ipa = card.pop("ipa", None)
+    $ sound = card.pop("sound", None)
+    <p>
+    $if ipa:
+        $ipa[0]
+    $if sound:
+        <button>🗣️</button>
+    </p>
+    $if nicknames:
+        <p style=margin-top:0><small>a.k.a. $", ".join(nicknames)</small></p>
+
+    $ pronouns = card.pop("pronouns", [])
+    $ card.pop("pronoun", None)
+    $if orgs and name == orgs[0]:
+        🧑‍🤝‍🧑
+    $elif pronouns:
+        $if "they" in pronouns[0]:
+            🧑
+        $elif "she" in pronouns[0]:
+            👩
+        $elif "he" in pronouns[0]:
+            👨
+    $else:
+        🧑
+    <small>
+    $if pronouns:
+        $:pronouns[0].replace(" ", "").replace("/", "&thinsp;/&thinsp;")
+    $elif pronouns := card.pop("pronoun", None):
+        $for pronoun in pronouns:
+            $pronoun\
+            $if not loop.last:
+                &thinsp;/&thinsp;\
+    </small>
+    $if bday := card.pop("bday", None):
+        $ year, month, day = re.match("(\d\d\d\d|-)-(\d\d?|-)-(\d\d?|-)", bday[0]).groups()
+        $if year != "-":
+            $ year = int(year)
+        $ month = int(month)
+        $ day = int(day)
+        $ months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun"]
+        $ months += ["Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+        $ n = now()
+        <span title="$months[int(month)-1] $day, $year"
+        $if not (month == n.month and day == n.day):
+            style="opacity:25%"
+        >🎂</span>
+    $if "rel_me" in details:
+        $ details["rel_me"] = details["rels"].pop("me", [])  # TODO REMOVE
+    $ urls = set(uri(u).minimized for u in card.pop("url", []) + details["rel_me"])
+    $ reciprocals = set(details.pop("reciprocals", []))
+    $ self_rel_me = f"indieweb.rocks/{url.minimized}"
+    $if self_rel_me in reciprocals:
+        $ urls.discard(self_rel_me)
+        $ reciprocals.discard(self_rel_me)
+        🍄
+    $if orgs:
+        <br><small>🧑‍🤝‍🧑
+        $for org in orgs:
+            $if isinstance(org, dict):
+                $ org_props = org.pop("properties")
+                $if "url" in org_props:
+                    <a href=$org_props["url"][0]>\
+                $org_props["name"][0]\
+                $if "url" in org_props:
+                    </a>\
+            $else:
+                $org\
+            $if not loop.last:
+                ,
+        </small>
+    $if roles := card.pop("role", None):
+        <br><small>
+        $for role in roles:
+            <code>$role</code>\
+            $if not loop.last:
+                ,
+        </small>
+    $if note := card.pop("note", None):
+        <p style=font-size:.75em;hyphens:auto>$note[0]</p>
+    $if categories := card.pop("category", None):
+        <p>🏷️ <small>
+        $for category in categories:
+            <code>\
+            $if isinstance(category, dict):
+                $ cat_props = category.pop("properties")
+                $if "url" in cat_props:
+                    <a href=$cat_props["url"][0]>\
+                $cat_props["name"][0]\
+                $if "url" in cat_props:
+                    </a>\
+            $else:
+                $category\
+            </code>\
+            $if not loop.last:
+                ,
+        </small></p>
+    $ street_address = card.pop("street-address", None)
+    $ locality = card.pop("locality", None)
+    $ region = card.pop("region", None)
+    $ postal_code = card.pop("postal-code", None)
+    $ country_name = card.pop("country-name", None)
+    $if street_address:
+        <p>📍
+        $if street_address:
+            $street_address[0]
+        $ area_line = []
+        $if locality:
+            $ area_line.append(locality[0])
+        $if region:
+            $ area_line.append(region[0])
+        $", ".join(area_line)
+        $if postal_code:
+            $postal_code[0]
+        $if country_name:
+            $country_name[0]
+        </p>
+    $ emails = [e.partition(":")[2] for e in card.pop("email", [])]
+    $ tels = []
+    $for tel in card.pop("tel", []):
+        $if ":" in tel:
+            $tels.append(tel.partition(":")[2])
+        $else:
+            $tels.append(tel)
+    $ keys = set(card.pop("key", []) + rels.pop("pgpkey", []))
+    $ all_urls = list(urls)
+    $for _url in sorted(urls):
+        $if _url.startswith("sms:") or _url.startswith("callto:"):
+            $ tel = _url.partition(":")[2]
+            $if tel not in tels:
+                $ tels.append(tel)
+            $urls.remove(_url)
+        $elif _url.startswith("mailto:"):
+            $ email = _url.partition(":")[2]
+            $if email not in emails:
+                $ emails.append(email)
+            $urls.remove(_url)
+    $if emails:
+        <ul class=elsewhere>
+        $for email in emails:
+            <li>📧 <small><a href=mailto:$email>$email</a></small>
+            $if "gravatars" in details:
+                $if gravatar := details["gravatars"].pop(email, None):
+                    <a href=//www.gravatar.com/$gravatar><img style=height:1em
+                    src=//www.gravatar.com/avatar/$(gravatar).jpg></a>
+            </li>
+            $ web_sign_in.append(email)
+        </ul>
+    $if tels:
+        <ul class=elsewhere>
+        $for tel in tels:
+            <li>📱 <small>$format_phonenumber(tel)</small><br>
+            <small><a href=callto:$tel>call</a> <a href=sms:$tel>message</a></small>
+            </li>
+            $ web_sign_in.append(tel)
+        </ul>
+    $if keys:
+        <p>🔐
+        $for key in keys:
+            $key
+            $if not loop.last:
+                ,
+            $ web_sign_in.append(uri(key).minimized)
+        </p>
+
+    $def render_rel_me(silo_name, domain, profile_pattern, user):
+        $ path = re.sub(r"(\(.+\))", user, profile_pattern).replace("\\", "")
+        <a href=/$domain title=$silo_name><img src=/sites/$domain/icon.png
+        style=height:1em></a> <a href=https://$domain/$path>$user</a>
+
+    $ supported_web_signin_silos = ["github.com", "twitter.com"]
+    $if urls:
+        $for _url in sorted(urls):
+            $if _url.startswith(url.minimized):
+                $ urls.remove(_url)
+                $continue
+        <ul class=elsewhere>
+        $for _url in sorted(urls):
+            $if _url in reciprocals:
+                $ urls.remove(_url)
+                <li>
+                $if silo := get_silo(_url):
+                    $:render_rel_me(*silo)
+                $else:
+                    $_url
+                ☑️
+                </li>
+                $if _url.partition("/")[0] in supported_web_signin_silos:
+                    $ web_sign_in.append(_url)
+        $for _url in sorted(urls):
+            $if silo := get_silo(_url):
+                $ urls.remove(_url)
+                <li>$:render_rel_me(*silo)</li>
+        $for _url in sorted(urls):
+            <li>\
+            $if _url.endswith(".onion"):
+                🧅 <a href=http://$_url>$(_url[:12])&hellip;</a></li>
+            $else:
+                <a href=//$_url>$_url</a></li>
+        </ul>
+    $ card.pop("uid", None)  # FIXME: what is it good for?
+    $if card:
+        $:render_uninterpreted("h-card", card, "properties")
+    </div>
+    $if payments := rels.pop("payment", None):
+        <h3>Payment</h3>
+        <ul>
+        $for payment in payments:
+            $ payment_url = uri(payment)
+            <li><img src=/sites/$payment_url.host/icon.png><a href=$payment>$payment_url</a></li>
+        </ul>
+$else:
+    $if meta_props:
+        $if meta_photo := meta_props.get("photo"):
+            <img style=width:100% src=$meta_photo[0]><br>
+        $if meta_summary := meta_props.get("summary"):
+            <p>$meta_summary[0]</p>
+        $if meta_content := meta_props.get("content"):
+            <div>$meta_content[0]</div>
+    <p>No <a href=https://indieweb.org/representative_h-card>representative
+    card</a> found.
+    $if meta_props:
+        Falling back to <a href=//microformats.org/wiki/metaformats>metaformats</a>.
+    </p>
+
+$ license = rels.pop("license", None)
+$if license:
+    <p><a href=$license[0]>
+    $if cc := re.match(r"https://creativecommons.org/licenses/([a-z-]+)/(\d.\d)", license[0]):
+        $ license, version = cc.groups()
+        <span title="CC $license.upper() $version">
+        <img class=cclicense src=/static/cc/cc.svg alt="Creative Commons logo">\
+        $for part in license.split("-"):
+            <img class=cclicense src=/static/cc/$(part).svg \
+            alt="Creative Commons $(part) license logo">\
+        </span>
+    $else:
+        Licensed $license[0].
+    </a></p>
+
+$if "search_url" in details:
+    $ search_url, search_query_name = details["search_url"]
+    <form action=$search_url method=get>
+    <input type=text name=$search_query_name>
+    <button>Search</button>
+    </form>
+    $ rels.pop("search")
+
+$if manifest:
+    $ bgcolor = manifest.get("background_color", "none")
+    <div style="background-color:$bgcolor;height:3em;width:100%"></div>
+
+$ auth_ep = rels.pop("authorization_endpoint", None)
+$ token_ep = rels.pop("token_endpoint", None)
+$ ticket_ep = None
+$ indieauth_metadata = details.pop("indieauth_metadata", None)
+$ openid_delegate = rels.pop("openid.delegate", None)
+$ openid_server = rels.pop("openid.server", None)
+$if indieauth_metadata:
+    $ auth_ep = indieauth_metadata.get("authorization_endpoint", None)
+    $ token_ep = indieauth_metadata.get("token_endpoint", None)
+    $ ticket_ep = indieauth_metadata.get("ticket_endpoint", None)
+
+$if auth_ep:
+    <p class=pass>Supports
+$else:
+    <p class=fail>Does not support
+<a href=/indieauth><img src=/static/specs/indieauth.svg style=height:1.5em;opacity:25%> IndieAuth</a>\
+$if auth_ep:
+    $if token_ep:
+         with a <a href=//indieauth.spec.indieweb.org/#token-endpoint>token endpoint</a>\
+        $if ticket_ep:
+             and a <a href=//indieweb.org/IndieAuth_Ticket_Auth#Create_the_IndieAuth_ticket>ticket endpoint</a>\
+.
+</p>
+
+$# $if auth_ep and not indieauth_metadata:
+$#     <p class=NOTE><code>rel=authorization_endpoint</code> is deprecated, leave
+$#     it for now but start using <code>rel=indieauth-metadata</code> instead
+$#     <sup><a href=https://indieauth.spec.indieweb.org/\
+$#     #changes-from-26-november-2020-to-this-version-li-1>read more</a></sup></p>
+
+$ authn = [uri(authn).minimized for authn in rels.pop("authn", [])]
+$if web_sign_in:
+    <p class=pass>Supports <a href=https://microformats.org/wiki/web_sign-in>web sign-in</a>.</p>
+    <ul>
+    $for web_sign_in_endpoint in web_sign_in:
+        $if authn and web_sign_in_endpoint not in authn:
+            $continue
+        <li>$web_sign_in_endpoint</li>
+    </ul>
+
+$# $if openid_delegate and openid_server:
+$#     <p class=NOTE>OpenID <strong>was a protocol</strong> for using a web address
+$#     as an identity to sign-in to websites; it is losing support, <strong>is
+$#     effectively dead</strong> (versions 1 & 2 are both deprecated, sites are
+$#     dropping support), and <strong>has been replaced on the IndieWeb with
+$#     web-sign-in and IndieAuth</strong>. <sup><a
+$#     href=https://indieweb.org/OpenID>read more</a></sup></p>
+
+$ webmention_ep = rels.pop("webmention", None)
+$if webmention_ep:
+    <p class=pass>Supports
+$else:
+    <p class=fail>Does not support
+<a href=//www.w3.org/TR/webmention/><img src=/static/specs/webmention.svg style=height:1.5em;opacity:25%> Webmention</a> on the homepage.
+</p>
+
+$ micropub_ep = rels.pop("micropub", None)
+$ media_ep = rels.pop("media-endpoint", None)
+$if micropub_ep:
+    <p class=pass>Supports
+$else:
+    <p class=fail>Does not support
+<a href=//micropub.spec.indieweb.org><img src=/static/specs/micropub.svg style=height:1.5em;opacity:25%> Micropub</a>\
+$if micropub_ep and media_ep:
+     with a <a href=//micropub.spec.indieweb.org/#media-endpoint>media endpoint</a>\
+.
+</p>
+
+$ microsub_ep = rels.pop("microsub", None)
+$if microsub_ep:
+    <p class=pass>Supports
+$else:
+    <p class=fail>Does not support
+<a href=//indieweb.org/Microsub><img src=/static/specs/microsub.svg style=height:1.5em;opacity:25%> Microsub</a>.
+</p>
+</div>
+
+$ dependencies = []
+$#details.pop("stylesheets")
+$# $for stylesheet in details.pop("stylesheets"):
+$#     $if not stylesheet.startswith(url.normalized):
+$#         $ dependencies.append(stylesheet)
+$# $for script in details.pop("scripts"):
+$#     $if "src" in script:
+$#         $if not script["src"].startswith(url.normalized):
+$#             $ dependencies.append(script["src"])
+
+$# <h2>Media</h2>
+$#
+$# <h3>Stylesheets</h3>
+$# $if details["stylesheets"]:
+$#     <ol>
+$#     $for stylesheet in details["stylesheets"]:
+$#         <li>$uri(stylesheet).normalized</li>
+$#     </ol>
+$# $else:
+$#     <p><em>No external stylesheets.</em></p>
+$# $# TODO inline stylesheets
+$#
+$# <h3>Scripts</h3>
+$# $ scripts = details.pop("scripts")
+$# $if scripts:
+$#     <!--p class=NOTE>Some users have scripting turned off. See
+$#     <a href=https://indieweb.org/js;dr>js;dr</a>.</p-->
+$#     <ul>
+$#     $for script in scripts:
+$#         <li>
+$#         $if "src" in script:
+$#             $if not script["src"].startswith(url.normalized):
+$#                 $ dependencies.append(script["src"])
+$#             $uri(script["src"]).normalized
+$#         $elif "text" in script:
+$#             $# TODO $if script.get("type", None) == "application/ld+json":
+$#             <details><summary>inline, $len(script["text"]) characters</summary><pre>$script["text"]</pre></details>
+$#         $else:
+$#             Unknown: $script
+$#         </li>
+$#     </ul>
+$# $else:
+$#     <p><em>No scripting.</em></p>
+$#
+$# <h3>Images/Audio/Video</h3>
+$# <p>...</p>
+
+<h2>Privacy</h2>
+$ dns_prefetches = rels.pop("dns-prefetch", None)
+$ preconnects = rels.pop("preconnect", None)
+$if dns_prefetches or preconnects:
+    $if dns_prefetches:
+        $ dependencies.extend(dns_prefetches)
+        <h5>DNS Prefetch</h5>
+        <ol>
+        $for dns_prefetch in dns_prefetches:
+            <li>$dns_prefetch</li>
+        </ol>
+    $if preconnects:
+        $ dependencies.extend(preconnects)
+        <h5>Preconnect</h5>
+        <ol>
+        $for preconnect in preconnects:
+            <li>$preconnect</li>
+        </ol>
+
+$if dependencies:
+    <p class=fail>This site has external dependencies.</p>
+    <ul>
+    $for dependency in dependencies:
+        <li>$dependency</li>
+    </ul>
+$else:
+    <p class=pass>This site is truly independent.</p>
+
+<h2>Accessibility</h2>
+$if a11y:
+    <p class=fail>$len(a11y) accessibility concerns.</p>    
+$else:
+    <p class=pass>There are no accessibility concerns.</p>
+
+<img src=/sites/$(url.minimized)/screenshot.png style=width:100%>
+
+<div>
+$if feed := details.pop("feed", None):
+    $if feed["items"]:
+        <div class=h-feed>
+        $for entry in feed["items"]:
+            $# <pre>$pformat(entry)</pre>
+
+            $# $if details["whostyle"]:
+            $#     <iframe
+            $#     onload="this.style.height=(this.contentWindow.document.body.scrollHeight+25)+'px'"
+            $#     style=border:0;width:100% srcdoc='<link rel=stylesheet href=$uri(details["whostyle"][0]).normalized>
+            $#     <div class=whostyle-$uri(url).minimized.replace(".", "-")>
+
+            <div class=entry>
+            $ entry_url = entry.pop("url", [None])[0]
+            $ entry_type = entry.pop("type")[0].partition("-")[2]
+            $ post_type = entry.pop("post-type", None)
+            $if entry_type == "entry":
+                $if in_reply_to := entry.pop("in-reply-to", None):
+                    $ reply_url = in_reply_to[0]
+                    $if isinstance(reply_url, dict):
+                        $ reply_url = reply_url["properties"]["url"][0]
+                    <p>↩️
+                    $ gh_issue_re = r"https://github.com/(\w+)/([\w-]+)/issues/(\d+)(#([\w-]+))?"
+                    $if gh_match := re.match(gh_issue_re, reply_url):
+                        $ user, repo, issue, _, comment = gh_match.groups()
+                        <img src=/sites/github.com/icon.png style=height:1em alt="GitHub logo">
+                        <a href=https://github.com/$user>$user</a> /
+                        <a href=https://github.com/$user/$repo>$repo</a> /
+                        <a href=https://github.com/$user/$repo/issues>issues</a> /
+                        <a href=https://github.com/$user/$repo/issues/$issue>$issue</a> #
+                        <a href=https://github.com/$user/$repo/issues/$issue#$comment>$comment</a>
+                    $elif tw_match := re.match(r"https://twitter.com/(\w+)/status/(\d+)", reply_url):
+                        $ user, tweet = tw_match.groups()
+                        <img src=/sites/twitter.com/icon.png style=height:1em alt="Twitter logo">
+                        <a href=https://twitter.com/$user>$user</a> /
+                        <a href=https://twitter.com/$user/status/$tweet>$tweet</a>
+                    $else:
+                        <a href=$reply_url>$reply_url</a>
+                    </p>
+                $if photo := entry.pop("photo", [None])[0]:
+                    <p><img src=$photo style=max-width:100% alt=$photo /></p>
+                $if entry_name := entry.pop("name", None):
+                    <h3>$entry_name[0]</h3>
+                $if summary := entry.pop("summary", None):
+                    $if entry_name != summary:
+                        <p>$summary</p>
+                $if like_of := entry.pop("like-of", [None])[0]:
+                    <p>♥️ <a href=$like_of>$like_of</a></p>
+                $if content := entry.pop("content", [None])[0]:
+                    $# XXX $if post_type == "article":
+                    $# XXX     <p>$content["value"][:280]&hellip;</p>
+                    $# XXX $else:
+                    <p>$" ".join(content["value"].split()[:50])&hellip;&nbsp;\
+                    <small><a href=$entry_url>read more</a></small></p>
+                $if categories := entry.pop("category", None):
+                    <p><small>
+                    $for category in categories:
+                        <code>$category</code>\
+                        $if not loop.last:
+                            ,
+                    </small></p>
+            $elif entry_type == "event":
+                <p>$entry.pop("name")<br>
+                <small>$entry.pop("start")&thinsp;&ndash;&thinsp;$entry.pop("end", None)</small></p>
+                $ entry.pop("start-str", None)
+                $ entry.pop("end-str", None)
+                <form method=post action=/micropub>
+                <input type=hidden name=in-reply-to value="$entry_url">
+                <select name=rsvp>
+                <option value=yes>Yes</option>
+                <option value=no>No</option>
+                <option value=maybe>Maybe</option>
+                </select>
+                <button>RSVP</button>
+                </form>
+            <p style=text-align:right>\
+            $if author := entry.pop("author", [None])[0]:
+                $if isinstance(author, str):
+                    $author
+                $elif author_url := author.pop("url", None):
+                    $if uri(author_url).minimized not in all_urls:
+                        $author_url
+            <small>
+            $if location := entry.pop("location", None):
+                $if "latitude" in location:
+                    <a href=/map?lat=$location['latitude']&lng=$location['longitude']>\
+                    $location["latitude"], $location["longitude"]</a>
+            $if published := entry.pop("published", [None])[0]:
+                <time value="$published.isoformat()" datetime="$published"
+                class="dt-published">$published.isoformat()</time>
+                $# $get_dt(published).diff_for_humans()
+                $# $if updated := entry.pop("updated", None):
+                $#     $if updated != published:
+                $#         , <small>updated $get_dt(updated).diff_for_humans()</small>
+                $ entry.pop("published-str", None)
+                $ entry.pop("updated", None)
+                $ entry.pop("updated-str", None)
+    
+            $if entry_url:
+                <br><a href=$entry_url>/$uri(entry_url).path.rstrip("/")</a>
+    
+            $if syndication_urls := entry.pop("syndication", None):
+                $for syndication_url in syndication_urls:
+                    $if tw_match := re.match(r"https://twitter.com/(\w+)/status/(\d+)", syndication_url):
+                        $ user, tweet = tw_match.groups()
+                        <a href=https://twitter.com/$user/status/$tweet><img
+                        src=/sites/twitter.com/icon.png style=height:1em alt="Twitter logo"></a>
+                        $# <a href=https://twitter.com/$user>$user</a> /
+                        $# <a href=https://twitter.com/$user/status/$tweet>$tweet</a>
+                    $else:
+                        $syndication_url\
+                    $if not loop.last:
+                        ,
+            </small></p>
+            $ entry.pop("uid", None)  # FIXME: what is it good for?
+            $if entry:
+                $:render_uninterpreted(f"h-{entry_type}", entry, "properties")
+            </div>
+            $if details["whostyle"]:
+                </div>'>
+                </iframe>
+        $if rel_next := rels.pop("next", None):
+            <p>next: <a href=$rel_next[0]>$rel_next[0]</a></p>
+        $if rel_prev := rels.pop("prev", None):
+            <p>previous: <a href=$rel_prev[0]>$rel_prev[0]</a></p>
+        </div>
+$else:
+    <p><em>No <a href=https://indieweb.org/feed#How_to_Publish>content
+    feed</a> available.</em></p>
+</div>
+
+$if scores := details.get("scores"):
+    <div style="display:grid;grid-template-columns:50% 50%;">
+
+    $def list_reasons(level):
+        <ul id=level$level>
+        $for n, (status, reason) in enumerate(details["scores"][level-1]):
+            $if status != 3:
+                <li id=$(level)-$axes[n] class=$statuses[status]>$:(reason.capitalize()).</li>
+        </ul>
+    
+    <object id=scoreboard data=/sites/$(url.minimized)/scoreboard.svg></object>
+    <div id=indiemark>
+    <div style="background-color:#222;color:#999;font-size:.8em;padding:.5em 1em;">
+    <h4>Level 1: Use your domain for identity, sign-in, and publishing posts</h4>
+    $:list_reasons(1)
+    <h4>Level 2: Improve your personal identity and post multiple types of posts</h4>
+    $:list_reasons(2)
+    <h4>Level 3: Post and send replies from your own site</h4>
+    $:list_reasons(3)
+    <h4>Level 4: Receive and show comments</h4>
+    $:list_reasons(4)
+    <h4>Level 5: Manage comments</h4>
+    $:list_reasons(5)
+    </div>
+    </div>
+
+    </div>
+</div>
+
+$if rels:
+    $:render_uninterpreted("rel=", rels, "links")
+
+<footer style=font-size:.8em>
+<p><a href=/details/$(url.minimized)>Details (JSON)</a></p>
+
+<form method=post>
+$# $if headers := details.get("headers", None):
+$#     <p>$details["headers"]</p>
+<button>Recrawl</button>
+</form>
+$if not tx.user.session:
+    <form method=get action=/guests/sign-in>
+    <input type=hidden name=me value=$url.normalized>
+    <p>If you are the owner of this site you may sign in to gain access to more tools.</p>
+    <button>Sign in as $details["domain"]["name"]</button>
+    </form>
+$# $if tx.user.session and (tx.user.session["uid"][0] == details["url"]):
+$#     <h3>Site Owner Controls</h3>
+$#     <button>Test</button>
+</footer>
+
+<style>
+h2 {
+  border-bottom: .1em solid #333;
+  font-size: .9em; }
+</style>

index 0000000..5657e95
--- /dev/null

+$def with (urls)
+$var title: Indexed Sites
+
+<ul>
+$for url in urls:
+    <li><a href=/$url["url"]>$url["url"]</a>
+    $# <code>\
+    $# $ size = url["details"]["size"]
+    $# $if size < 10:
+    $#     $round(size, 1)\
+    $# $else:
+    $#     $round(size)\
+    $# <small style=text-transform:uppercase>kb</small></code>
+    $# last crawled $url["crawled"].diff_for_humans()
+    </li>
+</ul>