Bootstrap
Committed b82773
index 0000000..f9f0d23
--- /dev/null
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry]
+name = "webint-cache"
+version = "0.0.1"
+description = "manage resource caching on your website"
+keywords = ["micropub"]
+homepage = "https://ragt.ag/code/projects/webint-cache"
+repository = "https://ragt.ag/code/projects/webint-cache.git"
+documentation = "https://ragt.ag/code/projects/webint-cache/api"
+authors = ["Angelo Gladding <angelo@ragt.ag>"]
+license = "AGPL-3.0-or-later"
+packages = [{include="webint_cache"}]
+
+[tool.pyright]
+reportGeneralTypeIssues = false
+reportOptionalMemberAccess = false
+
+[tool.poetry.plugins."webapps"]
+cache = "webint_cache:app"
+
+[[tool.poetry.source]]
+name = "main"
+url = "https://ragt.ag/code/pypi"
+
+[tool.poetry.dependencies]
+python = ">=3.10,<3.11"
+webint = ">=0.0"
+svglib = "^1.5.1"
+python-whois = "^0.8.0"
+feedparser = "^6.0.11"
+phonenumbers = "^8.13.27"
+
+[tool.poetry.group.dev.dependencies]
+gmpg = {path="../gmpg", develop=true}
+bgq = {path="../bgq", develop=true}
+newmath = {path="../newmath", develop=true}
+sqlyte = {path="../sqlyte", develop=true}
+webagt = {path="../webagt", develop=true}
+webint = {path="../webint", develop=true}
index 0000000..4f4711a
--- /dev/null
+""""""
+
+import collections
+import hashlib
+import logging
+import os
+import pathlib
+import subprocess
+import time
+
+import PIL
+import requests
+import web
+import webagt
+import whois
+from reportlab.graphics import renderPM
+from svglib.svglib import svg2rlg
+from web import tx
+
+from .silos import silos
+
+logging.basicConfig(level=logging.DEBUG, filename="crawl.log", filemode="w", force=True)
+
+app = web.application(
+ __name__,
+ prefix="cache",
+ args={
+ "site": r"[a-z\d.-]+\.[a-z]+",
+ "page": r".*",
+ },
+ model={
+ "cache": {
+ "url": "TEXT UNIQUE NOT NULL",
+ "crawled": "DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP",
+ "details": "JSON NOT NULL",
+ },
+ "cache_redirects": {
+ "incoming": "TEXT UNIQUE NOT NULL",
+ "outgoing": "TEXT NOT NULL",
+ },
+ },
+)
+
+sites_path = pathlib.Path("sites")
+sites_path.mkdir(exist_ok=True)
+
+agent = webagt.Agent("webint-cache")
+blocklist = ["accounts.google.com"]
+ignored_rels = [
+ "author",
+ "bookmark",
+ "canonical",
+ "category",
+ "contents",
+ "home",
+ "nofollow",
+ "noreferrer",
+ "noopener",
+ "pingback",
+ "profile",
+ "shortcut",
+ "shortlink",
+ "syndication",
+ "tag",
+ "ugc",
+]
+social_network_rels = ["acquaintance", "colleague", "friend", "met"]
+
+# def refresh_page(url):
+# try:
+# response = agent.get(domain)
+# except (requests.ConnectionError, requests.Timeout) as err:
+# return {"status": "not responding", "error": str(err)}
+# try:
+# tx.db.insert(
+# "cache",
+# url=url,
+# details={
+# "metaverse":
+# hashlib.sha256(domain.encode("utf-8")).hexdigest().upper(),
+# "domain": {
+# "name": domain,
+# "suffix": domain_details.suffix,
+# "hsts": domain_details.in_hsts,
+# },
+# },
+# )
+# web.enqueue(query_whois, domain)
+# except tx.db.IntegrityError:
+# pass
+# return
+
+
+def refresh_site(domain):
+ """Fetch `domain` and store site details and related media."""
+ if domain in blocklist or not webagt.uri(domain).suffix:
+ logging.debug(f"skipping {domain}")
+ return
+ # TODO logging.debug("getting previous details..") # for etag
+ start = time.time()
+ logging.debug("downloading HTML..")
+ try:
+ response = agent.get(domain)
+ except (requests.ConnectionError, requests.Timeout) as err:
+ return {"status": "not responding", "error": str(err)}
+ if domain != response.url.host:
+ try:
+ tx.db.insert("cache_redirects", incoming=domain, outgoing=response.url.host)
+ except tx.db.IntegrityError:
+ tx.db.update(
+ "cache_redirects",
+ outgoing=response.url.host,
+ where="incoming = ?",
+ vals=[domain],
+ )
+ refresh_site(response.url.host)
+ return
+ domain_details = webagt.uri(domain)
+ try:
+ tx.db.insert(
+ "cache",
+ url=domain,
+ details={
+ "metaverse": hashlib.sha256(domain.encode("utf-8")).hexdigest().upper(),
+ "domain": {
+ "name": domain,
+ "suffix": domain_details.suffix,
+ "hsts": domain_details.in_hsts,
+ },
+ },
+ )
+ web.enqueue(query_whois, domain)
+ except tx.db.IntegrityError:
+ pass
+ site_path = sites_path / domain
+ site_path.mkdir(parents=True, exist_ok=True)
+
+ web.enqueue(run_lighthouse, domain)
+ web.enqueue(run_pa11y, domain)
+
+ update_details = get_updater(domain)
+ update_details(
+ accessed=web.now().to_iso8601_string(),
+ response={
+ "status": response.status,
+ "time": time.time() - start,
+ "headers": dict(response.headers),
+ "length": round(len(response.text) / 1000),
+ },
+ )
+ logging.debug("parsing Microformats..")
+ mf2json = response.mf2json
+ rels = dict(mf2json["rels"])
+
+ if authorization_endpoint := rels.pop("authorization_endpoint", None):
+ indieauth_details = {"authorization_endpoint": authorization_endpoint}
+ if token_endpoint := rels.pop("token_endpoint", None):
+ indieauth_details["token_endpoint"] = token_endpoint
+ update_details(indieauth=indieauth_details)
+ if indieauth_metadata_endpoint := rels.pop("indieauth-metadata", None):
+ web.enqueue(get_indieauth_metadata, domain, indieauth_metadata_endpoint[0])
+
+ if search := rels.pop("search", None):
+ web.enqueue(get_search_description, domain, search[0])
+
+ if manifest := rels.pop("manifest", None):
+ web.enqueue(get_manifest, domain, manifest[0])
+
+ if hub_endpoint := rels.pop("hub", None):
+ web.enqueue(
+ get_websub_hub, domain, hub_endpoint[0], rels.pop("self", [domain])[0]
+ )
+
+ web.enqueue(get_activitypub, domain)
+
+ card = response.card
+ update_details(mf2json=mf2json, card=card, rels=rels)
+ photo_url = rels.pop("apple-touch-icon", None)
+ card_type = None
+ if card:
+ card_type = "person"
+ if card_org := card.get("org"):
+ if card["name"][0] == card_org[0]:
+ card_type = "organization"
+ if emails := card.get("email"):
+ gravatars = {}
+ for email in emails:
+ email = email.removeprefix("mailto:")
+ gravatars[email] = hashlib.md5(
+ email.strip().lower().encode("utf-8")
+ ).hexdigest()
+ # TODO SET `gravatars`
+ if photo_urls := card.get("photo"): # TODO move to on-demand like icon?
+ if isinstance(photo_urls[0], dict):
+ photo_url = photo_urls[0]["value"]
+ else:
+ photo_url = photo_urls[0]
+ try:
+ icon_url = rels.pop("icon")[0]
+ except KeyError:
+ icon_url = f"{domain}/favicon.ico"
+ web.enqueue(get_media, domain, photo_url, icon_url)
+
+ scripts = []
+ for script in response.dom.select("script"):
+ script_details = dict(script.element.attrib)
+ script_details["content_length"] = len(script.text)
+ script_details["text"] = script.text
+ scripts.append(script_details)
+ stylesheets = rels.pop("stylesheet", [])
+ for stylesheet in response.dom.select("style"):
+ stylesheets.append(
+ {
+ "content_length": len(stylesheet.text),
+ "text": stylesheet.text,
+ }
+ )
+ whostyle = rels.pop("whostyle", None)
+ try:
+ title = response.dom.select("title")[0].text
+ except IndexError:
+ title = ""
+ update_details(
+ scripts=scripts, stylesheets=stylesheets, whostyle=whostyle, title=title
+ )
+
+ for ignored_rel in ignored_rels:
+ rels.pop(ignored_rel, None)
+ social_network = {}
+ for social_network_rel in social_network_rels:
+ if people_rels := rels.pop(social_network_rel, None):
+ social_network[social_network_rel] = people_rels
+ logging.debug("determining reciprocal rel=me..")
+ reciprocals = set()
+ rel_me_silos = []
+ for silo, silo_details in silos.items():
+ if len(silo_details) == 3:
+ rel_me_silos.append(silo_details[0])
+ rel_mes = rels.pop("me", [])
+ url = webagt.uri(domain) # TODO XXX
+ for me_url in rel_mes:
+ if not me_url.startswith(("http", "https")):
+ continue
+ me_url = webagt.uri(me_url)
+ logging.debug(f" rel=me {me_url}")
+ # XXX if (me_url.domain, me_url.suffix) == ("twitter", "com"):
+ # XXX if "/" in me_url.path:
+ # XXX continue
+ # XXX twitter_id = me_url.path.split("/")[0]
+ # XXX twitter_bearer = app.cfg.get("TWITTER")
+ # XXX print(
+ # XXX agent.get(
+ # XXX f"https://api.twitter.com/2/users"
+ # XXX f"/by/username/{twitter_id}?user.fields=url",
+ # XXX headers={"Authorization": f"Bearer {twitter_bearer}"},
+ # XXX ).json
+ # XXX )
+ # XXX twitter_profile = agent.get(
+ # XXX f"https://api.twitter.com/2/users"
+ # XXX f"/by/username/{twitter_id}?user.fields=url",
+ # XXX headers={"Authorization": f"Bearer {twitter_bearer}"},
+ # XXX ).json["data"]
+ # XXX if twitter_profile_url := twitter_profile.get("url", None):
+ # XXX try:
+ # XXX recip_url = agent.get(twitter_profile_url).url
+ # XXX except requests.Timeout:
+ # XXX continue
+ # XXX if recip_url == url:
+ # XXX reciprocals.add(me_url.minimized)
+ if (me_url.subdomain, me_url.domain, me_url.suffix) == (
+ "en",
+ "wikipedia",
+ "org",
+ ):
+ wp_props = agent.get(me_url).mf2json["items"][0]["properties"]
+ if wp_url := wp_props.get("url"):
+ if wp_url[0] == url:
+ reciprocals.add(me_url.minimized)
+ if me_url.host not in rel_me_silos:
+ continue
+ try:
+ reverse_rel_mes = agent.get(me_url).mf2json["rels"]["me"]
+ except KeyError:
+ continue
+ for reverse_rel_me in reverse_rel_mes:
+ if webagt.uri(reverse_rel_me).minimized == url.minimized:
+ reciprocals.add(me_url.minimized)
+ update_details(
+ social_network=social_network, reciprocals=list(reciprocals), rel_me=rel_mes
+ )
+
+ feed = response.feed
+ alt_feed_urls = set()
+ if not feed["items"]:
+ try:
+ alt_feed_urls = set(rels["home"]) & set(rels["alternate"])
+ except KeyError:
+ pass
+ alternate_reprs = rels.pop("alternate", [])
+ alternate_feeds = rels.pop("feed", [])
+ if not feed["items"]:
+ for alt_feed_url in alternate_reprs + alternate_feeds:
+ try:
+ feed = agent.get(alt_feed_url).feed
+ except ValueError: # XML feed
+ pass
+ finally:
+ print("using", alt_feed_url)
+ # rels.pop("alternate", None)
+ for entry in feed["items"]:
+ try:
+ published = entry["published"]
+ permalink = entry["url"]
+ entry.pop("published-str")
+ except KeyError:
+ continue
+ entry.pop("uid", None)
+ # TODO refresh_page(permalink)
+ update_details(feed=feed)
+
+ # logging.debug("archiving to WARC..")
+ # warc_file = site_path / "warc_output"
+ # subprocess.run(
+ # [
+ # "wget",
+ # "-EHkpq",
+ # site,
+ # f"--warc-file={warc_file}",
+ # "--no-warc-compression",
+ # "--delete-after",
+ # ]
+ # )
+
+ logging.debug("calculating IndieMark score..")
+ scores = [
+ [(3, None)] * 10,
+ [(3, None)] * 10,
+ [(3, None)] * 10,
+ [(3, None)] * 10,
+ [(3, None)] * 10,
+ ]
+
+ # L1 Identity
+ if card:
+ if "icon" in rels:
+ scores[0][0] = (0, "contact info and icon on home page")
+ else:
+ scores[0][0] = (1, "contact info but no icon on home page")
+ else:
+ scores[0][0] = (2, "no contact info on home page")
+
+ # L1 Authentication
+ if rel_mes:
+ scores[0][1] = (
+ 1,
+ "<code>rel=me</code>s found but none for GitHub or Twitter",
+ )
+ for rel_me in rel_mes:
+ if rel_me.startswith(("https://github.com", "https://twitter.com/")):
+ scores[0][1] = (
+ 0,
+ "<code>rel=me</code>s found for GitHub and/or Twitter",
+ )
+ break
+ else:
+ scores[0][1] = (2, "no <code>rel=me</code>s found")
+
+ # L1 Posts
+ if feed["items"]:
+ if len(feed["items"]) > 1:
+ scores[0][2] = (0, "more than one post")
+ else:
+ scores[0][2] = (1, "only one post")
+ else:
+ scores[0][2] = (2, "no posts")
+
+ # L1 Search
+ # XXX if details["ddg"]:
+ # XXX scores[0][6] = (0, "your content was found on DuckDuckgo")
+ # XXX else:
+ # XXX scores[0][6] = (
+ # XXX 1,
+ # XXX "your content was <strong>not</strong> found on DuckDuckgo",
+ # XXX )
+
+ # L1 Interactivity
+ scores[0][8] = (0, "content is accessible (select/copy text/permalinks)")
+
+ # L2 Identity
+ scores[1][0] = (0, "you've linked to silo profiles")
+
+ # L3 'h-card contact info and icon on homepage'
+ # L3 'multiple post types'
+ # L3 'POSSE'
+ # L3 'Posting UI'
+ # L3 'Next/Previus Navigation between posts'
+ # L3 'Search box on your site'
+ # L3 'Embeds/aggregation'
+ # L3 'Web Actions'
+
+ # L4 'Send Webmentions'
+ # L4 'PubSubHubbub support'
+ # L4 'Display Search Results on your site'
+ # L4 'Display Reply Context'
+
+ # L5 'Automatic Webmentions'
+ # L5 'Handle Webmentions'
+ # L5 'Display full content rich reply-contexts'
+ # L5 'Search on your own search backend'
+ # L5 'Multiple Reply Types'
+ # L5 'Display Backfeed of Comments'
+
+ update_details(scores=scores)
+ # logging.debug("dumping details..")
+ # details["stored"] = web.now().to_iso8601_string()
+ web.dump(scores, path=site_path / "scores.json")
+ logging.debug("generating scoreboard..")
+ subprocess.run(["node", "../index.js", domain])
+
+
+def get_updater(url):
+ """Return an update function catered to `domain`."""
+
+ def update_details(**kwargs):
+ """Atomically update the resource's details with `kwargs`."""
+ keys = ", ".join([f"'$.{key}', json(?)" for key in kwargs.keys()])
+ tx.db.update(
+ "cache",
+ what=f"details = json_set(details, {keys})",
+ where="url = ?",
+ vals=[web.dump(v) for v in kwargs.values()] + [url],
+ )
+
+ return update_details
+
+
+def query_whois(domain):
+ """Update the creation date for the domain."""
+ logging.debug("querying WHOIS")
+ domain_created = whois.whois(domain)["creation_date"]
+ if isinstance(domain_created, list):
+ domain_created = domain_created[0]
+ try:
+ domain_created = domain_created.isoformat()
+ except AttributeError:
+ pass
+ get_updater(domain)(**{"domain.created": domain_created})
+
+
+def get_media(domain, photo_url, icon_url):
+ """Download the representative photo for the domain."""
+ site_path = sites_path / domain
+ if photo_url:
+ logging.debug("downloading representative photo..")
+ filename = photo_url.rpartition("/")[2]
+ suffix = filename.rpartition(".")[2]
+ if not suffix:
+ suffix = "jpg"
+ original = site_path / f"photo.{suffix}"
+ webagt.download(photo_url, original)
+ final = site_path / "photo.png"
+ if suffix != "png":
+ if suffix == "svg":
+ drawing = svg2rlg(original)
+ renderPM.drawToFile(drawing, final, fmt="PNG")
+ else:
+ try:
+ image = PIL.Image.open(original)
+ except PIL.UnidentifiedImageError:
+ pass
+ else:
+ image.save(final)
+ logging.debug("downloading iconography..")
+ final = site_path / "icon.png"
+ filename = icon_url.rpartition("/")[2]
+ suffix = filename.rpartition(".")[2]
+ original = site_path / f"icon.{suffix}"
+ try:
+ download = webagt.download(icon_url, original)
+ except web.ConnectionError:
+ pass
+ else:
+ if download.status == 200 and suffix != "png":
+ try:
+ image = PIL.Image.open(original)
+ except PIL.UnidentifiedImageError:
+ pass
+ else:
+ image.save(final)
+
+
+def get_indieauth_metadata(domain, indieauth_metadata_endpoint):
+ """Download IndieAuth metadata for the domain."""
+ logging.debug("downloading IndieAuth metadata..")
+ metadata = agent.get(indieauth_metadata_endpoint).json
+ get_updater(domain)(**{"indieauth": {"metadata": metadata}})
+
+
+def get_search_description(domain, search_url):
+ """Download OpenSearch description document at `search_url`."""
+ logging.debug("downloading OpenSearch description..")
+ search_xml = agent.get(search_url).xml
+ search_url = webagt.uri(search_xml.find("Url", search_xml.nsmap).attrib["template"])
+ search_endpoint = f"//{search_url.host}/{search_url.path}"
+ name = None
+ for name, values in search_url.query.items():
+ if values[0] == "{template}":
+ break
+ get_updater(domain)(**{"search_url": [search_endpoint, name]})
+
+
+def get_manifest(domain, manifest_url):
+ """Download site manifest at `manifest_url`."""
+ logging.debug("downloading site manifest..")
+ # if "patches" in web.get(manifest_url).headers:
+ # get_updater(domain)(**{"manifest": "hot"})
+ webagt.download(manifest_url, sites_path / domain / "manifest.json")
+
+
+def get_websub_hub(domain, endpoint, self):
+ """Subscribe to site via WebSub `endpoint`."""
+ # TODO subscribe if not already
+ logging.debug("subscribing to WebSub hub..")
+ get_updater(domain)(**{"hub": [endpoint, self]})
+
+
+def run_lighthouse(domain):
+ """Run lighthouse for the domain."""
+ logging.debug("running lighthouse..")
+ subprocess.Popen(
+ [
+ "lighthouse",
+ f"https://{domain}",
+ "--output=json",
+ f"--output-path={sites_path}/{domain}/audits.json",
+ "--only-audits=total-byte-weight",
+ '--chrome-flags="--headless"',
+ "--quiet",
+ ],
+ stdout=subprocess.PIPE,
+ ).stdout.read()
+
+
+def run_pa11y(domain):
+ """Run pa11y for the domain."""
+ site_path = sites_path / domain
+ logging.debug("running pa11y..")
+ web.dump(
+ web.load(
+ subprocess.Popen(
+ [
+ "pa11y",
+ domain,
+ "--reporter",
+ "json",
+ "--screen-capture",
+ site_path / "site.png",
+ ],
+ stdout=subprocess.PIPE,
+ ).stdout.read()
+ ),
+ path=site_path / "a11y.json",
+ )
+
+ found_icon = True # TODO XXX
+ logging.debug("finding most used color, generating images..")
+ try:
+ screenshot = PIL.Image.open(site_path / "site.png")
+ except FileNotFoundError:
+ pass
+ else:
+ screenshot.crop((0, 0, 1280, 1024)).save(site_path / "screenshot.png")
+ colors = collections.Counter()
+ for x in range(screenshot.width):
+ for y in range(screenshot.height):
+ colors[screenshot.getpixel((x, y))] += 1
+ most_used_color = colors.most_common()[0][0]
+ icon = PIL.Image.new("RGB", (1, 1), color=most_used_color)
+ if not found_icon:
+ icon.save(site_path / "icon.png")
+ if not (site_path / "photo.png").exists():
+ icon.save(site_path / "photo.png")
+
+
+def get_activitypub(domain):
+ webfinger = agent.get(f"https://{domain}/.well-known/webfinger")
+
+
+@app.query
+def get_posts(db):
+ return []
+
+
+@app.query
+def get_people(db):
+ return {
+ url: details["card"]
+ for url, details in tx.db.select("cache", what="url, details", order="url ASC")
+ }
+
+
+@app.query
+def get_people_details(db):
+ return tx.db.select("people", order="url ASC")
+
+
+@app.query
+def get_categories(db):
+ categories = collections.Counter()
+ with db.transaction as cur:
+ for post in cur.cur.execute(
+ "select json_extract(cache.details, '$.category') "
+ "AS categories from cache"
+ ):
+ if not post["categories"]:
+ continue
+ if post_categories := web.load(post["categories"]):
+ for post_category in post_categories:
+ categories[post_category] += 1
+ return categories
+
+
+@app.query
+def get_resources(db):
+ return db.select(
+ "cache",
+ where="crawled > ?",
+ vals=[web.now().subtract(days=7)],
+ order="crawled DESC",
+ )
+
+
+@app.control("")
+class Cache:
+ """All cached resources."""
+
+ def get(self):
+ """Return a list of all cached resources."""
+ return app.view.index()
+
+ def post(self):
+ address = web.form("address").address
+ details = get_resource(address)
+ raise web.SeeOther(f"/cache/{address}")
+
+ # TODO if no-flash-header or use form argument:
+ # TODO raise web.SeeOther(); flash user's session with message to insert as CSS
+ # TODO elif flash-header:
+ # TODO return just message as JSON
+ # TODO
+ # TODO raise web.flash("crawl enqueued")
+
+
+@app.control("resource")
+class PreviewResource:
+ """"""
+
+ def get(self):
+ url = web.form(url=None).url
+ web.header("Content-Type", "application/json")
+ if not url:
+ return {}
+ resource = web.get(url)
+ if resource.entry:
+ return resource.entry
+ if resource.event:
+ return resource.event
+ if resource.feed:
+ return resource.feed
+ return {}
+
+ # XXX data = cache.parse(url)
+ # XXX if "license" in data["data"]["rels"]:
+ # XXX data["license"] = data["data"]["rels"]["license"][0]
+ # XXX try:
+ # XXX edit_page = data["html"].cssselect("#ca-viewsource a")[0]
+ # XXX except IndexError:
+ # XXX # h = html2text.HTML2Text()
+ # XXX # try:
+ # XXX # data["content"] = h.handle(data["entry"]["content"]).strip()
+ # XXX # except KeyError:
+ # XXX # pass
+ # XXX try:
+ # XXX markdown_input = ("html", data["entry"]["content"])
+ # XXX except (KeyError, TypeError):
+ # XXX markdown_input = None
+ # XXX else:
+ # XXX edit_url = web.uri.parse(str(data["url"]))
+ # XXX edit_url.path = edit_page.attrib["href"]
+ # XXX edit_page = fromstring(requests.get(edit_url).text)
+ # XXX data["mediawiki"] = edit_page.cssselect("#wpTextbox1")[0].value
+ # XXX data["mediawiki"] = (
+ # XXX data["mediawiki"].replace("{{", r"{!{").replace("}}", r"}!}")
+ # XXX )
+ # XXX markdown_input = ("mediawiki", data["mediawiki"])
+
+ # XXX if markdown_input:
+ # XXX markdown = str(
+ # XXX sh.pandoc(
+ # XXX "-f", markdown_input[0], "-t", "markdown", _in=markdown_input[1]
+ # XXX )
+ # XXX )
+ # XXX for n in range(1, 5):
+ # XXX indent = " " * n
+ # XXX markdown = markdown.replace(f"\n{indent}-",
+ # XXX f"\n{indent}\n{indent}-")
+ # XXX markdown = re.sub(r'\[(\w+)\]\(\w+ "wikilink"\)', r"[[\1]]", markdown)
+ # XXX markdown = markdown.replace("–", "--")
+ # XXX markdown = markdown.replace("—", "---")
+ # XXX data["content"] = markdown
+
+ # XXX data.pop("html")
+ # XXX # XXX data["category"] = list(set(data["entry"].get("category", [])))
+ # XXX web.header("Content-Type", "application/json")
+ # XXX return dump_json(data)
+
+
+@app.control("details/{site}(/{page})?")
+class SiteDetails:
+ """A web resource."""
+
+ def get(self, site, page=None):
+ web.header("Content-Type", "application/json")
+ return tx.db.select("cache", where="url = ?", vals=[site])[0]["details"]
+
+
+@app.control("a11y/{site}(/{page})?")
+class Accessibility:
+ """A web resource."""
+
+ def get(self, site, page=None):
+ try:
+ a11y = web.load(path=sites_path / site / "a11y.json")
+ except FileNotFoundError:
+ a11y = None
+ return app.view.a11y(site, a11y)
+
+
+@app.control("sites")
+class Sites:
+ """Index of sites as HTML."""
+
+ def get(self):
+ """Return a list of indexed sites."""
+ # TODO # accept a
+ # TODO tx.db.select(
+ # TODO tx.db.subquery(
+ # TODO "crawls", where="url not like '%/%'", order="crawled desc"
+ # TODO ),
+ # TODO group="url",
+ # TODO )
+ with tx.db.transaction as cur:
+ urls = cur.cur.execute(
+ " select * from ("
+ + "select * from cache where url not like '%/%' order by crawled desc"
+ + ") group by url"
+ )
+ return app.view.sites(urls)
+
+
+@app.control("sites/{site}/screenshot.png")
+class SiteScreenshot:
+ """A site's screenshot."""
+
+ def get(self, site):
+ """Return a PNG document rendering given site's screenshot."""
+ if os.getenv("WEBCTX") == "dev":
+ return sites_path / site / "screenshot.png"
+ web.header("Content-Type", "image/png")
+ web.header("X-Accel-Redirect", f"/X/sites/{site}/screenshot.png")
+
+
+@app.control("sites/{site}/scoreboard.svg")
+class SiteScoreboard:
+ """A site's scoreboard."""
+
+ def get(self, site):
+ """Return an SVG document rendering given site's scoreboard."""
+ if os.getenv("WEBCTX") == "dev":
+ return sites_path / site / "scoreboard.svg"
+ web.header("Content-Type", "image/svg+xml")
+ web.header("X-Accel-Redirect", f"/X/sites/{site}/scoreboard.svg")
+
+
+@app.control("{site}")
+class Site:
+ """A website."""
+
+ def get(self, site):
+ """Return a site analysis."""
+ # TODO if site in [s[0] for s in silos.values()]:
+ # TODO return app.view.silo(site, details)
+ return app.view.site(*get_site(site))
+
+
+@app.control("{site}/{page}")
+class Page:
+ """A webpage."""
+
+ def get(self, site, page):
+ return app.view.page(*get_page(f"{site}/{page}"))
+
+
+def get_resource(url):
+ url = webagt.uri(str(url))
+ min_url = url.minimized
+ redirect = tx.db.select(
+ "cache_redirects", what="outgoing", where="incoming = ?", vals=[min_url]
+ )
+ try:
+ raise web.SeeOther(redirect[0]["outgoing"])
+ except IndexError:
+ pass
+ try:
+ details = tx.db.select("cache", where="url = ?", vals=[min_url])[0]["details"]
+ except IndexError:
+ web.enqueue(refresh_site, min_url)
+ raise web.Accepted(app.view.crawl_enqueued(min_url))
+ return url, details
+
+
+def get_site(site):
+ url, details = get_resource(site)
+ try:
+ audits = web.load(path=sites_path / site / "audits.json")
+ except FileNotFoundError:
+ audits = None
+ try:
+ a11y = web.load(path=sites_path / site / "a11y.json")
+ except FileNotFoundError:
+ a11y = None
+ try:
+ manifest = web.load(path=sites_path / site / "manifest.json")
+ except FileNotFoundError:
+ manifest = None
+ return url, details, audits, a11y, manifest
+
+
+def get_page(page_url):
+ url, details = get_resource(page_url)
+ return url, details
index 0000000..bfcfcc0
--- /dev/null
+silos = {
+ "IndieWeb.rocks": ("indieweb.rocks", [r"([\w\.]+)"], True),
+ "GitHub": ("github.com", [r"(\w+)"], True),
+ "Keybase": ("keybase.io", [r"(\w+)"], True),
+ "sourcehut": ("sr.ht", [r"~(\w+)"], True),
+ "IndieWeb": ("indieweb.org", [r"User:([\w.]+)"]),
+ "PyPI": ("pypi.org", [r"user/([\w.]+)"]),
+ "Micro.blog": ("micro.blog", [r"(\w+)"]),
+ "Twitter": ("twitter.com", [r"(\w+)"]),
+ "Reddit": ("reddit.com", [r"u/(\w+)", r"user/(\w+)"]),
+ "Facebook": ("facebook.com", [r"(\w+)"]),
+ "Instagram": ("instagram.com", [r"(\w+)"]),
+ "LinkedIn": ("linkedin.com", [r"in/(\w+)"]),
+ "Foursquare": ("foursquare.com", [r"user/(\d+)", r"(\w+)"]),
+ "Last.fm": ("last.fm", [r"user/(\w+)"]),
+ "Flickr": ("flickr.com", [r"people/(\w+)", r"(\w+)"]),
+ "Amazon": ("amazon.com", [r"shop/(\w+)"]),
+ "Dribbble": ("dribbble.com", [r"(\w+)"]),
+ "Gravatar": ("gravatar.com", [r"(\w+)"]),
+ "Pinboard": ("pinboard.in", [r"u:(\w+)"]),
+ "Wordpress": ("profiles.wordpress.org", [r"(\w+)"]),
+ "Gumroad": ("gumroad.com", [r"(\w+)"]),
+ "Ko-fi": ("ko-fi.com", [r"(\w+)"]),
+ "Twitch": ("twitch.tv", [r"(\w+)"]),
+ "Soundcloud": ("soundcloud.com", [r"(\w+)"]),
+ "Asmodee": ("account.asmodee.net", [r"en/profile/(\d+)"]),
+ "Wikipedia (EN) User": ("en.wikipedia.org", [r"wiki/User:(\w+)"]),
+ "Wikipedia (EN) Notable Person": ("en.wikipedia.org", [r"wiki/([\w\(\)_]+)"]),
+ "Cash App": ("cash.me", [r"\$(\w+)"]),
+ "Kit": ("kit.co", [r"(\w+)"]),
+ "PayPal": ("paypal.me", [r"(\w+)"]),
+ "Speaker Deck": ("speakerdeck.com", [r"(\w+)"]),
+ "WeChat": ("u.wechat.com", [r"([\w\W]+)"]),
+ "Venmo": ("venmo.com", [r"(\w+)"]),
+ "Duolingo": ("duolingo.com", [r"profile/(\w+)"]),
+ "SlideShare": ("slideshare.net", [r"(\w+)"]),
+ "W3": ("w3.org", [r"users/(\w+)"]),
+ "YouTube": ("youtube.com", [r"(\w+)"]),
+ "Vimeo": ("vimeo.com", [r"(\w+)"]),
+ "500px": ("500px.com", [r"(\w+)"]),
+ "Findery": ("findery.com", [r"(\w+)"]),
+ "Untappd": ("untappd.com", [r"user/(\w+)"]),
+ "del.icio.us": ("del.icio.us", [r"(\w+)"]),
+ "Pocket": ("getpocket.com", [r"@(\w+)"]),
+ "Huffduffer": ("huffduffer.com", [r"(\w+)"]),
+ "Hypothesis": ("hypothes.is", [r"users/(\w+)"]),
+ "Lobsters": ("lobste.rs", [r"u/(\w+)"]),
+ "Medium": ("medium.com", [r"@(\w+)"]),
+ "Myspace": ("myspace.com", [r"(\d+)"]),
+ "Hacker News": ("news.ycombinator.com", [r"user\?id=(\w+)"]),
+ "Nextdoor": ("nextdoor.com", [r"profile/(\d+)"]),
+ "Spotify": ("open.spotify.com", [r"user/(\w+)"]),
+ "Pinterest": ("pinterest.com", [r"(\w+)"]),
+ "Pnut": ("pnut.io", [r"@(\w+)"]),
+ "Upcoming": ("upcoming.org", [r"@(\w+)"]),
+ "Diggo": ("diigo.com", [r"profile/(\w+)"]),
+ "Goodreads": ("goodreads.com", [r"user/show/(\d+)"]),
+ "Notist": ("noti.st", [r"(\w+)"]),
+ "Kickstarter": ("kickstarter.com", [r"profile/([\w-]+)"]),
+ "CodePen": ("codepen.io", [r"([\w-]+)"]),
+ "Listen Notes": ("listennotes.com", [r"@(\w+)"]),
+ "Meetup": ("meetup.com", [r"members/(\d+)"]),
+ "Patreon": ("patreon.com", [r"(\w+)"]),
+ "Periscope": ("periscope.tv", [r"(\w+)"]),
+ "Quora": ("quora.com", [r"([\W\w]+)"]),
+ "eBird": ("ebird.org", [r"profile/([\W\w]+)"]),
+ "Stack Overflow": ("stackoverflow.com", [r"users/(\d+/\w+)"]),
+ "npm": ("npmjs.com", [r"~(\w+)"]),
+ "Trakt": ("trakt.tv", [r"users/(\w+)"]),
+ "ORCID": ("orcid.org", [r"([\d-]+)"]),
+ "Wishlistr": ("wishlistr.com", [r"(\w+)"]),
+ "GitLab": ("gitlab.com", [r"(\w+)"]),
+ "AngelList": ("angel.co", [r"(\w+)"]),
+ "OpenStreetMap": ("openstreetmap.org", [r"user/(\w+)"]),
+ "Google+": ("plus.google.com", [r"\+(\w+)"]),
+}
index 0000000..951d7d9
--- /dev/null
+import math
+import re
+from collections import defaultdict
+from hashlib import sha256
+from pprint import pformat
+
+import pendulum
+import pendulum.parser
+import phonenumbers
+import webagt
+from web import now, tx
+from webagt import uri
+
+from ..silos import silos
+
+__all__ = [
+ "re",
+ "get_dt",
+ "tx",
+ "uri",
+ "silos",
+ "pformat",
+ "get_silo",
+ "get_human_size",
+ "now",
+ "sha256",
+ "format_phonenumber",
+ "defaultdict",
+ "pendulum",
+ "math",
+]
+
+
+def format_phonenumber(tel):
+ return phonenumbers.format_number(
+ phonenumbers.parse(tel, "US"), phonenumbers.PhoneNumberFormat.INTERNATIONAL
+ )
+
+
+def get_dt(dt):
+ try:
+ return pendulum.instance(dt)
+ except ValueError:
+ return pendulum.parser.parse(dt)
+
+
+def get_silo(url):
+ for silo, details in silos.items():
+ try:
+ domain, profile_patterns, _ = details
+ except ValueError:
+ domain, profile_patterns = details
+ for profile_pattern in profile_patterns:
+ if match := re.match(
+ f"{domain}/{profile_pattern}", url.removeprefix("www.")
+ ):
+ return silo, webagt.uri(url).host, profile_pattern, match.groups()[0]
+ return None
+
+
+suffixes = ["B", "KB", "MB", "GB", "TB", "PB"]
+
+
+def get_human_size(nbytes):
+ i = 0
+ while nbytes >= 1024 and i < len(suffixes) - 1:
+ nbytes /= 1024.0
+ i += 1
+ f = ("%.2f" % nbytes).rstrip("0").rstrip(".")
+ return "%s %s" % (f, suffixes[i])
index 0000000..287ea40
--- /dev/null
+$def with (address)
+$var title: Crawl enqueued
+
+<p>Crawl enqueued for: <code>$address</code></p>
index 0000000..833695b
--- /dev/null
+$def with ()
+$var title: Cache
+
+<form method=post style="margin:3em 0">
+<label style=font-size:.9em><strong>Address</strong>
+<small><em>i.e. website URL or fediverse handle</em></small><br>
+<input style="border:.2em inset #ccc;border-radius:.2em;font-size:1.1em;margin:.25em 0 .5em 0;padding:.2em;width:calc(100% - .8em)"
+name=address type=text autofocus placeholder="e.g. example.com or alice@example.com"></label>
+<div style=text-align:right><button
+style="font-size:.8em;padding:0 .2em;text-transform:uppercase">Validate</button></div>
+</form>
+
+<h2 id=standards>Supported Standards</h2>
+
+<h3>IndieWeb</h3>
+
+<ul style=columns:3>
+<li><a href=https://www.w3.org/TR/webmention>Webmention</a></li>
+<li><a href=https://www.w3.org/TR/websub>WebSub</a></li>
+<li><a href=https://www.w3.org/TR/indieauth>IndieAuth</a></li>
+<li><a href=https://indieweb.org/Ticketing_for_IndieAuth>Ticketing</a></li>
+<li><a href=https://www.w3.org/TR/micropub>Micropub</a></li>
+<li><a href=https://indieweb.org/Microsub>Microsub</a></li>
+<li><a href=https://microformats.org/wiki/microformats>microformats</a></li>
+<li><a href=https://microformats.org/wiki/metaformats>metaformats</a></li>
+<li><a href=https://www.w3.org/TR/post-type-discovery><abbr title="Post Type Discovery">PTD</abbr></a></li>
+<li><a href=https://gmpg.org/xfn/><abbr title="XHTML Friends Network">XFN</abbr></a></li>
+<li><a href=https://microformats.org/wiki/rel-me>rel=me</a></li>
+<li><a href=https://microformats.org/wiki/web-sign-in>web sign in</a></li>
+<li><a href=https://www.w3.org/TR/jf2>JF2</a></li>
+</ul>
+
+<h3>Complementary</h3>
+
+<ul style=columns:3>
+<li><a href=https://cyber.harvard.edu/rss/rss.html>RSS</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/rfc4287>Atom Syndication Format</a></li>
+<li><a href=https://www.jsonfeed.org/version/1.1>JSON Feed</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/rfc6415>Web Host Metadata</a></li>
+<li><a href=http://nodeinfo.diaspora.software/protocol.html>NodeInfo</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/rfc7033>WebFinger</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-message-signatures>HTTP Signatures</a></li>
+<li><a href=https://www.w3.org/TR/activitypub>ActivityPub</a></li>
+<li><a href=https://www.w3.org/TR/activitystreams-vocabulary>Activity Vocabulary</a></li>
+<li><a href=https://datatracker.ietf.org/doc/html/rfc6797><abbr title="HTTP Strict Transport Security">HSTS</abbr></a></li>
+</ul>
index 0000000..e107f92
--- /dev/null
+$def with (url, details, audits, a11y, manifest)
+$# Site profile page.
+$# url: the site's URL object; details: crawl results for the site;
+$# audits: Lighthouse audit results (may be falsy); a11y: list of
+$# accessibility concerns; manifest: the site's web-app manifest, if any.
+$ short_title = str(url).removeprefix("@").removeprefix("https://")
+$# var title = short_title
+
+$# IndieMark scoring axes and the CSS class for each reason status.
+$ axes = ["identity", "authentication", "posts", "syndication", "posting UI",
+$ "navigation", "search", "aggregation", "interactivity", "security",
+$ "responses"]
+$ statuses = ["pass", "maybe", "fail"]
+$def render_uninterpreted(title, object, type):
+    $# Render leftover data that no specialized section consumed.
+    $# title: label (e.g. "h-card", "rel="); object: dict of remaining
+    $# key/value pairs; type: "links" (render minimized URIs) or
+    $# "properties" (render raw values).
+    <div class=uninterpreted>
+    <a href=//microformats.org/wiki/$title.rstrip("=")><img src=/static/specs/microformats.png
+    style=float:right;height:2em alt="microformats logo"></a>
+    <p><em>Uninterpreted <code>$title</code> $type</em>:</p>
+    <dl>
+    $for key, values in sorted(object.items()):
+        <dt>$key</dt>
+        $# Scalar values are wrapped so single- and multi-valued
+        $# properties render through the same loop.
+        $if not isinstance(values, list):
+            $ values = [values]
+        $for value in values:
+            <dd>
+            $if type == "links":
+                $uri(value).minimized
+            $elif type == "properties":
+                $value
+            </dd>
+    </dl>
+    </div>
+
+$# The representative h-card; properties are popped off as each section
+$# renders them, so whatever remains falls through to render_uninterpreted.
+$ card = details["card"]
+
+$if featured := card.pop("featured", [None])[0]:
+    <img src=$featured style=width:100%>
+
+$ all_urls = []
+$ rels = details["mf2json"]["rels"]
+
+$# Metaformats fallback data, when the crawler appended it as the last item.
+$ meta_props = {}
+$ items = details["mf2json"]["items"]
+$if items and items[-1]["source"] == "metaformats":
+    $ meta_props = items[-1]["properties"]
+
+<header>
+$# Prefer the h-card name, then the metaformats name, for the page title.
+$ name = card.pop("name", [None])[0]
+$if name:
+    $var title = name
+$elif meta_name := meta_props.get("name"):
+    $var title = meta_name[0]
+<p>
+<a href=$url class=urlbox rel=me>
+<span><img src=/sites/$(url.minimized)/icon.png style=height:1em;width:1em title="\
+$if page_title := details.get("title"):
+    $page_title\
+">
+$# Scheme badge: HSTS beats plain https, which beats http.
+$if details["domain"]["hsts"]:
+    <span class=pass><strong>https://</strong></span>
+$elif url.scheme == "https":
+    <span class=pass>https://</span>
+$else:
+    <span class=fail>http://</span>
+<small
+$if whois_created := details.get("whois_created", None):
+    title="$whois_created"
+    $# Color the domain name by registration age.
+    $ years_held = (pendulum.now() - pendulum.parse(whois_created)).years
+    $# Default for 25+ year registrations; previously whois_color was left
+    $# unbound on that path, crashing the render for long-held domains.
+    $ whois_color = "gold"
+    $if years_held < 1:
+        $ whois_color = "red"
+    $elif years_held < 5:
+        $ whois_color = "orange"
+    $elif years_held < 10:
+        $ whois_color = "yellow"
+    $elif years_held < 15:
+        $ whois_color = "green"
+    $elif years_held < 20:
+        $ whois_color = "blue"
+    $elif years_held < 25:
+        $ whois_color = "purple"
+    style="color:$whois_color"
+>$details["domain"]["name"]</small></span></a>
+$if "metaverse" in details:
+    $# First five characters of the site's metaverse hash, linked into
+    $# its spot on /the-street.
+    $ hash = details["metaverse"][:5]
+    <small><a href=/the-street#$hash><code>$hash</code></a></small>
+</p>
+
+<p>Accessed $details["accessed"]</p>
+
+<p>
+$ response = details["response"]
+<strong title="initial weight">$response["length"] KB</strong>
+$if audits:
+    / <strong title="total weight">$get_human_size(audits["audits"]["total-byte-weight"]["numericValue"])</strong>
+/ <strong title="response time">
+$# Sub-second responses are shown in milliseconds.
+$if response["time"] < 1:
+    $round(response["time"] * 1000) ms
+$else:
+    $round(response["time"], 2) s
+</strong>
+</p>
+</header>
+
+<div style=font-size:.9em>
+
+$# Representative h-card. Known properties are popped off one by one;
+$# anything left over is dumped via render_uninterpreted at the end.
+$ urls = []
+$ web_sign_in = []
+$if card:
+    <div class=h-card>
+    $# XXX $var title: $:name
+    $ card.pop("family-name", None)
+    $ card.pop("given-name", None)
+    $ nicknames = card.pop("nickname", [])
+    $ orgs = card.pop("org", None)
+    $if photo := card.pop("photo", None):
+        <img src=/sites/$url.minimized/photo.png style=width:100% alt="$name's profile picture">
+    $# XXX <h1 style=margin-bottom:0>$name</h1>
+    $ ipa = card.pop("ipa", None)
+    $ sound = card.pop("sound", None)
+    <p>
+    $if ipa:
+        $ipa[0]
+    $if sound:
+        <button>🗣️</button>
+    </p>
+    $if nicknames:
+        <p style=margin-top:0><small>a.k.a. $", ".join(nicknames)</small></p>
+
+    $# Emoji: org icon when the card is an organization (name == org),
+    $# otherwise pick by pronouns.
+    $ pronouns = card.pop("pronouns", [])
+    $ card.pop("pronoun", None)
+    $if orgs and name == orgs[0]:
+        🧑🤝🧑
+    $elif pronouns:
+        $if "they" in pronouns[0]:
+            🧑
+        $elif "she" in pronouns[0]:
+            👩
+        $elif "he" in pronouns[0]:
+            👨
+    $else:
+        🧑
+    <small>
+    $if pronouns:
+        $:pronouns[0].replace(" ", "").replace("/", " / ")
+    $elif pronouns := card.pop("pronoun", None):
+        $for pronoun in pronouns:
+            $pronoun\
+            $if not loop.last:
+                 / \
+    </small>
+    $if bday := card.pop("bday", None):
+        $# Accepts partial dates; "-" marks an omitted component.
+        $ year, month, day = re.match("(\d\d\d\d|-)-(\d\d?|-)-(\d\d?|-)", bday[0]).groups()
+        $if year != "-":
+            $ year = int(year)
+            $ month = int(month)
+            $ day = int(day)
+            $ months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun"]
+            $ months += ["Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+            $ n = now()
+            $# Cake is dimmed except on the actual birthday.
+            <span title="$months[int(month)-1] $day, $year"
+            $if not (month == n.month and day == n.day):
+                style="opacity:25%"
+            >🎂</span>
+    $if "rel_me" in details:
+        $ details["rel_me"] = details["rels"].pop("me", []) # TODO REMOVE
+    $ urls = set(uri(u).minimized for u in card.pop("url", []) + details["rel_me"])
+    $ reciprocals = set(details.pop("reciprocals", []))
+    $# Mushroom marks a verified reciprocal rel=me back to this service.
+    $ self_rel_me = f"indieweb.rocks/{url.minimized}"
+    $if self_rel_me in reciprocals:
+        $ urls.discard(self_rel_me)
+        $ reciprocals.discard(self_rel_me)
+        🍄
+    $if orgs:
+        <br><small>🧑🤝🧑
+        $for org in orgs:
+            $if isinstance(org, dict):
+                $ org_props = org.pop("properties")
+                $if "url" in org_props:
+                    <a href=$org_props["url"][0]>\
+                $org_props["name"][0]\
+                $if "url" in org_props:
+                    </a>\
+            $else:
+                $org\
+            $if not loop.last:
+                ,
+        </small>
+    $if roles := card.pop("role", None):
+        <br><small>
+        $for role in roles:
+            <code>$role</code>\
+            $if not loop.last:
+                ,
+        </small>
+    $if note := card.pop("note", None):
+        <p style=font-size:.75em;hyphens:auto>$note[0]</p>
+    $if categories := card.pop("category", None):
+        <p>🏷️ <small>
+        $for category in categories:
+            <code>\
+            $if isinstance(category, dict):
+                $ cat_props = category.pop("properties")
+                $if "url" in cat_props:
+                    <a href=$cat_props["url"][0]>\
+                $cat_props["name"][0]\
+                $if "url" in cat_props:
+                    </a>\
+            $else:
+                $category\
+            </code>\
+            $if not loop.last:
+                ,
+        </small></p>
+    $# Postal address (h-adr flattened onto the card).
+    $ street_address = card.pop("street-address", None)
+    $ locality = card.pop("locality", None)
+    $ region = card.pop("region", None)
+    $ postal_code = card.pop("postal-code", None)
+    $ country_name = card.pop("country-name", None)
+    $if street_address:
+        <p>📍
+        $if street_address:
+            $street_address[0]
+        $ area_line = []
+        $if locality:
+            $ area_line.append(locality[0])
+        $if region:
+            $ area_line.append(region[0])
+        $", ".join(area_line)
+        $if postal_code:
+            $postal_code[0]
+        $if country_name:
+            $country_name[0]
+        </p>
+    $# Contact methods; scheme-prefixed u-url values (sms:/callto:/mailto:)
+    $# are rerouted into the tel/email lists below.
+    $ emails = [e.partition(":")[2] for e in card.pop("email", [])]
+    $ tels = []
+    $for tel in card.pop("tel", []):
+        $if ":" in tel:
+            $tels.append(tel.partition(":")[2])
+        $else:
+            $tels.append(tel)
+    $ keys = set(card.pop("key", []) + rels.pop("pgpkey", []))
+    $ all_urls = list(urls)
+    $for _url in sorted(urls):
+        $if _url.startswith("sms:") or _url.startswith("callto:"):
+            $ tel = _url.partition(":")[2]
+            $if tel not in tels:
+                $ tels.append(tel)
+            $urls.remove(_url)
+        $elif _url.startswith("mailto:"):
+            $ email = _url.partition(":")[2]
+            $if email not in emails:
+                $ emails.append(email)
+            $urls.remove(_url)
+    $if emails:
+        <ul class=elsewhere>
+        $for email in emails:
+            <li>📧 <small><a href=mailto:$email>$email</a></small>
+            $if "gravatars" in details:
+                $if gravatar := details["gravatars"].pop(email, None):
+                    <a href=//www.gravatar.com/$gravatar><img style=height:1em
+                    src=//www.gravatar.com/avatar/$(gravatar).jpg></a>
+            </li>
+            $ web_sign_in.append(email)
+        </ul>
+    $if tels:
+        <ul class=elsewhere>
+        $for tel in tels:
+            <li>📱 <small>$format_phonenumber(tel)</small><br>
+            <small><a href=callto:$tel>call</a> <a href=sms:$tel>message</a></small>
+            </li>
+            $ web_sign_in.append(tel)
+        </ul>
+    $if keys:
+        <p>🔐
+        $for key in keys:
+            $key
+            $if not loop.last:
+                ,
+            $ web_sign_in.append(uri(key).minimized)
+        </p>
+
+    $def render_rel_me(silo_name, domain, profile_pattern, user):
+        $# Render a silo icon + profile link by substituting the username
+        $# into the silo's profile-URL regex pattern.
+        $ path = re.sub(r"(\(.+\))", user, profile_pattern).replace("\\", "")
+        <a href=/$domain title=$silo_name><img src=/sites/$domain/icon.png
+        style=height:1em></a> <a href=https://$domain/$path>$user</a>
+
+    $ supported_web_signin_silos = ["github.com", "twitter.com"]
+    $# Elsewhere list, in three passes over the remaining rel=me URLs:
+    $# verified reciprocals first, then known silos, then everything else.
+    $if urls:
+        $for _url in sorted(urls):
+            $if _url.startswith(url.minimized):
+                $ urls.remove(_url)
+                $continue
+        <ul class=elsewhere>
+        $for _url in sorted(urls):
+            $if _url in reciprocals:
+                $ urls.remove(_url)
+                <li>
+                $if silo := get_silo(_url):
+                    $:render_rel_me(*silo)
+                $else:
+                    $_url
+                ☑️
+                </li>
+                $if _url.partition("/")[0] in supported_web_signin_silos:
+                    $ web_sign_in.append(_url)
+        $for _url in sorted(urls):
+            $if silo := get_silo(_url):
+                $ urls.remove(_url)
+                <li>$:render_rel_me(*silo)</li>
+        $for _url in sorted(urls):
+            <li>\
+            $if _url.endswith(".onion"):
+                🧅 <a href=http://$_url>$(_url[:12])…</a></li>
+            $else:
+                <a href=//$_url>$_url</a></li>
+        </ul>
+    $ card.pop("uid", None) # FIXME: what is it good for?
+    $if card:
+        $:render_uninterpreted("h-card", card, "properties")
+    </div>
+    $if payments := rels.pop("payment", None):
+        <h3>Payment</h3>
+        <ul>
+        $for payment in payments:
+            $ payment_url = uri(payment)
+            <li><img src=/sites/$payment_url.host/icon.png><a href=$payment>$payment_url</a></li>
+        </ul>
+$else:
+    $# No representative h-card: fall back to metaformats, if present.
+    $if meta_props:
+        $if meta_photo := meta_props.get("photo"):
+            <img style=width:100% src=$meta_photo[0]><br>
+        $if meta_summary := meta_props.get("summary"):
+            <p>$meta_summary[0]</p>
+        $if meta_content := meta_props.get("content"):
+            <div>$meta_content[0]</div>
+    <p>No <a href=https://indieweb.org/representative_h-card>representative
+    card</a> found.
+    $if meta_props:
+        Falling back to <a href=//microformats.org/wiki/metaformats>metaformats</a>.
+    </p>
+
+$# License: special-case Creative Commons URLs into badge images,
+$# otherwise show a plain "Licensed ..." link.
+$ license = rels.pop("license", None)
+$if license:
+    <p><a href=$license[0]>
+    $if cc := re.match(r"https://creativecommons.org/licenses/([a-z-]+)/(\d.\d)", license[0]):
+        $ license, version = cc.groups()
+        <span title="CC $license.upper() $version">
+        <img class=cclicense src=/static/cc/cc.svg alt="Creative Commons logo">\
+        $for part in license.split("-"):
+            <img class=cclicense src=/static/cc/$(part).svg \
+            alt="Creative Commons $(part) license logo">\
+        </span>
+    $else:
+        Licensed $license[0].
+    </a></p>
+
+$if "search_url" in details:
+    $ search_url, search_query_name = details["search_url"]
+    <form action=$search_url method=get>
+    <input type=text name=$search_query_name>
+    <button>Search</button>
+    </form>
+    $# Default added: a bare pop raised KeyError (killing the whole page)
+    $# when the crawler found a search_url but no rel=search remained.
+    $ rels.pop("search", None)
+
+$if manifest:
+    $# Swatch of the web-app manifest's declared background color.
+    $ bgcolor = manifest.get("background_color", "none")
+    <div style="background-color:$bgcolor;height:3em;width:100%"></div>
+
+$# IndieAuth endpoints: rel links are the legacy discovery mechanism;
+$# the metadata document, when present, takes precedence.
+$ auth_ep = rels.pop("authorization_endpoint", None)
+$ token_ep = rels.pop("token_endpoint", None)
+$ ticket_ep = None
+$ indieauth_metadata = details.pop("indieauth_metadata", None)
+$ openid_delegate = rels.pop("openid.delegate", None)
+$ openid_server = rels.pop("openid.server", None)
+$if indieauth_metadata:
+    $ auth_ep = indieauth_metadata.get("authorization_endpoint", None)
+    $ token_ep = indieauth_metadata.get("token_endpoint", None)
+    $ ticket_ep = indieauth_metadata.get("ticket_endpoint", None)
+
+$if auth_ep:
+    <p class=pass>Supports
+$else:
+    <p class=fail>Does not support
+<a href=/indieauth><img src=/static/specs/indieauth.svg style=height:1.5em;opacity:25%> IndieAuth</a>\
+$if auth_ep:
+    $if token_ep:
+         with a <a href=//indieauth.spec.indieweb.org/#token-endpoint>token endpoint</a>\
+    $if ticket_ep:
+         and a <a href=//indieweb.org/IndieAuth_Ticket_Auth#Create_the_IndieAuth_ticket>ticket endpoint</a>\
+.
+</p>
+
+$# $if auth_ep and not indieauth_metadata:
+$#     <p class=NOTE><code>rel=authorization_endpoint</code> is deprecated, leave
+$#     it for now but start using <code>rel=indieauth-metadata</code> instead
+$#     <sup><a href=https://indieauth.spec.indieweb.org/\
+$#     #changes-from-26-november-2020-to-this-version-li-1>read more</a></sup></p>
+
+$# Web sign-in: list collected contact URLs, filtered down to rel=authn
+$# entries when the site declares any.
+$ authn = [uri(authn).minimized for authn in rels.pop("authn", [])]
+$if web_sign_in:
+    <p class=pass>Supports <a href=https://microformats.org/wiki/web_sign-in>web sign-in</a>.</p>
+    <ul>
+    $for web_sign_in_endpoint in web_sign_in:
+        $if authn and web_sign_in_endpoint not in authn:
+            $continue
+        <li>$web_sign_in_endpoint</li>
+    </ul>
+
+$# $if openid_delegate and openid_server:
+$#     <p class=NOTE>OpenID <strong>was a protocol</strong> for using a web address
+$#     as an identity to sign-in to websites; it is losing support, <strong>is
+$#     effectively dead</strong> (versions 1 & 2 are both deprecated, sites are
+$#     dropping support), and <strong>has been replaced on the IndieWeb with
+$#     web-sign-in and IndieAuth</strong>. <sup><a
+$#     href=https://indieweb.org/OpenID>read more</a></sup></p>
+
+$ webmention_ep = rels.pop("webmention", None)
+$if webmention_ep:
+    <p class=pass>Supports
+$else:
+    <p class=fail>Does not support
+<a href=//www.w3.org/TR/webmention/><img src=/static/specs/webmention.svg style=height:1.5em;opacity:25%> Webmention</a> on the homepage.
+</p>
+
+$ micropub_ep = rels.pop("micropub", None)
+$ media_ep = rels.pop("media-endpoint", None)
+$if micropub_ep:
+    <p class=pass>Supports
+$else:
+    <p class=fail>Does not support
+<a href=//micropub.spec.indieweb.org><img src=/static/specs/micropub.svg style=height:1.5em;opacity:25%> Micropub</a>\
+$if micropub_ep and media_ep:
+     with a <a href=//micropub.spec.indieweb.org/#media-endpoint>media endpoint</a>\
+.
+</p>
+
+$ microsub_ep = rels.pop("microsub", None)
+$if microsub_ep:
+    <p class=pass>Supports
+$else:
+    <p class=fail>Does not support
+<a href=//indieweb.org/Microsub><img src=/static/specs/microsub.svg style=height:1.5em;opacity:25%> Microsub</a>.
+</p>
+</div>
+
+$# External dependencies gathered while rendering; the stylesheet/script
+$# scans below are currently disabled, so only DNS-prefetch/preconnect
+$# hints feed this list today.
+$ dependencies = []
+$#details.pop("stylesheets")
+$# $for stylesheet in details.pop("stylesheets"):
+$#     $if not stylesheet.startswith(url.normalized):
+$#         $ dependencies.append(stylesheet)
+$# $for script in details.pop("scripts"):
+$#     $if "src" in script:
+$#         $if not script["src"].startswith(url.normalized):
+$#             $ dependencies.append(script["src"])
+
+$# <h2>Media</h2>
+$#
+$# <h3>Stylesheets</h3>
+$# $if details["stylesheets"]:
+$#     <ol>
+$#     $for stylesheet in details["stylesheets"]:
+$#         <li>$uri(stylesheet).normalized</li>
+$#     </ol>
+$# $else:
+$#     <p><em>No external stylesheets.</em></p>
+$# $# TODO inline stylesheets
+$#
+$# <h3>Scripts</h3>
+$# $ scripts = details.pop("scripts")
+$# $if scripts:
+$#     <!--p class=NOTE>Some users have scripting turned off. See
+$#     <a href=https://indieweb.org/js;dr>js;dr</a>.</p-->
+$#     <ul>
+$#     $for script in scripts:
+$#         <li>
+$#         $if "src" in script:
+$#             $if not script["src"].startswith(url.normalized):
+$#                 $ dependencies.append(script["src"])
+$#             $uri(script["src"]).normalized
+$#         $elif "text" in script:
+$#             $# TODO $if script.get("type", None) == "application/ld+json":
+$#             <details><summary>inline, $len(script["text"]) characters</summary><pre>$script["text"]</pre></details>
+$#         $else:
+$#             Unknown: $script
+$#         </li>
+$#     </ul>
+$# $else:
+$#     <p><em>No scripting.</em></p>
+$#
+$# <h3>Images/Audio/Video</h3>
+$# <p>...</p>
+
+<h2>Privacy</h2>
+$# Resource hints leak visitors' intent to third-party hosts, so they
+$# count as external dependencies.
+$ dns_prefetches = rels.pop("dns-prefetch", None)
+$ preconnects = rels.pop("preconnect", None)
+$if dns_prefetches or preconnects:
+    $if dns_prefetches:
+        $ dependencies.extend(dns_prefetches)
+        <h5>DNS Prefetch</h5>
+        <ol>
+        $for dns_prefetch in dns_prefetches:
+            <li>$dns_prefetch</li>
+        </ol>
+    $if preconnects:
+        $ dependencies.extend(preconnects)
+        <h5>Preconnect</h5>
+        <ol>
+        $for preconnect in preconnects:
+            <li>$preconnect</li>
+        </ol>
+
+$if dependencies:
+    <p class=fail>This site has external dependencies.</p>
+    <ul>
+    $for dependency in dependencies:
+        <li>$dependency</li>
+    </ul>
+$else:
+    <p class=pass>This site is truly independent.</p>
+
+<h2>Accessibility</h2>
+$if a11y:
+    <p class=fail>$len(a11y) accessibility concerns.</p>
+$else:
+    <p class=pass>There are no accessibility concerns.</p>
+
+<img src=/sites/$(url.minimized)/screenshot.png style=width:100%>
+
+<div>
+$# Content feed preview: each h-entry/h-event has its known properties
+$# popped off; leftovers go through render_uninterpreted.
+$if feed := details.pop("feed", None):
+    $if feed["items"]:
+        <div class=h-feed>
+        $for entry in feed["items"]:
+            $# <pre>$pformat(entry)</pre>
+
+            $# $if details["whostyle"]:
+            $#     <iframe
+            $#     onload="this.style.height=(this.contentWindow.document.body.scrollHeight+25)+'px'"
+            $#     style=border:0;width:100% srcdoc='<link rel=stylesheet href=$uri(details["whostyle"][0]).normalized>
+            $#     <div class=whostyle-$uri(url).minimized.replace(".", "-")>
+
+            <div class=entry>
+            $ entry_url = entry.pop("url", [None])[0]
+            $ entry_type = entry.pop("type")[0].partition("-")[2]
+            $ post_type = entry.pop("post-type", None)
+            $if entry_type == "entry":
+                $if in_reply_to := entry.pop("in-reply-to", None):
+                    $ reply_url = in_reply_to[0]
+                    $if isinstance(reply_url, dict):
+                        $ reply_url = reply_url["properties"]["url"][0]
+                    <p>↩️
+                    $# Expand GitHub issue and Twitter status reply targets
+                    $# into breadcrumb-style links.
+                    $ gh_issue_re = r"https://github.com/(\w+)/([\w-]+)/issues/(\d+)(#([\w-]+))?"
+                    $if gh_match := re.match(gh_issue_re, reply_url):
+                        $ user, repo, issue, _, comment = gh_match.groups()
+                        <img src=/sites/github.com/icon.png style=height:1em alt="GitHub logo">
+                        <a href=https://github.com/$user>$user</a> /
+                        <a href=https://github.com/$user/$repo>$repo</a> /
+                        <a href=https://github.com/$user/$repo/issues>issues</a> /
+                        <a href=https://github.com/$user/$repo/issues/$issue>$issue</a> #
+                        <a href=https://github.com/$user/$repo/issues/$issue#$comment>$comment</a>
+                    $elif tw_match := re.match(r"https://twitter.com/(\w+)/status/(\d+)", reply_url):
+                        $ user, tweet = tw_match.groups()
+                        <img src=/sites/twitter.com/icon.png style=height:1em class="Twitter logo">
+                        <a href=https://twitter.com/$user>$user</a> /
+                        <a href=https://twitter.com/$user/status/$tweet>$tweet</a>
+                    $else:
+                        <a href=$reply_url>$reply_url</a>
+                    </p>
+                $if photo := entry.pop("photo", [None])[0]:
+                    <p><img src=$photo style=max-width:100% alt=$photo /></p>
+                $if entry_name := entry.pop("name", None):
+                    <h3>$entry_name[0]</h3>
+                $if summary := entry.pop("summary", None):
+                    $if entry_name != summary:
+                        <p>$summary</p>
+                $if like_of := entry.pop("like-of", [None])[0]:
+                    <p>♥️ <a href=$like_of>$like_of</a></p>
+                $if content := entry.pop("content", [None])[0]:
+                    $# XXX $if post_type == "article":
+                    $# XXX     <p>$content["value"][:280]…</p>
+                    $# XXX $else:
+                    $# First 50 words, then a "read more" link.
+                    <p>$" ".join(content["value"].split()[:50])… \
+                    <small><a href=$entry_url>read more</a></small></p>
+                $if categories := entry.pop("category", None):
+                    <p><small>
+                    $for category in categories:
+                        <code>$category</code>\
+                        $if not loop.last:
+                            ,
+                    </small></p>
+            $elif entry_type == "event":
+                $# NOTE(review): bare pops assume every h-event carries
+                $# name and start — confirm against the crawler.
+                <p>$entry.pop("name")<br>
+                <small>$entry.pop("start") – $entry.pop("end", None)</small></p>
+                $ entry.pop("start-str", None)
+                $ entry.pop("end-str", None)
+                <form method=post action=/micropub>
+                <input type=hidden name=in-reply-to value="$entry_url">
+                <select name=rsvp>
+                <option value=yes>Yes</option>
+                <option value=no>No</option>
+                <option value=maybe>Maybe</option>
+                </select>
+                <button>RSVP</button>
+                </form>
+            <p style=text-align:right>\
+            $if author := entry.pop("author", [None])[0]:
+                $if isinstance(author, str):
+                    $author
+                $elif author_url := author.pop("url", None):
+                    $if uri(author_url).minimized not in all_urls:
+                        $author_url
+            <small>
+            $if location := entry.pop("location", None):
+                $if "latitude" in location:
+                    <a href=/map?lat=$location['latitude']&lng=$location['longitude']>\
+                    $location["latitude"], $location["longitude"]</a>
+            $if published := entry.pop("published", [None])[0]:
+                <time value="$published.isoformat()" datetime="$published"
+                class="dt-published">$published.isoformat()</time>
+                $# $get_dt(published).diff_for_humans()
+            $# $if updated := entry.pop("updated", None):
+            $#     $if updated != published:
+            $#         , <small>updated $get_dt(updated).diff_for_humans()</small>
+            $ entry.pop("published-str", None)
+            $ entry.pop("updated", None)
+            $ entry.pop("updated-str", None)
+
+            $if entry_url:
+                <br><a href=$entry_url>/$uri(entry_url).path.rstrip("/")</a>
+
+            $if syndication_urls := entry.pop("syndication", None):
+                $for syndication_url in syndication_urls:
+                    $if tw_match := re.match(r"https://twitter.com/(\w+)/status/(\d+)", syndication_url):
+                        $ user, tweet = tw_match.groups()
+                        <a href=https://twitter.com/$user/status/$tweet><img
+                        src=/sites/twitter.com/icon.png style=height:1em class="Twitter logo"></a>
+                        $# <a href=https://twitter.com/$user>$user</a> /
+                        $# <a href=https://twitter.com/$user/status/$tweet>$tweet</a>
+                    $else:
+                        $syndication_url\
+                        $if not loop.last:
+                            ,
+            </small></p>
+            $ entry.pop("uid", None) # FIXME: what is it good for?
+            $if entry:
+                $:render_uninterpreted(f"h-{entry_type}", entry, "properties")
+            </div>
+            $# Guarded with .get: "whostyle" is only produced by the
+            $# commented-out block above, so a direct index raised KeyError
+            $# and killed the page. NOTE(review): the closing markup below
+            $# is leftover from that disabled iframe — confirm and remove.
+            $if details.get("whostyle"):
+                </div>'>
+                </iframe>
+        $if rel_next := rels.pop("next", None):
+            <p>next: <a href=$rel_next[0]>$rel_next[0]</a></p>
+        $if rel_prev := rels.pop("prev", None):
+            <p>previous: <a href=$rel_prev[0]>$rel_prev[0]</a></p>
+        </div>
+$else:
+    <p><em>No <a href=https://indieweb.org/feed#How_to_Publish>content
+    feed</a> available.</em></p>
+</div>
+
+$# IndieMark scoreboard: details["scores"] is a list per level; each item
+$# is a (status, reason) pair indexed in step with `axes`. Status 3 means
+$# not-applicable and is skipped; other statuses index into `statuses`.
+$if scores := details.get("scores"):
+    <div style="display:grid;grid-template-columns:50% 50%;">
+
+    $def list_reasons(level):
+        $# Render the reason list for one IndieMark level (1-based).
+        <ul id=level$level>
+        $for n, (status, reason) in enumerate(details["scores"][level-1]):
+            $if status != 3:
+                <li id=$(level)-$axes[n] class=$statuses[status]>$:(reason.capitalize()).</li>
+        </ul>
+
+    <object id=scoreboard data=/sites/$(url.minimized)/scoreboard.svg></object>
+    <div id=indiemark>
+    <div style="background-color:#222;color:#999;font-size:.8em;padding:.5em 1em;">
+    <h4>Level 1: Use your domain for identity, sign-in, and publishing posts</h4>
+    $:list_reasons(1)
+    <h4>Level 2: Improve your personal identity and post multiple types of posts</h4>
+    $:list_reasons(2)
+    <h4>Level 3: Post and send replies from your own site</h4>
+    $:list_reasons(3)
+    <h4>Level 4: Receive and show comments</h4>
+    $:list_reasons(4)
+    <h4>Level 5: Manage comments</h4>
+    $:list_reasons(5)
+    </div>
+    </div>
+
+    </div>
+</div>
+
+$# Whatever rel links survived all the pops above are shown raw.
+$if rels:
+    $:render_uninterpreted("rel=", rels, "links")
+
+<footer style=font-size:.8em>
+<p><a href=/details/$(url.minimized)>Details (JSON)</a></p>
+
+<form method=post>
+$# $if headers := details.get("headers", None):
+$#     <p>$details["headers"]</p>
+<button>Recrawl</button>
+</form>
+$# Offer IndieAuth sign-in to anonymous visitors who own this site.
+$if not tx.user.session:
+    <form method=get action=/guests/sign-in>
+    <input type=hidden name=me value=$url.normalized>
+    <p>If you are the owner of this site you may sign in to gain access to more tools.</p>
+    <button>Sign in as $details["domain"]["name"]</button>
+    </form>
+$# $if tx.user.session and (tx.user.session["uid"][0] == details["url"]):
+$#     <h3>Site Owner Controls</h3>
+$#     <button>Test</button>
+</footer>
+
+<style>
+h2 {
+  border-bottom: .1em solid #333;
+  font-size: .9em; }
+</style>
index 0000000..5657e95
--- /dev/null
+$def with (urls)
+$# Index page: link each cached site by URL. `urls` is the list of rows
+$# from the cache table (each a mapping with at least a "url" key).
+$var title: Indexed Sites
+
+<ul>
+$for url in urls:
+    <li><a href=/$url["url"]>$url["url"]</a>
+    $# <code>\
+    $# $ size = url["details"]["size"]
+    $# $if size < 10:
+    $#     $round(size, 1)\
+    $# $else:
+    $#     $round(size)\
+    $# <small style=text-transform:uppercase>kb</small></code>
+    $# last crawled $url["crawled"].diff_for_humans()
+    </li>
+</ul>