Recover from repo crash
Committed 2cccbd
index 0000000..703eccb
--- /dev/null
+name: Run Tests and Analysis
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build-linux:
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+    runs-on: "ubuntu-latest"
+    steps:
+      # Presumably required by the "Generate dependency graph" step to render
+      # deps.svg -- confirm against gmpg.
+      - name: Install graphviz
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y graphviz
+
+      # actions/checkout defines no `python-version` input; the interpreter is
+      # selected by the setup-python step below.
+      - uses: actions/checkout@v3
+
+      - name: Install Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      # The dev group in pyproject.toml lists path dependencies on sibling
+      # checkouts (../gmpg, ../webint, ...), which presumably are not present
+      # on the runner, so the table is deleted before Poetry reads the file.
+      - name: Remove development dependencies
+        run: sed -i '/\[tool.poetry.group.dev.dependencies\]/,/\[/d' pyproject.toml
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          version: 1.2.2
+          virtualenvs-in-project: true
+
+      - name: Install dependencies
+        run: poetry install --no-interaction --no-root
+
+      - name: Install library
+        run: poetry install --no-interaction
+
+      # gmpg provides the test/analyze/graph commands used below.
+      - name: Install development tools
+        run: poetry add gmpg
+
+      - uses: psf/black@stable
+        with:
+          options: "--check --verbose"
+          src: "."
+          version: "23.7"
+
+      - uses: isort/isort-action@v1
+        with:
+          configuration: "--profile black"
+
+      # Put the project virtualenv on PATH for the following steps. The target
+      # file path is quoted so an unusual runner path cannot be word-split.
+      - run: echo "$(poetry env info --path)/bin" >> "$GITHUB_PATH"
+      - uses: jakebailey/pyright-action@v1
+
+      - name: Run tests
+        run: poetry run gmpg test
+
+      - name: Run analysis
+        run: poetry run gmpg analyze
+
+      - name: Generate dependency graph
+        run: poetry run gmpg graph
+
+      # v3 of upload-artifact has been retired by GitHub; v4 accepts the same
+      # `name`/`path` inputs for a single upload like this one.
+      - uses: actions/upload-artifact@v4
+        with:
+          name: analysis
+          path: |
+            test_coverage.xml
+            test_results.xml
+            api_python.json
+            deps.svg
index 0000000..c04bc49
--- /dev/null
index 0000000..65c2801
--- /dev/null
+[tool.poetry]
+name = "webint-search"
+version = "0.0.17"
+description = "search the web from your website"
+authors = ["Angelo Gladding <angelo@ragt.ag>"]
+license = "AGPL-3.0-or-later"
+packages = [{include="webint_search"}]
+
+# Registers the application object under the "webapps" entry point group.
+[tool.poetry.plugins."webapps"]
+search = "webint_search:app"
+
+# NOTE(review): the package also imports black, easyuri, requests, web, webagt
+# and (in its templates) pendulum, none of which are declared below; presumably
+# they arrive transitively through webint -- confirm.
+[tool.poetry.dependencies]
+python = ">=3.10,<3.11"
+webint = ">=0.0"
+typesense = "^0.15.0"
+youtube-search = "^2.1.2"
+wn = "^0.9.4"
+eng-to-ipa = "^0.0.2"
+pronouncing = "^0.2.0"
+nltk = "^3.8.1"
+restrictedpython = "^6.2"
+webint-owner = "^0.0"
+
+# Editable checkouts of sibling projects, for local development only.
+[tool.poetry.group.dev.dependencies]
+bgq = {path="../bgq", develop=true}
+gmpg = {path="../gmpg", develop=true}
+newmath = {path="../newmath", develop=true}
+sqlyte = {path="../sqlyte", develop=true}
+webint = {path="../webint", develop=true}
+webagt = {path="../webagt", develop=true}
+microformats = {path="../python-microformats", develop=true}
+
+# [[tool.poetry.source]]
+# name = "main"
+# url = "https://ragt.ag/code/pypi"
+
+# Turns off two pyright report categories for this project.
+[tool.pyright]
+reportGeneralTypeIssues = false
+reportOptionalMemberAccess = false
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
index 0000000..450f87c
--- /dev/null
+def test_app():
+ assert True
index 0000000..33070b2
--- /dev/null
+"""Search the web from your website."""
+
+import collections
+import random
+import re
+import sqlite3
+import string
+
+import black
+import easyuri
+import eng_to_ipa
+import nltk
+import pronouncing
+import requests
+import typesense
+import web
+import webagt
+import webint_owner
+import wn
+import youtube_search
+from RestrictedPython import (
+ compile_restricted,
+ limited_builtins,
+ safe_builtins,
+ utility_builtins,
+)
+from RestrictedPython.Eval import (
+ default_guarded_getattr,
+ default_guarded_getitem,
+ default_guarded_getiter,
+)
+from RestrictedPython.PrintCollector import PrintCollector
+
+# Web application object for this package, created with the "search" prefix.
+app = web.application(__name__, prefix="search")
+# Typesense client pointed at a node on localhost:8108 over plain HTTP.
+# NOTE(review): the API key is committed in source. Anyone with read access to
+# the repository can use it -- consider rotating it and loading it from
+# configuration or the environment instead.
+client = typesense.Client(
+ {
+ "nodes": [
+ {
+ "host": "localhost",
+ "port": "8108",
+ "protocol": "http",
+ }
+ ],
+ "api_key": "hpAnnsIdJse2NejW8RFKKRZ8z2lfhRjWCNtWWvwNFWXTyB1Y",
+ "connection_timeout_seconds": 2,
+ }
+)
+# Schema for a "books" collection. It is only referenced by the commented-out
+# bootstrap code below; nothing in this module creates the collection.
+books_schema = {
+ "name": "books",
+ "fields": [
+ {"name": "title", "type": "string"},
+ {"name": "authors", "type": "string[]", "facet": True},
+ {"name": "publication_year", "type": "int32", "facet": True},
+ {"name": "ratings_count", "type": "int32"},
+ {"name": "average_rating", "type": "float"},
+ ],
+ "default_sorting_field": "ratings_count",
+}
+# One-off bootstrap (disabled): create the collection and import documents
+# from a JSON Lines file.
+# client.collections.create(books_schema)
+# with open("/tmp/books.jsonl") as jsonl_file:
+# client.collections["books"].documents.import_(jsonl_file.read().encode("utf-8"))
+
+
+@app.wrap
+def linkify_head(handler, main_app):
+ """Ensure OpenSearch document is referenced from homepage."""
+ yield
+ if web.tx.request.uri.path == "":
+ web.add_rel_links(
+ search=(
+ "/search/opensearch.xml",
+ {
+ "type": "application/opensearchdescription+xml",
+ "title": "Angelo Gladding",
+ },
+ )
+ )
+
+
+def search_youtube(query):
+ return youtube_search.YoutubeSearch(query, max_results=10).to_dict()
+
+
+IW_HANDLE_RE = r"^@(?P<domain>[\w.]+)$"
+AP_HANDLE_RE = r"^@(?P<user>[\w.]+)@(?P<domain>[\w.]+)$"
+
+
+def iw_lookup(handle):
+ match = re.match(IW_HANDLE_RE, handle)
+ if match is None:
+ return
+ (domain,) = match.groups()
+ return webagt.get(domain).card
+
+
+def ap_lookup(handle):
+ match = re.match(AP_HANDLE_RE, handle)
+ if match is None:
+ return
+ user, domain = match.groups()
+ for link in requests.get(
+ f"https://{domain}/.well-known/webfinger?resource=acct:{user}@{domain}",
+ headers={"Accept": "application/activity+json"},
+ ).json()["links"]:
+ if link["rel"] == "self":
+ identity_url = link["href"]
+ break
+ else:
+ return
+ return webint_owner.ap_request(identity_url)
+
+
+@app.control("")
+class Search:
+ """Search everything."""
+
+ def get(self):
+ """Return an index of data sources."""
+ try:
+ form = web.form("q")
+ except web.BadRequest:
+ return app.view.index()
+ query = form.q
+
+ iw_profile = iw_lookup(query)
+ ap_profile = ap_lookup(query)
+
+ builtins = dict(safe_builtins)
+ builtins.update(**limited_builtins)
+ builtins.update(**utility_builtins)
+ env = {
+ "__builtins__": builtins,
+ "_getiter_": default_guarded_getiter,
+ "_getattr_": default_guarded_getattr,
+ "_getitem_": default_guarded_getitem,
+ }
+ secret = "".join(random.choices(string.ascii_lowercase, k=20))
+ try:
+ formatted_query = black.format_str(query, mode=black.mode.Mode()).rstrip()
+ except black.parsing.InvalidInput:
+ formatted_query = None
+ try:
+ exec(compile_restricted(f"{secret} = {query}", "<string>", "exec"), env)
+ except Exception as err:
+ result = None
+ else:
+ result = env[secret]
+
+ if re.match(r"^[0-9A-Za-z_-]{10}[048AEIMQUYcgkosw]$", query):
+ raise web.SeeOther(f"/player/{query}")
+ if query.startswith("!"):
+ bang, _, query = query[1:].partition(" ")
+ match bang:
+ case "yt":
+ return app.view.youtube_results(query, search_youtube(query))
+ case "imdb":
+ web.tx.response.headers["Referrer-Policy"] = "no-referrer"
+ url = easyuri.parse("https://www.imdb.com/find/")
+ url["q"] = query
+ raise web.SeeOther(url)
+ case "ud":
+ web.tx.response.headers["Referrer-Policy"] = "no-referrer"
+ url = easyuri.parse("https://www.urbandictionary.com/define.php")
+ url["term"] = query
+ raise web.SeeOther(url)
+
+ nltk.download("wordnet")
+ word = query
+ snow = nltk.stem.SnowballStemmer("english")
+ stem = snow.stem(query)
+ ipa_pronunciation = None
+ cmu_pronunciation = None
+ definition = None
+ rhymes = []
+ try:
+ en = wn.Wordnet("oewn:2022")
+ except (sqlite3.OperationalError, wn.Error):
+ pass # TODO download Open English WordNet `python -m wn download oewn:2022`
+ else:
+ try:
+ definition = en.synsets(query)[0].definition()
+ except IndexError:
+ try:
+ definition = en.synsets(stem)[0].definition()
+ except IndexError:
+ pass
+ if definition:
+ ipa_pronunciation = eng_to_ipa.convert(query)
+ try:
+ cmu_pronunciation = pronouncing.phones_for_word(query)[0]
+ except IndexError:
+ pass
+ rhymes = pronouncing.rhymes(query)
+
+ web_results = [
+ (
+ webagt.uri(webagt.uri(result.element.attrib["href"])["uddg"][0]),
+ result.element.text if result.element.text is not None else "",
+ )
+ for result in webagt.get(
+ f"https://html.duckduckgo.com/html?q={query}"
+ ).dom.select(".result__a")
+ ]
+
+ code_projects = collections.Counter()
+ code_files = collections.defaultdict(list)
+ for code_project, code_file in web.application("webint_code").model.search(
+ query
+ ):
+ code_projects[code_project] += 1
+ code_files[code_project].append(code_file)
+
+ # books = client.collections["books"].documents.search(
+ # {
+ # "q": query,
+ # "query_by": "authors,title",
+ # "sort_by": "ratings_count:desc",
+ # }
+ # )
+ books = {}
+
+ return app.view.results(
+ query,
+ # scope,
+ iw_profile,
+ ap_profile,
+ formatted_query,
+ result,
+ ipa_pronunciation,
+ cmu_pronunciation,
+ definition,
+ rhymes,
+ web_results,
+ code_projects,
+ code_files,
+ books,
+ )
+
+
+@app.control("opensearch.xml")
+class OpenSearch:
+ """"""
+
+ def get(self):
+ web.header("Content-Type", "application/xml; charset=utf-8")
+ return bytes(str(app.view.opensearch()), "utf-8")
+
+
+@app.control("collections")
+class Collections:
+ """"""
+
+ def get(self):
+ return app.view.collections(client.collections.retrieve())
index 0000000..18a5d55
--- /dev/null
+from pprint import pformat
+
+import pendulum
+import webagt
+from web import tx
+
+__all__ = ["pformat", "tx", "webagt", "pendulum"]
index 0000000..831c7cd
--- /dev/null
+$def with (collections)
+$var title: Collections
+
+$# Each known key is popped as it is rendered, so the trailing pformat() dumps
+$# show only the keys that were not displayed explicitly.
+$for collection in collections:
+ <h2>$collection.pop("name")</h2>
+ <p>created $pendulum.from_timestamp(collection.pop("created_at")).diff_for_humans(),
+ contains $collection.pop("num_documents") documents</p>
+ <ul>
+ $ default_sort = collection.pop("default_sorting_field")
+ $for field in collection.pop("fields"):
+ $ name = field.pop("name")
+ <li><details>
+ <summary>$name
+ $if name == default_sort:
+ <abbr title="default sort"><strong>*</strong></abbr>
+ <small><code>$field.pop("type")</code></small></summary>
+ <pre>$pformat(field)</pre>
+ </details></li>
+ </ul>
+ <pre>$pformat(collection)</pre>
index 0000000..fb1e475
--- /dev/null
+$def with ()
+$var title: Search
+
+$# Placeholder body; this view is returned when the request carries no query.
+<p>...</p>
index 0000000..6f2c64c
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
+ $# Static OpenSearch description: every URL below is hard-coded to ragt.ag.
+ <ShortName>ragt.ag</ShortName>
+ <LongName>Angelo Gladding (ragt.ag) website search</LongName>
+ <Description>Search Angelo Gladding's website</Description>
+ <Language>en-US</Language>
+ <Contact>angelo@ragt.ag</Contact>
+ <Url type="text/html" method="get" template="https://ragt.ag/search?q={searchTerms}"/>
+ $# <Url type="application/x-suggestions+json" method="get" template="https://ragt.ag/search/opensearch/suggestions?q={searchTerms}"/>
+ <Image width="16" height="16">https://ragt.ag/static/eye.png</Image>
+ <Image height="32" width="32">https://ragt.ag/static/eye.png</Image>
+ <Image height="64" width="64">https://ragt.ag/static/eye.png</Image>
+ <InputEncoding>UTF-8</InputEncoding>
+ <OutputEncoding>UTF-8</OutputEncoding>
+ <SearchForm>https://ragt.ag/</SearchForm>
+ <Tags>programming gardening python permaculture homesteading</Tags>
+ <Query role="example" searchTerms="canopy" />
+ <Developer>Angelo Gladding</Developer>
+</OpenSearchDescription>
index 0000000..7316214
--- /dev/null
+$def with (query, iw_profile, ap_profile, formatted_query, result, ipa_pronunciation, cmu_pronunciation, definition, rhymes, web_results, code_projects, code_files, books)
+$var breadcrumbs = ("search", "Search")
+
+<style>
+/* Preformatted output and the IndieWeb/ActivityPub profile card. */
+pre {
+ font-size: .8em;
+ width: 100%; }
+.profile * {
+ vertical-align: top; }
+.profile_summary {
+ font-size: .8em; }
+.profile_summary p {
+ margin-bottom: 0; }
+.profile form {
+ margin-top: 1em; }
+</style>
+
+<form style="margin:1em 0">
+ <input name=q value="$query"> <button>Search</button>
+</form>
+
+$# Profile card for a handle query: the IndieWeb card when one was found,
+$# otherwise the ActivityPub actor.
+$# NOTE(review): the profile_summary substitutions (note / summary) use the
+$# colon form, which presumably emits the value without HTML escaping; both
+$# values come from remote sites -- confirm they are sanitized upstream.
+$if iw_profile or ap_profile:
+ <div class=profile
+ style="display:grid;grid-gap:1em;grid-template-columns:20% auto;margin:2em 0 0 0">
+ $if iw_profile:
+ $ profile = iw_profile
+ $if photos := profile.get("photo"):
+ <img src=$photos[0] style=width:100%>
+ <div>
+ <p style=margin:0><big><strong>$profile["name"][0]</strong></big><br>
+ $if urls := profile.get("url"):
+ <small>
+ $for url in urls:
+ <a href=$url>$url</a>
+ $if not loop.last:
+ •
+ </small></p>
+ <div class=profile_summary>$:profile["note"][0]</div>
+ $if tags := profile.get("category"):
+ <div style=font-size:.8em>
+ $for tag in tags:
+ $if "value" in tag:
+ $tag["value"]
+ $else:
+ $tag
+ $if not loop.last:
+ •
+ </div>
+ </div>
+ $elif ap_profile:
+ $ profile = ap_profile
+ $if photos := profile.get("icon"):
+ $if not isinstance(photos, list):
+ $ photos = [photos]
+ <img src=$photos[0]["url"] style=width:100%>
+ <div>
+ <p style=margin:0><big><strong>$profile["name"]</strong></big><br>
+ $if "url" in profile:
+ $ urls = profile["url"]
+ $if not isinstance(urls, list):
+ $ urls = [urls]
+ <small>
+ $for url in urls:
+ <a href=$url>$url</a>
+ $if not loop.last:
+ •
+ </small></p>
+ <div class=profile_summary>$:profile["summary"]</div>
+ $if "tag" in profile:
+ <div style=font-size:.8em>
+ $for tag in profile["tag"]:
+ $tag["name"]
+ $if not loop.last:
+ •
+ </div>
+ </div>
+ </div>
+ $if tx.user.is_owner:
+ <form action=/people method=post style=text-align:right>
+ <input type=hidden name=identifier value="$query">
+ <button>Follow</button>
+ </form>
+ <details>
+ <summary>Full Summary</summary>
+ <pre>$pformat(profile)</pre>
+ </details>
+
+$# Value of the query evaluated as an expression, shown under its formatted
+$# source.
+$# NOTE(review): this is a truthiness test, so falsy values such as 0 or an
+$# empty string are not displayed -- confirm that is intended.
+$if result:
+ <pre>$formatted_query<br>
+ <big>$result</big></pre>
+
+$# Dictionary entry: pronunciation (CMU phones in the tooltip), definition and
+$# rhymes.
+$if definition:
+ <p><strong title="$cmu_pronunciation">$ipa_pronunciation</strong>, $definition<br>
+ <small>rhymes: $", ".join(rhymes)</small></p>
+
+$# Web results as (url, title) pairs. Hosts listed in featured_sites are shown
+$# at full opacity with an icon and with the site suffix stripped from the
+$# title; other hosts are shown at .75 opacity; hosts in blocked_sites and
+$# developer.mozilla.org paths not starting with "en-US" are skipped.
+$# NOTE(review): font_size is assigned for each result but never used in the
+$# markup below.
+<h2>Across the Web</h2>
+$if web_results:
+ <div>
+ $ featured_sites = {
+ $ "$tx.host.name": ("$tx.origin/media/6ysi.png", " — $tx.host.owner['name'][0]"),
+ $ "indieweb.org": ("https://indieweb.org/favicon.ico", " - IndieWeb"),
+ $ "en.wikipedia.org": ("https://www.wikipedia.org/static/favicon/wikipedia.ico", " - Wikipedia"),
+ $ "www.youtube.com": ("https://www.youtube.com/s/desktop/a24ea7cc/img/favicon.ico", " - YouTube"),
+ $ "developer.mozilla.org": ("https://developer.mozilla.org/favicon-48x48.cbbd161b.png", " | MDN - MDN Web Docs"),
+ $ "www.w3.org": ("https://www.w3.org/favicon.ico", " - World Wide Web Consortium (W3C)"),
+ $ "stackoverflow.com": ("https://cdn.sstatic.net/Sites/stackoverflow/Img/favicon.ico?v=ec617d715196", " - Stack Overflow"),
+ $ "twitter.com": ("https://abs.twimg.com/favicons/twitter.3.ico", " | Twitter"),
+ $ "www.imdb.com": ("https://m.media-amazon.com/images/G/01/imdb/images-ANDW73HA/favicon_desktop_32x32._CB1582158068_.png", " - IMDb"),
+ $ }
+ $ blocked_sites = (
+ $ "www.$tx.host.name",
+ $ "www.freecodecamp.org",
+ $ "www.tutorialspoint.com",
+ $ "www.w3schools.com",
+ $ )
+ <div>
+ $for result_url, result_text in web_results:
+ $ opacity = .75
+ $ font_size = "1em"
+ $ icon = None
+ $ suffix = ""
+ $if result_url.host in featured_sites:
+ $ font_size = "1.25em"
+ $ icon, suffix = featured_sites[result_url.host]
+ $ opacity = 1
+ $if result_url.host == "developer.mozilla.org" and not result_url.path.startswith("en-US"):
+ $continue
+ $elif result_url.host in blocked_sites:
+ $continue
+ <div style="opacity:$opacity;line-height:1;margin:.25em 0">
+ <a href=$result_url>$result_text.removesuffix(suffix)</a><br>
+ <small><small>
+ $if icon:
+ <img style=height:1.25em;position:relative;top:.225em src=$icon> \
+ $str(result_url).removeprefix(f"{result_url.origin}/")
+ $else:
+ $result_url
+ </small></small>
+ </div>
+ </div>
+ </div>
+$else:
+ <p>0 results</p>
+
+<h2>Code</h2>
+$# Projects ordered by number of matching files, with up to three file links
+$# each. The emptiness check comes first: most_common() on an empty Counter
+$# returns an empty list, so indexing it would raise IndexError when nothing
+$# matched.
+$if code_projects and code_projects.most_common()[0][0]:
+ <ul>
+ $for project, file_count in code_projects.most_common():
+ <li><a href=$tx.origin/code/projects/$project>$project</a>
+ $ files = code_files[project]
+ <small>\
+ $for code_file in sorted(files)[:3]:
+ <a href=$tx.origin/code/projects/$project/files/$code_file
+ style=color:#777>$code_file</a>\
+ $if not loop.last:
+ , \
+ $if len(files) > 3:
+ <em>and $(file_count - 3) more</em>
+ </small></li>
+ </ul>
+$else:
+ <p>0 results</p>
+
+$# <h2>Books</h2>
+$# <p>found $books.pop("found") document out of $books.pop("out_of"),
+$# search took $books.pop("search_time_ms")ms</p>
+$# <ul>
+$# $for book in books.pop("hits"):
+$# <li><p>$book.pop("document")</p>
+$# <p>$book.pop("highlights")</p>
+$# <pre>$pformat(book)</pre>
+$# </li>
+$# </ul>
+$# XXX <pre>$pformat(books)</pre>
index 0000000..ebe339c
--- /dev/null
+$def with (query, results)
+$var title: YouTube Search Results for <code>$query</code>
+
+$# One row per result: thumbnail, link to the video, metadata, and a form that
+$# posts the video URL to the site's /media endpoint.
+$for result in results:
+ <div>
+ <img src="$result['thumbnails'][0]" style=display:inline-block;width:5em>
+ <form action=$tx.origin/media method=post style=display:inline-block>
+ $ url = f"https://youtube.com/watch?v={result['id']}"
+ <a href="$url">$result["title"]</a><br>
+ <small><strong>$result["duration"]</strong>, $result["channel"], $result["publish_time"], $result["views"]</small>
+ <input name=url value="$url" type=hidden><button>Download</button>
+ </form>
+ </div>