Bootstrap
Committed 512cec
index 0000000..4711d41
--- /dev/null
+poetry.lock
+.coverage
+.test_coverage.xml
+.test_results.xml
index 0000000..c9d35cf
--- /dev/null
+https://github.com/kylewm/mf2util @b1acda6 Copyright (c) 2014
+ Kyle Mahan <kyle@kylewm.com> <https://kylewm.com>
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the
+ distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
index 0000000..6055c1f
--- /dev/null
+[microformats][0] are the simplest way to openly publish contacts, events,
+reviews, recipes, and other structured information on the web.
+
+ >>> import mf
+ >>> url = "https://alice.example"
+ >>> doc = f'''
+ ... <p class=h-card><a href={url}>Alice</a></p>
+ ... <ul class=h-feed>
+ ... <li class=h-entry>foo
+ ... <li class=h-entry>bar
+ ... </ul>
+ ... '''
+ >>> page = mf.parse(doc=doc, url=url)
+
+ # TODO >>> dict(page)
+ # TODO >>> page.json
+
+ >>> card = page["items"][0]
+ >>> card["type"]
+ ['h-card']
+ >>> card["properties"]
+ {'name': ['Alice'], 'url': ['https://alice.example']}
+ >>> feed = page["items"][1]
+ >>> feed["children"][0]["properties"]["name"]
+ ['foo']
+
+ >>> mf.util.representative_card(page, url)
+ {'name': ['Alice'], 'url': ['https://alice.example']}
+ >>> mf.util.representative_feed(page, url)["items"][0]["name"]
+ ['foo']
+
+ # TODO >>> page.representative_card
+ # TODO {'name': ['Alice'], 'url': ['https://alice.example']}
+ # TODO >>> page.representative_feed["items"][0]["name"]
+ # TODO ['foo']
+
+Based upon [`mf2py`][1] and [`mf2util`][2].
+
+[0]: https://microformats.org/wiki/microformats
+[1]: https://github.com/microformats/mf2py
+[2]: https://github.com/kylewm/mf2util
index 0000000..a228e34
--- /dev/null
+"""
+Microformats utilities.
+
+Microformats2 is a general way to mark up any HTML document with
+classes and properties. This module uses domain-specific assumptions
+about the classes (specifically h-entry and h-event) to extract
+certain interesting properties.
+
+"""
+
+import collections
+import datetime
+import re
+import string
+import unicodedata
+from urllib.parse import urljoin
+
+import bs4
+import easyuri
+from mf2py import parse
+
+__all__ = ["parse", "representative_card"]
+
# Maps an HTML tag name to the attributes on that tag that carry URLs.
# Read by `convert_relative_paths_to_absolute` to decide what to rewrite.
URL_ATTRIBUTES = dict(
    a=["href"],
    link=["href"],
    img=["src"],
    audio=["src"],
    video=["src", "poster"],
    source=["src"],
)

# Property names that may describe a location.
# From https://indieweb.org/location#How_to_determine_the_location_of_a_microformat
LOCATION_PROPERTIES = frozenset(
    {
        "street-address",
        "extended-address",
        "post-office-box",
        "locality",
        "region",
        "postal-code",
        "country-name",
        "label",
        "latitude",
        "longitude",
        "altitude",
        "name",
    }
)
+
+
def get_url(parsed):
    """Extract a flat list of URLs from URL strings and/or h-* dicts.

    This is useful when parsing e.g. in-reply-to, whose values may be
    plain URLs or embedded h-cite items carrying a ``url`` property.

    :param parsed: one of

        * an mf2py parsed dict (its ``"items"`` list is examined),
        * a list/tuple of property values (URL strings and/or h-* dicts),
        * a single URL string or a single h-* dict.
    :return: a list of URLs; strings are taken verbatim and h-* dicts
        contribute every value of their ``url`` property
    """
    # A parsed document is recognised by its "items" key; anything else is
    # treated as a property value (or a list of them), as the original
    # docstring promised.
    if isinstance(parsed, dict) and "items" in parsed:
        candidates = parsed["items"]
    elif isinstance(parsed, (list, tuple)):
        candidates = parsed
    else:
        candidates = [parsed]

    urls = []
    for item in candidates:
        if isinstance(item, str):
            urls.append(item)
        elif isinstance(item, dict) and any(
            x.startswith("h-") for x in item.get("type", [])
        ):
            urls.extend(item.get("properties", {}).get("url", []))

    return urls
+
+
def find_first_entry(parsed, types):
    """Return the first h-* item of one of `types`, searching in BFS order.

    Values embedded as properties are not searched.

    :param dict parsed: a mf2py parsed dict
    :param list types: target types, e.g. ['h-entry', 'h-event']
    :return: an mf2py item that is one of `types`, or None
    """
    for entry in _find_all_entries(parsed, types, False):
        return entry
    return None
+
+
def find_all_entries(parsed, types, include_properties=False):
    """Collect every h-* item of the given types, in BFS order.

    The search covers the top-level items, their children and all further
    descendants. Items embedded as property values (e.g. the h-card in
    "p-author h-card") are visited only when `include_properties` is True.

    :param dict parsed: a mf2py parsed dict
    :param list types: target types, e.g. ['h-entry', 'h-event']
    :param boolean include_properties: include properties in search of entries
    :return: a list of all entries with any of the target types
    """
    return [*_find_all_entries(parsed, types, include_properties)]
+
+
def _find_all_entries(parsed, types, include_properties):
    """Lazily yield items of `parsed` whose type list intersects `types`.

    Breadth-first walk starting from ``parsed["items"]``. Children are
    always followed; dict-valued property values are followed only when
    `include_properties` is true.
    """
    pending = collections.deque(parsed["items"])
    while pending:
        current = pending.popleft()
        item_types = current.get("type", [])
        if any(wanted in item_types for wanted in types):
            yield current
        pending.extend(current.get("children", []))
        if not include_properties:
            continue
        # Only dict values can be embedded microformats; plain strings are
        # skipped.
        for values in current.get("properties", {}).values():
            pending.extend(value for value in values if isinstance(value, dict))
+
+
def find_datetimes(parsed):
    """Find published, updated, start, and end dates.

    Looks at the first h-entry or h-event in the document. Each of the four
    properties is always present in the result once such an item is found;
    a property that the item does not carry maps to None.

    :param dict parsed: a mf2py parsed dict
    :return: a dictionary from property type to datetime or date (or None);
        empty if the document has no h-entry/h-event
    :raises ValueError: if a date value is present but not recognizable
    """
    # `find_first_entry` requires the list of target types; the previous
    # call omitted it and therefore always raised TypeError.
    hentry = find_first_entry(parsed, ["h-entry", "h-event"])
    result = {}

    if hentry:
        props = hentry.get("properties", {})
        for prop in ("published", "updated", "start", "end"):
            date_strs = props.get(prop) or []
            # Use the first value only: joining several complete datetimes
            # with a space never yields a parseable string.
            result[prop] = parse_dt(date_strs[0]) if date_strs else None

    return result
+
+
def parse_dt(s):
    """Convert an mf2 date string into a date or datetime object.

    The definition for microformats2 dt-* properties is fairly lenient.
    Accepted forms are ``YYYY-MM-DD`` optionally followed by ``T`` or a
    space and ``HH:MM[:SS[.ffffff]]``, optionally followed by a timezone
    (``Z``, ``+HH:MM``, ``+HHMM`` or ``+HH``). Datetimes will be naive
    unless a timezone is specified. Surrounding whitespace is ignored and
    internal runs of whitespace are collapsed.

    :param str s: a mf2 string representation of a date or datetime
    :return: datetime.date when no time is given, datetime.datetime
        otherwise, or None if `s` is empty/None
    :raises ValueError: if the string is not recognizable
    """

    if not s:
        return None

    # Collapse whitespace and trim it, so values copied out of markup with
    # stray newlines or padding still match.
    s = re.sub(r"\s+", " ", s).strip()
    date_re = r"(?P<year>\d{4,})-(?P<month>\d{1,2})-(?P<day>\d{1,2})"
    time_re = r"(?P<hour>\d{1,2}):(?P<minute>\d{2})(:(?P<second>\d{2})(\.(?P<microsecond>\d+))?)?"
    # Offset minutes are optional so that "+XX" is accepted.
    tz_re = r"(?P<tzz>Z)|(?P<tzsign>[+-])(?P<tzhour>\d{1,2})(:?(?P<tzminute>\d{2}))?"
    dt_re = f"{date_re}((T| ){time_re} ?({tz_re})?)?$"

    m = re.match(dt_re, s)
    if not m:
        raise ValueError(f"unrecognized datetime {s}")

    year = int(m.group("year"))
    month = int(m.group("month"))
    day = int(m.group("day"))

    hour = m.group("hour")
    if not hour:
        return datetime.date(year, month, day)

    minute = m.group("minute") or "00"
    second = m.group("second") or "00"
    # The fraction has arbitrary length; normalise it to exactly six digits
    # (microseconds) by truncating or right-padding with zeros.
    fraction = m.group("microsecond")
    microsecond = int(fraction[:6].ljust(6, "0")) if fraction else 0

    dt = datetime.datetime(
        year, month, day, int(hour), int(minute), int(second), microsecond
    )

    if m.group("tzz"):
        return dt.replace(tzinfo=datetime.timezone.utc)

    tzsign = m.group("tzsign")
    tzhour = m.group("tzhour")
    tzminute = m.group("tzminute") or "00"
    if tzsign and tzhour:
        offset = datetime.timedelta(hours=int(tzhour), minutes=int(tzminute))
        if tzsign == "-":
            offset = -offset
        dt = dt.replace(
            tzinfo=datetime.timezone(offset, f"{tzsign}{tzhour}:{tzminute}")
        )

    return dt
+
+
def get_plain_text(values, strip=True):
    """Return the first of `values` as plain text.

    A dict value (such as an e-* property) is reduced to its "value"
    entry, defaulting to the empty string when that key is missing.

    :param list values: a list of values
    :param boolean strip: true if we should strip the plaintext value
    :return: a string, or None when `values` is empty or None
    """
    if not values:
        return None
    first = values[0]
    text = first.get("value", "") if isinstance(first, dict) else first
    return text.strip() if strip else text
+
+
def classify_comment(parsed, target_urls):
    """Classify how the first h-entry in `parsed` refers to a target post.

    Checks the entry's reply, like and repost properties for a reference
    to any of `target_urls`. A reference is either a bare URL or an
    embedded item whose ``url`` property contains a target URL.

    :param dict parsed: a mf2py parsed dict
    :param list target_urls: a collection of urls that represent the
      target post. this can include alternate or shortened URLs.
    :return: a list of applicable comment types, in order of discovery and
      without duplicates; a matching reply contributes 'rsvp' and/or
      'invite' before 'reply' when the entry has those properties
    """
    hentry = find_first_entry(parsed, ["h-entry"])
    if not hentry:
        return []
    props = hentry["properties"]

    def references_target(ref):
        if isinstance(ref, dict):
            ref_urls = ref.get("properties", {}).get("url", [])
            return any(ref_url in target_urls for ref_url in ref_urls)
        return ref in target_urls

    # An RSVP or invitation is a specialised reply, so list those labels
    # ahead of the generic "reply".
    reply_labels = []
    if "rsvp" in props:
        reply_labels.append("rsvp")
    if "invitee" in props:
        reply_labels.append("invite")
    reply_labels.append("reply")

    # TODO handle rel=in-reply-to
    groups = (
        (("in-reply-to", "reply-to", "reply"), reply_labels),
        (("like-of", "like"), ("like",)),
        (("repost-of", "repost"), ("repost",)),
    )

    found = []
    for prop_names, labels in groups:
        for prop_name in prop_names:
            for ref in props.get(prop_name, []):
                if not references_target(ref):
                    continue
                for label in labels:
                    if label not in found:
                        found.append(label)
    return found
+
+
def parse_author(obj):
    """Parse the value of a u-author property, which can either be a
    compound h-card or a single name or url.

    For a dict, the first value of each of its ``name``, ``photo`` and
    ``url`` properties is copied (in that order); a dict without a
    ``properties`` key yields an empty result rather than raising. For a
    non-empty string, an http(s) URL becomes ``url`` and anything else
    becomes ``name``.

    :param object obj: the mf2 property value, either a dict or a string
    :return: a dict containing whichever of the author's name, photo, and
        url could be determined (possibly empty)
    """
    result = {}
    if isinstance(obj, dict):
        props = obj.get("properties", {})
        # Key order matters: callers test for a result holding only "url".
        for key in ("name", "photo", "url"):
            values = props.get(key)
            if values:
                result[key] = values[0]
    elif obj:
        if obj.startswith(("http://", "https://")):
            result["url"] = obj
        else:
            result["name"] = obj
    return result
+
+
def find_author(parsed, source_url=None, hentry=None, fetch_mf2_func=None):
    """Use the authorship discovery algorithm
    https://indiewebcamp.com/authorship to determine an h-entry's
    author.

    :param dict parsed: an mf2py parsed dict.
    :param str source_url: the source of the parsed document (accepted for
      interface compatibility; not consulted by this implementation).
    :param hentry dict: optional, the h-entry we're examining, if omitted,
      we'll just use the first one
    :param fetch_mf2_func callable: optional function that takes a URL
      and returns parsed mf2
    :return: a dict containing the author's name, photo, and url; a dict
      with only ``url`` when an author page is known but cannot be fetched;
      or None when there is no h-entry or no author can be determined
    """

    def find_hentry_author(hentry):
        # Only the first author value is considered.
        for obj in hentry["properties"].get("author", []):
            return parse_author(obj)

    def find_parent_hfeed_author(hentry):
        for hfeed in _find_all_entries(parsed, ["h-feed"], False):
            # find the h-entry's parent h-feed
            if hentry in hfeed.get("children", []):
                for obj in hfeed["properties"].get("author", []):
                    return parse_author(obj)

    if not hentry:
        hentry = find_first_entry(parsed, ["h-entry"])
        if not hentry:
            return None

    author_page = None

    # 3. if the h-entry has an author property, use that
    author = find_hentry_author(hentry)

    # 4. otherwise if the h-entry has a parent h-feed with author property,
    #    use that
    if not author:
        author = find_parent_hfeed_author(hentry)

    # 5. if an author property was found
    if author:
        # 5.2 otherwise if author property is an http(s) URL, let the
        #     author-page have that URL
        if list(author.keys()) == ["url"]:
            author_page = author["url"]
        # 5.1 if it has an h-card, use it, exit.
        # 5.3 otherwise use the author property as the author name,
        #     exit.
        else:
            return author

    # 6. if there is no author-page and the h-entry's page is a permalink page
    if not author_page:
        # 6.1 if the page has a rel-author link, let the author-page's
        #     URL be the href of the rel-author link
        rel_authors = parsed.get("rels", {}).get("author", [])
        if rel_authors:
            author_page = rel_authors[0]

    # 7. if there is an author-page URL
    if author_page:
        if not fetch_mf2_func:
            return {"url": author_page}

        # 7.1 get the author-page from that URL and parse it for microformats2
        # Kept in its own name so the h-entry's page (`parsed`) stays
        # available for step 7.4.
        author_parsed = fetch_mf2_func(author_page)
        hcards = find_all_entries(author_parsed, ["h-card"])

        # 7.2 if author-page has 1+ h-card with url == uid ==
        #     author-page's URL, then use first such h-card, exit.
        for hcard in hcards:
            hcard_url = get_plain_text(hcard["properties"].get("url"))
            hcard_uid = get_plain_text(hcard["properties"].get("uid"))
            if (
                hcard_url
                and hcard_uid
                and hcard_url == hcard_uid
                and hcard_url == author_page
            ):
                return parse_author(hcard)

        # 7.3 else if author-page has 1+ h-card with url property
        #     which matches the href of a rel-me link on the author-page
        #     (perhaps the same hyperlink element as the u-url, though not
        #     required to be), use first such h-card, exit.
        rel_mes = author_parsed.get("rels", {}).get("me", [])
        for hcard in hcards:
            hcard_url = get_plain_text(hcard["properties"].get("url"))
            if hcard_url and hcard_url in rel_mes:
                return parse_author(hcard)

        # 7.4 if the h-entry's page has 1+ h-card with url ==
        #     author-page URL, use first such h-card, exit.
        # This step searches the h-entry's own page, not the fetched
        # author page.
        for hcard in find_all_entries(parsed, ["h-card"]):
            hcard_url = get_plain_text(hcard["properties"].get("url"))
            if hcard_url and hcard_url == author_page:
                return parse_author(hcard)

    # 8. otherwise no deterministic author can be found.
    return None
+
+
def representative_hcard(parsed, source_url):
    """Find the representative h-card for a URL

    http://microformats.org/wiki/representative-h-card-parsing

    Candidates are all h-cards in the document, including ones embedded as
    property values. Three rules are tried in order: uid and url both
    contain `source_url`; a url that is also a rel=me link; exactly one
    card whose url contains `source_url`.

    :param dict parsed: an mf2 parsed dict
    :param str source_url: the source of the parsed document.
    :return: the representative h-card if one is found, otherwise None
    """
    hcards = find_all_entries(parsed, ["h-card"], include_properties=True)

    def values_of(card, key):
        return card["properties"].get(key, [])

    # Rule 1: uid and url both match source_url
    for card in hcards:
        if source_url in values_of(card, "uid") and source_url in values_of(
            card, "url"
        ):
            return card

    # Rule 2: url that is also a rel=me
    rel_me = parsed.get("rels", {}).get("me", [])
    for card in hcards:
        for card_url in values_of(card, "url"):
            if card_url in rel_me:
                return card

    # Rule 3: single hcard with matching url (ambiguous if more than one)
    matching = [card for card in hcards if source_url in values_of(card, "url")]
    if len(matching) == 1:
        return matching[0]
    return None
+
+
def convert_relative_paths_to_absolute(source_url, base_href, html):
    """Rewrite relative URLs in foreign HTML as absolute ones.

    Useful for displaying images or links in reply contexts and comments.
    The tag/attribute pairs to rewrite come from `URL_ATTRIBUTES`. Note
    that this function uses a regular expression to avoid adding a library
    dependency on a proper parser, so only quoted attribute values are
    recognised.

    :param str source_url: the source of the parsed document; when falsy
      the html is returned untouched
    :param str base_href: (optional) the href value of the base tag; if
      given it is resolved against `source_url` to form the base URL
    :param str html: the text of the source document
    :return: the document with relative urls replaced with absolute ones
    """
    if not source_url:
        return html

    def absolutize(match):
        # Group 1 is the attribute value; everything else in the match is
        # copied through unchanged.
        if base_href:
            base_url = urljoin(source_url, base_href)
        else:
            base_url = source_url
        whole = match.string
        before_value = whole[match.start(0) : match.start(1)]
        after_value = whole[match.end(1) : match.end(0)]
        return before_value + urljoin(base_url, match.group(1)) + after_value

    regex_flags = re.DOTALL | re.MULTILINE | re.IGNORECASE
    for tagname, attributes in URL_ATTRIBUTES.items():
        for attribute in attributes:
            html = re.compile(
                rf"<{tagname}[^>]*?{attribute}\s*=\s*['\"](.*?)['\"]",
                flags=regex_flags,
            ).sub(absolutize, html)

    return html
+
+
def is_name_a_title(name, content):
    """Determine whether the name property represents an explicit title.

    Typically when parsing an h-entry, we check whether p-name ==
    e-content (value). If they are non-equal, then p-name likely
    represents a title.

    However, occasionally we come across an h-entry that does not
    provide an explicit p-name. In this case, the name is
    automatically generated by converting the entire h-entry content
    to plain text. This definitely does not represent a title, and
    looks very bad when displayed as such.

    To handle this case, we broaden the equality check to see if
    content is a subset of name. We also strip out whitespace and
    punctuation just to make the check a little more forgiving.

    :param str name: the p-name property that may represent a title
    :param str content: the plain-text version of an e-content property
    :return: True if the name likely represents a separate, explicit title
    """
    # Escape the characters before placing them in a character class:
    # unescaped, the "\]" inside string.punctuation reads as an escaped
    # bracket and backslashes would never be stripped.
    ignorable = "[" + re.escape(string.whitespace + string.punctuation) + "]"

    def normalize(s):
        if not isinstance(s, str):
            s = s.decode("utf-8")
        s = unicodedata.normalize("NFKD", s)
        s = s.lower()
        return re.sub(ignorable, "", s)

    if not content:
        return True
    if not name:
        return False
    return normalize(content) not in normalize(name)
+
+
def post_type_discovery(hentry):
    """Implementation of the post-type discovery algorithm
    defined here https://indiewebcamp.com/post-type-discovery#Algorithm

    h-cards and h-events are classified by type alone. Otherwise the first
    response/photo property that is present decides; failing that, the
    entry is an 'article' when its name looks like a real title and a
    'note' when it does not.

    :param dict hentry: mf2 item representing the entry to test

    :return: string, one of: 'org', 'person', 'event', 'rsvp',
                     'invite', 'reply', 'repost', 'like', 'photo',
                     'article', 'note', 'follow'

    """
    props = hentry.get("properties", {})
    item_types = hentry.get("type", [])

    if "h-card" in item_types:
        card_name = get_plain_text(props.get("name"))
        card_org = get_plain_text(props.get("org"))
        # A card whose name equals its organisation describes the org itself.
        is_org = bool(card_name and card_org and card_name == card_org)
        return "org" if is_org else "person"

    if "h-event" in item_types:
        return "event"

    # Checked in this order; the first property present wins.
    implied_types = {
        "rsvp": "rsvp",
        "invitee": "invite",
        "in-reply-to": "reply",
        "repost-of": "repost",
        "like-of": "like",
        "follow-of": "follow",
        "photo": "photo",
    }
    for prop, implied_type in implied_types.items():
        if props.get(prop) is not None:
            return implied_type

    # check name ~= content (falling back to the summary)
    name = get_plain_text(props.get("name"))
    content = get_plain_text(props.get("content")) or get_plain_text(
        props.get("summary")
    )
    if content and name and is_name_a_title(name, content):
        return "article"
    return "note"
+
+
def _interpret_common_properties(
    parsed,
    source_url,
    base_href,
    hentry,
    use_rel_syndication,
    want_json,
    fetch_mf2_func,
):
    """Extract the properties shared by entries and events from `hentry`.

    :param dict parsed: the mf2 parsed document containing `hentry`
    :param str source_url: URL of the document; used to absolutize URLs in
      the content and passed on to author discovery
    :param str base_href: href of the document's base tag, or None
    :param dict hentry: the mf2 item to interpret
    :param boolean use_rel_syndication: merge the document's
      rel=syndication links into the item's own syndication list
    :param boolean want_json: keep dates as strings instead of converting
      them to date/datetime objects
    :param callable fetch_mf2_func: optional fetcher passed to
      :func:`find_author`
    :return: a dict with whichever of url, uid, photo, featured, logo,
      dates, author, content, content-plain, summary and location could be
      found, and always a syndication list
    :raises ValueError: if a date property cannot be parsed (only when
      `want_json` is false)
    """
    result = {}
    props = hentry["properties"]

    # "featured" and "logo" are separate properties; without the comma the
    # two literals concatenate into "featuredlogo".
    for prop in ("url", "uid", "photo", "featured", "logo"):
        value = get_plain_text(props.get(prop))
        if value:
            result[prop] = value

    for prop in ("start", "end", "published", "updated", "deleted"):
        date_str = get_plain_text(props.get(prop))
        if date_str:
            if want_json:
                result[prop] = date_str
            else:
                # Keep the raw string next to the parsed value.
                result[prop + "-str"] = date_str
                try:
                    date = parse_dt(date_str)
                    if date:
                        result[prop] = date
                except ValueError as err:
                    raise ValueError(f"Failed to parse datetime {date_str}") from err

    author = find_author(parsed, source_url, hentry, fetch_mf2_func)
    if author:
        result["author"] = author

    content_prop = props.get("content")
    content_value = None
    if content_prop:
        if isinstance(content_prop[0], dict):
            content_html = content_prop[0].get("html", "").strip()
            content_value = content_prop[0].get("value", "").strip()
        else:
            # A plain-string content value serves as both HTML and text.
            content_value = content_html = content_prop[0]
        result["content"] = convert_relative_paths_to_absolute(
            source_url, base_href, content_html
        )
        result["content-plain"] = content_value

    summary_prop = props.get("summary")
    if summary_prop:
        if isinstance(summary_prop[0], dict):
            result["summary"] = summary_prop[0]["value"]
        else:
            result["summary"] = summary_prop[0]

    # Collect location objects, then follow this algorithm to consolidate their
    # properties:
    # https://indieweb.org/location#How_to_determine_the_location_of_a_microformat
    location_stack = [props]

    for prop in "location", "adr":
        vals = props.get(prop)
        if vals:
            if isinstance(vals[0], str):
                location_stack.append({"name": vals})
            else:
                location_stack.append(vals[0].get("properties", {}))

    geo = props.get("geo")
    if geo:
        if isinstance(geo[0], dict):
            location_stack.append(geo[0].get("properties", {}))
        else:
            if geo[0].startswith("geo:"):
                # a geo: URL. try to parse it. https://tools.ietf.org/html/rfc5870
                parts = geo[0][len("geo:") :].split(";")[0].split(",")
                if len(parts) >= 2:
                    location_stack.append(
                        {
                            "latitude": [parts[0]],
                            "longitude": [parts[1]],
                            "altitude": [parts[2]] if len(parts) >= 3 else [],
                        }
                    )

    # Later entries of the stack override earlier ones. The item's own
    # "name" is its title, not a location name, so it is skipped.
    for prop in LOCATION_PROPERTIES:
        for obj in location_stack:
            if obj and obj.get(prop) and not (obj == props and prop == "name"):
                result.setdefault("location", {})[prop] = obj[prop][0]

    if use_rel_syndication:
        result["syndication"] = list(
            set(
                parsed.get("rels", {}).get("syndication", [])
                + hentry["properties"].get("syndication", [])
            )
        )
    else:
        result["syndication"] = hentry["properties"].get("syndication", [])

    return result
+
+
def interpret_event(
    parsed,
    source_url,
    base_href=None,
    hevent=None,
    use_rel_syndication=True,
    want_json=False,
    fetch_mf2_func=None,
):
    """Given a document containing an h-event, return a dictionary::

        {
         'type': 'event',
         'url': the permalink url of the document (may be different than source_url),
         'start': datetime or date,
         'end': datetime or date,
         'name': plain-text event name,
         'content': body of event description (contains HTML)
        }

    :param dict parsed: the result of parsing a document containing mf2 markup
    :param str source_url: the URL of the parsed document; forwarded to the
      shared property interpreter
    :param str base_href: (optional) the href value of the base tag
    :param dict hevent: (optional) the item in the above document representing
      the h-event. if provided, we can avoid a redundant call to
      find_first_entry
    :param boolean use_rel_syndication: (optional, default True) Whether
      to include rel=syndication in the list of syndication sources. Sometimes
      useful to set this to False when parsing h-feeds that erroneously include
      rel=syndication on each entry.
    :param boolean want_json: (optional, default false) if true, the result
      will be pure json with datetimes as strings instead of python objects
    :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
      output for a given URL.
    :return: a dict with some or all of the described properties; empty if
      no h-event is available
    """
    # Fall back to the first h-event in the document.
    event = hevent or find_first_entry(parsed, ["h-event"])
    if not event:
        return {}

    result = _interpret_common_properties(
        parsed,
        source_url,
        base_href,
        event,
        use_rel_syndication,
        want_json,
        fetch_mf2_func,
    )
    result["type"] = "event"

    event_name = get_plain_text(event["properties"].get("name"))
    if event_name:
        result["name"] = event_name
    return result
+
+
def interpret_entry(
    parsed,
    source_url,
    base_href=None,
    hentry=None,
    use_rel_syndication=True,
    want_json=False,
    fetch_mf2_func=None,
):
    """Given a document containing an h-entry, return a dictionary::

        {
         'type': 'entry' (or 'cite' for an h-cite),
         'url': the permalink url of the document (may be different than source_url),
         'published': datetime or date,
         'updated': datetime or date,
         'name': title of the entry,
         'content': body of entry (contains HTML),
         'author': {
          'name': author name,
          'url': author url,
          'photo': author photo
         },
         'syndication': [
           'syndication url',
           ...
         ],
         'in-reply-to': [...],
         'like-of': [...],
         'repost-of': [...],
        }

    :param dict parsed: the result of parsing a document containing mf2 markup
    :param str source_url: the URL of the parsed document, used by the
      authorship algorithm
    :param str base_href: (optional) the href value of the base tag
    :param dict hentry: (optional) the item in the above document
      representing the h-entry. if provided, we can avoid a redundant
      call to find_first_entry
    :param boolean use_rel_syndication: (optional, default True) Whether
      to include rel=syndication in the list of syndication sources. Sometimes
      useful to set this to False when parsing h-feeds that erroneously include
      rel=syndication on each entry.
    :param boolean want_json: (optional, default False) if true, the result
      will be pure json with datetimes as strings instead of python objects
    :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
      output for a given URL.
    :return: a dict with some or all of the described properties; empty if
      no h-entry is available
    """
    # Fall back to the first h-entry in the document.
    entry = hentry or find_first_entry(parsed, ["h-entry"])
    if not entry:
        return {}

    result = _interpret_common_properties(
        parsed,
        source_url,
        base_href,
        entry,
        use_rel_syndication,
        want_json,
        fetch_mf2_func,
    )
    result["type"] = "cite" if "h-cite" in entry.get("type", []) else "entry"

    # Only expose the name when it is not just the content repeated.
    title = get_plain_text(entry["properties"].get("name"))
    if title and is_name_a_title(title, result.get("content-plain")):
        result["name"] = title

    reference_props = (
        "in-reply-to",
        "like-of",
        "repost-of",
        "bookmark-of",
        "comment",
        "like",
        "repost",
    )
    for prop in reference_props:
        for url_val in entry["properties"].get(prop, []):
            references = result.setdefault(prop, [])
            if isinstance(url_val, dict):
                # An embedded item (e.g. h-cite) is interpreted recursively.
                references.append(
                    interpret(
                        parsed,
                        source_url,
                        base_href,
                        url_val,
                        use_rel_syndication=False,
                        want_json=want_json,
                        fetch_mf2_func=fetch_mf2_func,
                    )
                )
            else:
                references.append({"url": url_val})

    return result
+
+
def interpret_feed(
    parsed, source_url, base_href=None, hfeed=None, want_json=False, fetch_mf2_func=None
):
    """Interpret a source page as an h-feed or as a top-level collection
    of h-entries.

    :param dict parsed: the result of parsing a mf2 document
    :param str source_url: the URL of the source document (used for authorship
      discovery)
    :param str base_href: (optional) the href value of the base tag
    :param dict hfeed: (optional) the h-feed to be parsed. If provided,
      this will be used instead of the first h-feed on the page.
    :param boolean want_json: (optional, default False) if true, the result
      will be pure json with datetimes as strings instead of python objects
    :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
      output for a given URL.
    :return: a dict containing 'entries', a list of entries, and possibly other
      feed properties (like 'name').
    """
    feed = hfeed or find_first_entry(parsed, ["h-feed"])
    result = {}

    if feed:
        feed_names = feed["properties"].get("name")
        if feed_names:
            result["name"] = feed_names[0]
        children = feed.get("children", [])
    else:
        # No h-feed: just use the top level 'items' as the feed children.
        children = parsed.get("items", [])

    interpreted = (
        interpret(
            parsed,
            source_url,
            base_href,
            item=child,
            use_rel_syndication=False,
            want_json=want_json,
            fetch_mf2_func=fetch_mf2_func,
        )
        for child in children
    )
    # Children that are neither entries nor events interpret to a falsy
    # value and are dropped.
    result["entries"] = [entry for entry in interpreted if entry]
    return result
+
+
def interpret(
    parsed,
    source_url,
    base_href=None,
    item=None,
    use_rel_syndication=True,
    want_json=False,
    fetch_mf2_func=None,
):
    """Interpret a permalink of unknown type. Finds the first interesting
    h-* element, and delegates to :func:`interpret_entry` if it is an
    h-entry (or h-cite) or :func:`interpret_event` for an h-event

    :param dict parsed: the result of parsing a mf2 document
    :param str source_url: the URL of the source document (used for authorship
      discovery)
    :param str base_href: (optional) the href value of the base tag
    :param dict item: (optional) the item to be parsed. If provided,
      this will be used instead of the first element on the page.
    :param boolean use_rel_syndication: (optional, default True) Whether
      to include rel=syndication in the list of syndication sources. Sometimes
      useful to set this to False when parsing h-feeds that erroneously include
      rel=syndication on each entry.
    :param boolean want_json: (optional, default False) If true, the result
      will be pure json with datetimes as strings instead of python objects
    :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
      output for a given URL.
    :return: a dict as described by interpret_entry or interpret_event, or None
    """
    target = item or find_first_entry(parsed, ["h-entry", "h-event"])
    if not target:
        return None

    shared = dict(
        base_href=base_href,
        use_rel_syndication=use_rel_syndication,
        want_json=want_json,
        fetch_mf2_func=fetch_mf2_func,
    )
    kinds = target.get("type", [])
    # h-event takes precedence when an item carries both types.
    if "h-event" in kinds:
        return interpret_event(parsed, source_url, hevent=target, **shared)
    if "h-entry" in kinds or "h-cite" in kinds:
        return interpret_entry(parsed, source_url, hentry=target, **shared)
    return None
+
+
def interpret_comment(
    parsed,
    source_url,
    target_urls,
    base_href=None,
    want_json=False,
    fetch_mf2_func=None,
):
    """Interpret received webmentions, and classify as like, reply, or
    repost (or a combination thereof). Returns a dict as described
    in :func:`interpret_entry`, with the additional fields::

        {
         'comment_type': a list of strings, zero or more of
                         'like', 'reply', or 'repost'
         'rsvp': a string containing the rsvp response (optional)
         'invitees': a list of author dicts, one per invitee (optional)
        }

    :param dict parsed: a parsed mf2 parsed document
    :param str source_url: the URL of the source document
    :param list target_urls: a collection containing the URL of the target
      document, and any alternate URLs (e.g., shortened links) that should
      be considered equivalent when looking for references
    :param str base_href: (optional) the href value of the base tag
    :param boolean want_json: (optional, default False) If true, the result
      will be pure json with datetimes as strings instead of python objects
    :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
      output for a given URL.
    :return: a dict as described above, or None
    """
    entry = find_first_entry(parsed, ["h-entry"])
    if not entry:
        return None

    result = interpret_entry(
        parsed,
        source_url,
        base_href=base_href,
        hentry=entry,
        want_json=want_json,
        fetch_mf2_func=fetch_mf2_func,
    )
    if not result:
        return result

    entry_props = entry["properties"]
    result["comment_type"] = classify_comment(parsed, target_urls)

    rsvp = get_plain_text(entry_props.get("rsvp"))
    if rsvp:
        # Lower-cased so that "Yes"/"YES"/"yes" compare equal.
        result["rsvp"] = rsvp.lower()

    invitees = entry_props.get("invitee")
    if invitees:
        result["invitees"] = [parse_author(invitee) for invitee in invitees]

    return result
+
+
+# ===========================================================================
+
+
# Property lists per microformats2 vocabulary, keyed by the vocabulary name
# without its "h-" prefix. Each entry is the property name with its type
# prefix; a parenthesised suffix appears to name the vocabularies of an
# embedded value -- presumably; confirm against the vocabulary specs.
# `stable` holds the vocabularies this module treats as stable.
stable = {
    "adr": [
        "p-street-address",
        "p-extended-address",
        "p-post-office-box",
        "p-locality",
        "p-region",
        "p-postal-code",
        "p-country-name",
        "p-label",
        "p/u-geo",
        "p-latitude",
        "p-longitude",
        "p-altitude",
    ],
    "card": [
        "p-name",
        "p-honorific-prefix",
        "p-given-name",
        "p-additional-name",
        "p-family-name",
        "p-sort-string",
        "p-honorific-suffix",
        "p-nickname",
        "u-email",
        "u-logo",
        "u-photo",
        "u-url",
        "u-uid",
        "p-category",
        "p/h-adr",
        "p-post-office-box",
        "p-extended-address",
        "p-street-address",
        "p-locality",
        "p-region",
        "p-postal-code",
        "p-country-name",
        "p-label",
        "p/u/h-geo",
        "p-latitude",
        "p-longitude",
        "p-altitude",
        "p-tel",
        "p-note",
        "dt-bday",
        "u-key",
        "p-org",
        "p-job-title",
        "p-role",
        "u-impp",
        "p-sex",
        "p-gender-identity",
        "dt-anniversary",
    ],
    "entry": [
        "p-name",
        "p-summary",
        "e-content",
        "dt-published",
        "dt-updated",
        "p-author",
        "p-category",
        "u-url",
        "u-uid",
        "p-location",
        "u-syndication",
        "u-in-reply-to",
        "p-rsvp",
        "u-like-of",
        "u-repost-of",
    ],
    "event": [
        "p-name",
        "p-summary",
        "dt-start",
        "dt-end",
        "dt-duration",
        "e-content",
        "u-url",
        "p-category",
        "p-location(card/adr/geo)",
        "[p-attendee]",
    ],
    "feed": ["p-name", "p-author(card)", "u-url", "u-photo"],
    "geo": ["p-latitude", "p-longitude", "p-altitude"],
    "item": ["p-name", "u-url", "u-photo"],
    "product": [
        "p-name",
        "u-photo",
        "p-brand(card)",
        "p-category",
        "e-content",
        "u-url",
        "u-identifier",
        "p-review(review)",
        "p-price",
    ],
    "recipe": [
        "p-name",
        "p-ingredient",
        "p-yield",
        "e-instructions",
        "dt-duration",
        "u-photo",
        "p-summary",
        "p-author(card)",
        "dt-published",
        "p-nutrition",
        "p-category",
    ],
    "resume": [
        "p-name",
        "p-summary",
        "p-contact",
        "p-education(event+card)",
        "p-experience(event+card)",
        "p-skill",
        "p-affiliation",
    ],
    "review": [
        # No trailing whitespace: a padded "p-name " would not compare
        # equal to the "p-name" used by every other vocabulary.
        "p-name",
        "p-item(card/event/adr/geo/product/item)",
        "p-author(card)",
        "dt-published",
        "p-rating",
        "p-best",
        "p-worst",
        "e-content",
        "p-category",
        "u-url",
    ],
    "review-aggregate": [
        "p-item(card/event/adr/geo/product/item)",
        "p-average",
        "p-best",
        "p-worst",
        "p-count",
        "p-votes",
        "p-name",
    ],
}
# `draft` holds vocabularies still in draft.
draft = {"app": ["p-name", "u-url", "u-logo", "u-photo"]}
+
+
+def representative_card(mf2json: dict, source_url: str):
+    """
+    Return the representative card for given parsed document.
+
+    http://microformats.org/wiki/representative-h-card-parsing
+
+    Only h-cards whose first `name` or first `nickname` value is non-empty
+    are considered; cards embedded as property values are included.  The
+    candidates are checked against the minimized `source_url` in three
+    steps and the first hit wins:
+
+    1. the card's uid and url both match the source URL
+    2. one of the card's urls is also a rel=me link of the document
+    3. one of the card's urls matches the source URL
+
+    Returns the matching card's `properties` dict, or None when no card
+    qualifies.
+
+    """
+    source = easyuri.parse(source_url).minimized
+
+    def first_value(card, prop):
+        # An absent *or empty* value list counts as "no value"; indexing
+        # `[0]` directly would raise IndexError on `"name": []`.
+        values = card["properties"].get(prop)
+        return values[0] if values else ""
+
+    cards = [
+        card
+        for card in _get_all_items(mf2json, ["h-card"], include_props=True)
+        if first_value(card, "name") or first_value(card, "nickname")
+    ]
+    if match := _check_uid_and_url_match_source_url(cards, source):
+        return match
+    if match := _check_url_matches_rel_me(cards, mf2json):
+        return match
+    if match := _check_url_matches_source_url(cards, source):
+        return match
+    return None
+
+
+def _check_uid_and_url_match_source_url(cards, source_url):  # FIXME same as below?
+    """
+    Return the properties of the first card whose uid and url both match.
+
+    A card matches when `source_url` is among its normalized `uid` values
+    and also among its normalized `url` values.  Returns None when no card
+    matches.
+
+    """
+    for candidate in cards:
+        if source_url not in _get_normalized_urls(candidate, "uid"):
+            continue
+        if source_url in _get_normalized_urls(candidate, "url"):
+            return candidate["properties"]
+    return None
+
+
+def _check_url_matches_rel_me(cards, parsed):
+    """
+    Return the properties of the first card with a url that is also rel=me.
+
+    `parsed` is the parsed document; its `rels["me"]` links are normalized
+    the same way as the card urls (via `.minimized`).  Links that fail to
+    parse or are not HTTP(S) URIs are ignored.  Returns None when no card
+    matches.
+
+    """
+    if not cards:
+        return None
+    # The rel=me set does not depend on the card, so build it once instead
+    # of once per card.
+    rel_mes = set()
+    for rel_me in parsed.get("rels", {}).get("me", []):
+        try:
+            rel_me_uri = easyuri.parse(rel_me)
+        except ValueError:
+            continue
+        if isinstance(rel_me_uri, (easyuri.HTTPURI, easyuri.HTTPSURI)):
+            rel_mes.add(rel_me_uri.minimized)
+    for card in cards:
+        if any(url in rel_mes for url in _get_normalized_urls(card, "url")):
+            return card["properties"]
+    return None
+
+
+def _check_url_matches_source_url(cards, source_url):  # FIXME same as above?
+    """
+    Return the properties of the first card with a url matching the source.
+
+    A card url matches when its normalized form, with trailing slashes
+    removed, equals `source_url`.  Returns None when no card matches.
+
+    """
+    for card in cards:
+        for card_url in _get_normalized_urls(card, "url"):
+            if card_url.rstrip("/") == source_url:
+                # Only the first match was ever returned, so stop here
+                # instead of collecting every match.
+                return card["properties"]
+    return None
+
+
+def representative_feed(mf2json: dict, source_url: str, source_dom=None):
+    """
+    Return the representative feed for given parsed document.
+
+    https://indieweb.org/feed#How_To_Consume
+    https://microformats.org/wiki/h-feed#Discovery
+
+    The returned dict contains:
+
+    - "name": text of the document's first <title> when `source_dom` is
+      given, replaced by the first h-feed's name when it has one
+    - "author": the representative card, replaced by the first h-feed's
+      authors when it has any
+    - "items": the `properties` of the first h-feed's children, or of all
+      h-entry/h-event items when the document has no h-feed
+    - "next" / "prev": the first rel=next / rel=prev link, when present
+
+    Note that a "type" key is written into the `properties` dict of every
+    returned item and embedded author, i.e. `mf2json` is modified in place.
+
+    """
+    feed = {}
+    try:
+        feed["name"] = source_dom.select("title")[0].text
+    except (AttributeError, IndexError):
+        # no DOM was given (None) or the document has no <title>
+        pass
+    if author := representative_card(mf2json, source_url):
+        feed["author"] = author
+    items = []
+    if first_feed := _get_first_item(mf2json, ["h-feed"]):
+        if name := first_feed["properties"].get("name"):
+            # NOTE(review): `name` looks like it is already the property's
+            # value list, so this nests it one level deeper; kept as-is
+            # because callers may depend on the shape -- confirm.
+            feed["name"] = [name]
+        if authors := first_feed["properties"].get("author"):
+            feed["author"] = []
+            for author in authors:
+                if isinstance(author, dict):
+                    author["properties"]["type"] = author["type"]
+                    feed["author"].append(author["properties"])
+                else:
+                    # plain value (e.g. a u-author URL): there is no
+                    # embedded card to unwrap, so pass it through
+                    feed["author"].append(author)
+        # "children" may be missing when the h-feed has no nested items
+        items = first_feed.get("children") or []
+    else:
+        items = _get_all_items(mf2json, ["h-entry", "h-event"])
+    feed["items"] = []
+    for item in items:
+        item["properties"]["type"] = item["type"]
+        feed["items"].append(item["properties"])
+    # tolerate documents without a "rels" section
+    rels = mf2json.get("rels", {})
+    if rel_next := rels.get("next"):
+        feed["next"] = rel_next[0]
+    if rel_prev := rels.get("prev"):
+        feed["prev"] = rel_prev[0]
+    return feed
+
+
+def discover_post_type(properties):
+    """
+    Return the discovered post type.
+
+    http://ptd.spec.indieweb.org/#x5-post-type-algorithm
+
+    `properties` is an item's properties mapping.  The first
+    type-specific property found (in the order of the table below)
+    decides the type; video/audio/photo posts that also carry
+    `quotation-of` are reported as "<type>/clip".  Otherwise the post is
+    an "article" when it has a non-empty name that is not a prefix of its
+    content (or summary), and a "note" in every other case.
+
+    """
+    type_specific_properties = {
+        "rsvp": "rsvp",
+        "repost-of": "repost",  # aka share
+        "like-of": "like",  # aka favorite
+        "in-reply-to": "reply",
+        "listen-of": "listen",
+        "bookmark-of": "bookmark",
+        "checkin": "check-in",
+        "video": "video",
+        "audio": "audio",
+        "photo": "photo",
+        # TODO "follow-of": "follow",
+        # TODO "weight": "weight",
+    }
+    for type_specific_property, post_type in type_specific_properties.items():
+        if type_specific_property in properties:
+            if (
+                post_type in ("video", "audio", "photo")
+                and "quotation-of" in properties
+            ):
+                return f"{post_type}/clip"
+            return post_type
+
+    def normalize(text):
+        # trim and collapse whitespace runs, as the algorithm requires
+        # before comparing name and content
+        return " ".join(text.split())
+
+    # Fall back to the summary when content is absent *or* empty; an empty
+    # content used to reach `dict(None)` and raise TypeError.
+    content = _get_first_non_empty(properties.get("content", []))
+    if not content:
+        content = _get_first_non_empty(properties.get("summary", []))
+    if not content:
+        return "note"
+    name = _get_first_non_empty(properties.get("name", []))
+    if not name:
+        return "note"
+    if isinstance(content, dict):
+        # e-* value: compare against the text of its HTML, or its plain
+        # "value" when no HTML is present
+        html = content.get("html")
+        if html is not None:
+            text_content = bs4.BeautifulSoup(html.strip()).text
+        else:
+            text_content = content.get("value", "")
+    else:
+        text_content = content
+    if not normalize(text_content).startswith(normalize(name)):
+        return "article"
+    return "note"
+
+
+def _get_first_item(mf2json: dict, item_type: set):
+    """
+    Return the first item of any of the given type(s), or None.
+
+    Items are visited in the order of `_yield_all_items`; values embedded
+    as properties are not searched.
+
+    """
+    for found in _yield_all_items(mf2json, item_type, False):
+        return found
+    return None
+
+
+def _get_all_items(mf2json: dict, item_type: set, include_props=False):
+    """
+    Return a list of every item of any of the given type(s).
+
+    Values embedded as properties are searched only when `include_props`
+    is true.
+
+    """
+    matches = _yield_all_items(mf2json, item_type, include_props)
+    return [*matches]
+
+
+def _yield_all_items(mf2json: dict, item_type: set, include_props: bool):
+    """
+    Yield every item of any of the given type(s), breadth first.
+
+    Starts from the document's top-level items and walks down through
+    their children and descendants.  Objects embedded as property values
+    (such as the h-card of a "p-author h-card") are visited only when
+    `include_props` is true.
+
+    """
+    pending = collections.deque(mf2json["items"])
+    while pending:
+        current = pending.popleft()
+        current_types = current.get("type", [])
+        if any(h_class in current_types for h_class in item_type):
+            yield current
+        pending.extend(current.get("children", []))
+        if not include_props:
+            continue
+        for values in current.get("properties", {}).values():
+            # only embedded objects can be items; plain values are skipped
+            pending.extend(value for value in values if isinstance(value, dict))
+
+
+def _get_normalized_urls(card, prop):
+    """
+    Return the normalized URLs found in a card's `prop` (uid/url).
+
+    Each value is parsed and reduced to its `.minimized` form; values
+    that raise ValueError while being parsed are left out.
+
+    """
+    normalized = []
+    for raw_url in card["properties"].get(prop, []):
+        try:
+            minimized = easyuri.parse(raw_url).minimized
+        except ValueError:
+            continue
+        normalized.append(minimized)
+    return normalized
+
+
+def _get_first_non_empty(propval):
+    """
+    Return the first truthy value in `propval`, or None if there is none.
+
+    A `propval` that is not a list is treated as a one-element list, so a
+    truthy scalar is returned as-is.
+
+    """
+    candidates = propval if isinstance(propval, list) else [propval]
+    return next((value for value in candidates if value), None)
index 0000000..db2047d
--- /dev/null
+[tool.poetry]
+name = "microformats"
+version = "0.3.4"
+description = "tools for microformats production, consumption and analysis"
+readme = "README.md"
+keywords = ["IndieWeb", "microformats"]
+homepage = "https://ragt.ag/code/python-microformats"
+authors = ["Angelo Gladding <angelo@ragt.ag>"]
+license = "BSD-3-Clause"
+packages = [{include="mf.py"}]
+
+[tool.poetry.scripts]
+mf = "mf:main"
+
+[tool.poetry.dependencies]
+python = ">=3.8,<3.11"
+beautifulsoup4 = "^4.11.2"
+easyuri = ">=0.1.2"
+txtint = ">=0.1.2"
+mf2py = "^1.1.3"
+
+[tool.poetry.group.dev.dependencies]
+lxml = "^4.9.2"
+gmpg = {path="../gmpg", develop=true}
+easyuri = {path="../easyuri", develop=true}
+
+# [[tool.poetry.source]]
+# name = "main"
+# url = "https://ragt.ag/code/pypi"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
diff --git a/tests/authorship/h-card_with_u-url_equal_to_self.html b/tests/authorship/h-card_with_u-url_equal_to_self.html
new file mode 100644
index 0000000..1714677
--- /dev/null
+++ b/tests/authorship/h-card_with_u-url_equal_to_self.html
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>h-card with u-url == self</title>
+ </head>
+ <body>
+
+ <div class="h-card">
+ <a class="u-url" href="no_h-card.html"></a>
+ <p class="p-name">Wrong One</p>
+ </div>
+
+ <div class="h-card">
+ <a class="u-url" href="h-card_with_u-url_equal_to_self.html">
+ <img class="u-photo" src="http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm">
+ </a>
+ <p class="p-name">John Doe</p>
+ </div>
+
+ </body>
+</html>
diff --git a/tests/authorship/h-card_with_u-url_equal_to_u-uid_equal_to_self.html b/tests/authorship/h-card_with_u-url_equal_to_u-uid_equal_to_self.html
new file mode 100644
index 0000000..6b77a67
--- /dev/null
+++ b/tests/authorship/h-card_with_u-url_equal_to_u-uid_equal_to_self.html
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>h-card with u-url == u-uid == self</title>
+ </head>
+
+ <body>
+
+ <div class="h-card">
+ <a class="u-url" rel="me" href="h-card_with_u-url_equal_to_u-uid_equal_to_self.html"></a>
+ <p class="p-name">Wrong One</p>
+ </div>
+
+ <div class="h-card">
+ <a class="u-url u-uid" href="h-card_with_u-url_equal_to_u-uid_equal_to_self.html">
+ <img class="u-photo" src="http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm">
+ </a>
+ <p class="p-name">John Doe</p>
+ </div>
+
+ </body>
+</html>
diff --git a/tests/authorship/h-card_with_u-url_that_is_also_rel-me.html b/tests/authorship/h-card_with_u-url_that_is_also_rel-me.html
new file mode 100644
index 0000000..e1c062e
--- /dev/null
+++ b/tests/authorship/h-card_with_u-url_that_is_also_rel-me.html
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>h-card with u-url == rel-me</title>
+ <link rel="me" href="h-card_with_u-url_that_is_also_rel-me.html">
+ </head>
+
+ <body>
+
+ <div class="h-card">
+ <a class="u-url" href="h-card_with_u-url_equal_to_u-uid_equal_to_self.html"></a>
+ <p class="p-name">Wrong One</p>
+ </div>
+
+ <div class="h-card">
+ <a class="u-url" href="h-card_with_u-url_that_is_also_rel-me.html">
+ <img class="u-photo" src="http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm">
+ </a>
+ <p class="p-name">John Doe</p>
+ </div>
+
+ </body>
+</html>
diff --git a/tests/authorship/h-entry_with_p-author_h-card.html b/tests/authorship/h-entry_with_p-author_h-card.html
new file mode 100644
index 0000000..1d37d3c
--- /dev/null
+++ b/tests/authorship/h-entry_with_p-author_h-card.html
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>h-entry with p-author</title>
+ </head>
+
+ <body>
+
+ <div class="h-entry">
+ <div class="p-author h-card">
+ <a class="u-url" href="http://example.com/johndoe/">
+ <img class="u-photo" src="http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm">
+ </a>
+ <p class="p-name">John Doe</p>
+ </div>
+ <div class="p-name p-summary e-content">Hello World!</div>
+ </div>
+
+ </body>
+</html>
diff --git a/tests/authorship/h-entry_with_rel-author.html b/tests/authorship/h-entry_with_rel-author.html
new file mode 100644
index 0000000..363b546
--- /dev/null
+++ b/tests/authorship/h-entry_with_rel-author.html
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <title>h-entry with rel-author</title>
+ <link rel="author" href="h-card_with_u-url_that_is_also_rel-me.html" />
+ </head>
+ <body>
+ <div class="h-entry">
+ <span class="p-name p-summary e-content">Hello World!</span>
+ </div>
+ </body>
+</html>
diff --git a/tests/authorship/h-entry_with_u-author.html b/tests/authorship/h-entry_with_u-author.html
new file mode 100644
index 0000000..a2d68d2
--- /dev/null
+++ b/tests/authorship/h-entry_with_u-author.html
+<!DOCTYPE>
+<html>
+ <head>
+ <title>h-entry with u-author pointing to an author page</title>
+ </head>
+ <body>
+
+ <div class="h-entry">
+ <a class="u-author" href="h-card_with_u-url_equal_to_self.html">Not Here</a>
+ <span class="p-name e-content">
+ Follow the u-author URL to find the full h-card
+ </span>
+ </div>
+
+ </body>
+</html>
diff --git a/tests/authorship/h-feed_with_p-author_h-card.html b/tests/authorship/h-feed_with_p-author_h-card.html
new file mode 100644
index 0000000..7532a79
--- /dev/null
+++ b/tests/authorship/h-feed_with_p-author_h-card.html
+<!DOCTYPE>
+<html>
+ <head>
+ <title>h-feed with p-author h-card</title>
+ </head>
+ <body>
+
+ <div class="h-feed">
+ <div class="p-author h-card">
+ <a class="u-url" href="http://example.com/johndoe/">
+ <img class="u-photo" src="http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm" />
+ <span class="p-name">John Doe</span>
+ </a>
+ </div>
+
+ <div class="h-entry">
+ <span class="p-name e-content">
+ First entry
+ </span>
+ </div>
+
+ <div class="h-entry">
+ <span class="p-name e-content">
+ Second entry
+ </span>
+ </div>
+
+ <div class="h-entry">
+ <span class="p-name e-content">
+ Third entry
+ </span>
+ </div>
+
+ </div>
+
+ </body>
+</html>
diff --git a/tests/authorship/h-feed_with_u-author.html b/tests/authorship/h-feed_with_u-author.html
new file mode 100644
index 0000000..7de4b3c
--- /dev/null
+++ b/tests/authorship/h-feed_with_u-author.html
+<!DOCTYPE>
+<html>
+ <head>
+ <title>h-feed with u-author</title>
+ </head>
+ <body>
+
+ <div class="h-feed">
+
+ <a class="u-author" href="h-card_with_u-url_equal_to_u-uid_equal_to_self.html">Not Here</a>
+
+ <div class="h-entry">
+ <span class="p-name e-content">
+ First entry
+ </span>
+ </div>
+
+ <div class="h-entry">
+ <span class="p-name e-content">
+ Second entry
+ </span>
+ </div>
+
+ <div class="h-entry">
+ <span class="p-name e-content">
+ Third entry
+ </span>
+ </div>
+
+ </div>
+
+ </body>
+</html>
diff --git a/tests/authorship/no_h-card.html b/tests/authorship/no_h-card.html
new file mode 100644
index 0000000..6f2d331
--- /dev/null
+++ b/tests/authorship/no_h-card.html
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>h-entry with p-author</title>
+</head>
+
+<body>
+ <p>Nothing to see here. Move along.</p>
+</body>
+</html>
index 0000000..76c9745
--- /dev/null
+{
+ "items": [
+ {
+ "properties": {
+ "name": [
+ "\nMarkup For People Focused Mobile Communication \n"
+ ],
+ "published": [
+ "2014-04-30T12:11"
+ ],
+ "uid": [
+ "http://tantek.com/2014/120/b1/markup-people-focused-mobile-communication"
+ ],
+ "url": [
+ "http://tantek.com/2014/120/b1/markup-people-focused-mobile-communication"
+ ],
+ "updated": [
+ "2014-04-30T12:11"
+ ],
+ "content": [
+ {
+ "html": "\n<p>\nAll functionality on web pages and applications starts with markup. The previous post in this series, <cite><a href=\"http://tantek.com/2014/084/b1/urls-people-focused-mobile-communication\">URLs For People Focused Mobile Communication</a></cite>, documented the various URL schemes for launching the communication apps shown in <a href=\"http://tantek.com/2014/067/b2/mockups-people-focused-mobile-communication\">the mockups</a>, as well as results of testing them on mobile devices. Those tests used minimal markup.\n</p>\n<p>\nThis post documents and explains that markup, building up element by element from a simple hyperlink to the structure implied by this mockup:\n</p>\n<p class=\"figure\"><img alt=\"mobile website icon header\" src=\"http://indiewebcamp.com/images/6/66/mobile-personal-home-contact-ios7.jpg\"/>\n</p>\n<p>\nOr if you want, you may jump directly to the <a href=\"120/b1/markup-people-focused-mobile-communication#comms-markup\">complete markup example</a>.\n</p>\n<h2 id=\"urls-comms-hyperlink\">A hyperlink</h2>\n<p>\nA hyperlink provides a way for the user to navigate to other web pages. Using a URL scheme for a communication app, a hyperlink can start a message, resume a conversation, or start an audio/video call. Here's a simple hyperlink that uses the first URL scheme documented in the previous post, <code>sms:</code>\n</p>\n<pre style=\"white-space:pre-wrap\"><code><a href=\"sms:user@example.com\">txt message</a>\n</code></pre>\n<p>\nLive example: <a href=\"sms:user@example.com\">txt message</a>\n</p>\n<p>\nActivating that live example likely won't do much, as <code>user@example.com</code> does not belong to anyone. <code>Example.com</code> is a domain registered purely for the purpose of examples like this one. 
To make this hyperlink work, you'd have to use a registered AppleID email address, which would send a txt on iOS, and fallback to email via phone provider on Android.\n</p>\n<h2>Action labels not app names</h2>\n<p>\nI use the link text \"<strong>txt message</strong>\" to indicate its user-centered function: the action of <em>creating a txt message</em>, from one human to another.\n</p>\n<p>\nContrast that with the mockup above (which I \"built\" using an iOS7 home screen folder), which uses the label \"Messages\", the name of the application it launches. \n</p>\n<p>\nThis deliberate change from \"Messages\" (application) to \"txt message\" (action) reflects the larger purpose of this exercise: <a href=\"http://tantek.com/2013/338/b1/people-focused-mobile-communication-experience\">people-focused rather than app-focused communication</a>. Subsequent labels follow a similar approach.\n</p>\n<h2>An image hyperlink</h2>\n<p>\nA simple text hyperlink is functional, yet does not provide the immediate association and recognition conveyed by the Messages icon in the mockup. There are two methods of providing an image hyperlink:\n</p>\n<ul>\n<li>An <code><img></code> element inside the hyperlink</li>\n<li>A CSS <code>background-image</code></li>\n</ul>\n<p>\nThe question of when to use markup for an image and when to use CSS is usually easily answered by the question: is the image meaningful <em>content</em> (like a photograph) or purely <em>decorative</em> (like a flourish)? Or by asking, is any meaning lost if the image is dropped?\n</p>\n<p>\nThe Messages image is neither content nor decorative. It's a button, and it's also a standard iOS user interface element, which means it <em>does</em> convey meaning to those users, above and beyond any text label. 
Here's the minimum markup for an image hyperlink, with the link text moved into the alt attribute as a fallback:\n</p>\n<pre style=\"white-space:pre-wrap\"><code><a href=\"sms:user@example.com\">\n <img src=\"ios7-messages-icon.png\" \n alt=\"txt message\"/>\n</a>\n</code></pre>\n<p>\nLive example: <br/>\n<a href=\"sms:user@example.com\"><img alt=\"txt message\" src=\"http://media.idownloadblog.com/wp-content/uploads/2013/06/Messages-icon-iOS-7.png\"/></a>\n</p>\n<h2>Image and text hyperlink</h2>\n<p>\nThere is a third option, as implied by the mockup, and that is to use both an image and a text label. That's a simple matter of moving the alt text outside the image:\n</p>\n<pre style=\"white-space:pre-wrap\"><code><a href=\"sms:user@example.com\">\n <img src=\"ios7-messages-icon.png\" \n alt=\"\"/>\n txt message\n</a>\n</code></pre>\n<p>\nLive example: <br/>\n<a href=\"sms:user@example.com\"><img alt=\"\" src=\"http://media.idownloadblog.com/wp-content/uploads/2013/06/Messages-icon-iOS-7.png\"/>\ntxt message</a>\n</p>\n<p>\nThe <code>alt</code> attribute is left deliberately empty since putting anything there would not add to the usability of the link, and could in fact detract from it.\n</p>\n<p>\nUnlike the mockup, the link text is next to (instead of underneath) the image, and is blue & underlined. These are all presentational aspects and will be addressed in the next post on CSS for People Focused Mobile Communication.\n</p>\n<h2>A list of communication options</h2>\n<p>\nThe mockup also shows multiple communication buttons in a list laid out as a grid. We can assign meaning to the order of the buttons - the site owner's preferred order of communication methods. Thus we use an ordered list to convey that their order is significant. 
Here's a few image+text links wrapped in list items inside an ordered list:\n</p>\n<pre style=\"white-space:pre-wrap\"><code><ol>\n <li><a href=\"sms:user@example.com\">\n <img src=\"ios7-messages-icon.png\" \n alt=\"\"/>\n txt message\n </a></li>\n <li><a href=\"fb-messenger://user-thread/4\">\n <img src=\"fb-messenger-icon.png\" \n alt=\"\"/>\n <abbr title=\"Facebook\">FB</abbr> message\n </a></li>\n <li><a href=\"aim:goim?screenname=tantekc&message=hi\">\n <img src=\"aim-icon.png\" \n alt=\"\"/>\n AIM chat\n </a></li>\n</ol>\n</code></pre>\n<p>\nNote the use of an <code><abbr></code> element to abbreviate \"Facebook\" just to \"FB\" to shorten the overall \"FB message\" link text.\n</p>\n<p>\nLive example: \n</p>\n<ol>\n<li><a href=\"sms:user@example.com\"><img alt=\"\" src=\"http://media.idownloadblog.com/wp-content/uploads/2013/06/Messages-icon-iOS-7.png\"/>\ntxt message</a></li>\n<li><a href=\"fb-messenger://user-thread/4\"><img alt=\"\" src=\"http://a1.mzstatic.com/us/r30/Purple/v4/c0/92/69/c09269c0-85ca-fd85-5f0f-f235dff13ff8/mzl.lyucgsnh.175x175-75.jpg\"/>\n<abbr title=\"Facebook\">FB</abbr> message</a></li>\n<li>\n<a href=\"aim:goim?screenname=tantekc&message=greetings+program\"><img alt=\"\" src=\"http://a1.mzstatic.com/us/r30/Purple6/v4/93/04/58/93045809-0d28-da1c-e5c8-a2966065b59d/mzl.onzjlxad.175x175-75.jpg\"/> AIM chat</a></li>\n</ol>\n<p>\nJust as in the previous URLs post, the FB message link uses Zuck's <abbr title=\"identifer\">ID</abbr>, and the AIM chat link uses the same nickname I've had in the sidebar for a while.\n</p>\n\n<h2>List heading</h2>\n\n<p>\nThe mockup labels the entire grid \"Contact\" (also deliberately chosen as an action, rather than the \"Contacts\" application). 
This makes sense as a heading, and in the context of a home page, a second level heading:\n</p>\n\n<pre style=\"white-space:pre-wrap\"><code><h2>Contact</h2>\n</code></pre>\n\n<p>No need for a separate live example - the subheads above are all <code><h2></code> elements. As is this one:</p>\n\n<h2 id=\"comms-markup\">Putting it all together</h2>\n\n<p>\nCombining the Contact heading with the previous ordered list, and adding the remaining buttons:\n</p>\n\n<pre style=\"white-space:pre-wrap\"><code><h2>Contact</h2>\n<ol>\n <li><a href=\"sms:<b style=\"color:orange\">user@example.com</b>\">\n <img src=\"ios7-messages-icon.png\" \n alt=\"\"/>\n txt message\n </a></li>\n <li><a href=\"fb-messenger://user-thread/<b style=\"color:orange\">4</b>\">\n <img src=\"fb-messenger-icon.png\" \n alt=\"\"/>\n <abbr title=\"Facebook\">FB</abbr> message\n </a></li>\n <li><a href=\"aim:goim?screenname=<b style=\"color:orange\">tantekc</b>&message=hi\">\n <img src=\"aim-icon.png\" \n alt=\"\"/>\n AIM chat\n </a></li>\n <li><a href=\"facetime:<b style=\"color:orange\">user@example.com</b>\">\n <img src=\"facetime-icon.png\" \n alt=\"\"/>\n FaceTime call\n </a></li>\n <li><a href=\"skype:<b style=\"color:orange\">echo123</b>?call\">\n <img src=\"skype-icon.png\" \n alt=\"\"/>\n Skype call\n </a></li>\n <li><a href=\"https://mobile.twitter.com/<b style=\"color:orange\">t</b>/messages\">\n <img src=\"twitter-dm-icon.png\" \n alt=\"\"/>\n Twitter <abbr title=\"Direct Message\">DM</abbr>\n </a></li>\n</ol>\n</code></pre>\n\n<p>\nIn this final code example I've highlighted (using orange bold tags), the key pieces you need to change to your own identifiers on each service.\n</p>\n\n<p>\nLive example once more, including heading:\n</p>\n\n<h2>Contact</h2>\n<ol>\n<li><a href=\"sms:user@example.com\"><img alt=\"\" src=\"http://media.idownloadblog.com/wp-content/uploads/2013/06/Messages-icon-iOS-7.png\"/>\ntxt message</a></li>\n<li><a href=\"fb-messenger://user-thread/4\"><img alt=\"\" 
src=\"http://a1.mzstatic.com/us/r30/Purple/v4/c0/92/69/c09269c0-85ca-fd85-5f0f-f235dff13ff8/mzl.lyucgsnh.175x175-75.jpg\"/>\n<abbr title=\"Facebook\">FB</abbr> message</a></li>\n<li>\n<a href=\"aim:goim?screenname=tantekc&message=greetings+program\"><img alt=\"\" src=\"http://a1.mzstatic.com/us/r30/Purple6/v4/93/04/58/93045809-0d28-da1c-e5c8-a2966065b59d/mzl.onzjlxad.175x175-75.jpg\"/> AIM chat</a></li>\n<li>\n<a href=\"facetime:user@example.com\"><img alt=\"\" src=\"http://www.downloadios7.org/wp-content/uploads/apple_facetime_ios_7_logo.png\"/> FaceTime call</a></li>\n\n<li>\n<a href=\"skype:echo123?call\"><img alt=\"\" src=\"http://a1.mzstatic.com/us/r30/Purple4/v4/56/a2/b7/56a2b7a7-426d-cb33-3d11-41b1259dab89/mzl.zcfukhdi.175x175-75.jpg\"/> Skype call</a></li>\n\n<li>\n<a href=\"https://mobile.twitter.com/t/messages\"><img alt=\"\" src=\"https://ma.twimg.com/twitter-mobile/52e2205d8630c1980f88fe6357cacae3d7772a7e/images/apple-touch-icon-114.png\"/> Twitter DM</a></li>\n\n</ol>\n\n<p>\nI dropped the Google Hangouts icon since that application lacks support for any URL schemes (as noted in the previous post). Also I've re-ordered a bit from the mockup, having found that I prefer FaceTime over Skype. Pick your own from among the <a href=\"http://tantek.com/2014/084/b1/urls-people-focused-mobile-communication\">documented URL schemes</a>, and order them to your preference.\n</p>\n\n<h2 id=\"next-markup-steps\">Next Steps</h2>\n\n<p>\nAll the essential structure is there, yet it clearly needs some CSS. There's plenty to fix from inconsistent image sizes (all but the Messages & FaceTime icons are from Apple's iTunes store web pages), to blue underlined link text. 
And there's plenty to clean up to approach the look of the mockup: from the clustered center-aligned image+text button layout, to the grid layout of the buttons, to white text on the gray rounded corner ordered list background.</p>\n<p>\nThat's all for the next post in <a href=\"http://tantek.com/2014/067/b1/building-blocks-people-focused-mobile-communication\">this series</a>.\n</p>\n\n",
+ "value": "\n\nAll functionality on web pages and applications starts with markup. The previous post in this series, URLs For People Focused Mobile Communication, documented the various URL schemes for launching the communication apps shown in the mockups, as well as results of testing them on mobile devices. Those tests used minimal markup.\n\n\nThis post documents and explains that markup, building up element by element from a simple hyperlink to the structure implied by this mockup:\n\n\n\n\nOr if you want, you may jump directly to the complete markup example.\n\nA hyperlink\n\nA hyperlink provides a way for the user to navigate to other web pages. Using a URL scheme for a communication app, a hyperlink can start a message, resume a conversation, or start an audio/video call. Here's a simple hyperlink that uses the first URL scheme documented in the previous post, sms:\n\n<a href=\"sms:user@example.com\">txt message</a>\n\n\nLive example: txt message\n\n\nActivating that live example likely won't do much, as user@example.com does not belong to anyone. Example.com is a domain registered purely for the purpose of examples like this one. To make this hyperlink work, you'd have to use a registered AppleID email address, which would send a txt on iOS, and fallback to email via phone provider on Android.\n\nAction labels not app names\n\nI use the link text \"txt message\" to indicate its user-centered function: the action of creating a txt message, from one human to another.\n\n\nContrast that with the mockup above (which I \"built\" using an iOS7 home screen folder), which uses the label \"Messages\", the name of the application it launches. \n\n\nThis deliberate change from \"Messages\" (application) to \"txt message\" (action) reflects the larger purpose of this exercise: people-focused rather than app-focused communication. 
Subsequent labels follow a similar approach.\n\nAn image hyperlink\n\nA simple text hyperlink is functional, yet does not provide the immediate association and recognition conveyed by the Messages icon in the mockup. There are two methods of providing an image hyperlink:\n\n\nAn <img> element inside the hyperlink\nA CSS background-image\n\n\nThe question of when to use markup for an image and when to use CSS is usually easily answered by the question: is the image meaningful content (like a photograph) or purely decorative (like a flourish)? Or by asking, is any meaning lost if the image is dropped?\n\n\nThe Messages image is neither content nor decorative. It's a button, and it's also a standard iOS user interface element, which means it does convey meaning to those users, above and beyond any text label. Here's the minimum markup for an image hyperlink, with the link text moved into the alt attribute as a fallback:\n\n<a href=\"sms:user@example.com\">\n <img src=\"ios7-messages-icon.png\" \n alt=\"txt message\"/>\n</a>\n\n\nLive example: \n\n\nImage and text hyperlink\n\nThere is a third option, as implied by the mockup, and that is to use both an image and a text label. That's a simple matter of moving the alt text outside the image:\n\n<a href=\"sms:user@example.com\">\n <img src=\"ios7-messages-icon.png\" \n alt=\"\"/>\n txt message\n</a>\n\n\nLive example: \n\ntxt message\n\n\nThe alt attribute is left deliberately empty since putting anything there would not add to the usability of the link, and could in fact detract from it.\n\n\nUnlike the mockup, the link text is next to (instead of underneath) the image, and is blue & underlined. These are all presentational aspects and will be addressed in the next post on CSS for People Focused Mobile Communication.\n\nA list of communication options\n\nThe mockup also shows multiple communication buttons in a list laid out as a grid. 
We can assign meaning to the order of the buttons - the site owner's preferred order of communication methods. Thus we use an ordered list to convey that their order is significant. Here's a few image+text links wrapped in list items inside an ordered list:\n\n<ol>\n <li><a href=\"sms:user@example.com\">\n <img src=\"ios7-messages-icon.png\" \n alt=\"\"/>\n txt message\n </a></li>\n <li><a href=\"fb-messenger://user-thread/4\">\n <img src=\"fb-messenger-icon.png\" \n alt=\"\"/>\n <abbr title=\"Facebook\">FB</abbr> message\n </a></li>\n <li><a href=\"aim:goim?screenname=tantekc&message=hi\">\n <img src=\"aim-icon.png\" \n alt=\"\"/>\n AIM chat\n </a></li>\n</ol>\n\n\nNote the use of an <abbr> element to abbreviate \"Facebook\" just to \"FB\" to shorten the overall \"FB message\" link text.\n\n\nLive example: \n\n\n\ntxt message\n\nFB message\n\n AIM chat\n\n\nJust as in the previous URLs post, the FB message link uses Zuck's ID, and the AIM chat link uses the same nickname I've had in the sidebar for a while.\n\n\nList heading\n\n\nThe mockup labels the entire grid \"Contact\" (also deliberately chosen as an action, rather than the \"Contacts\" application). This makes sense as a heading, and in the context of a home page, a second level heading:\n\n\n<h2>Contact</h2>\n\n\nNo need for a separate live example - the subheads above are all <h2> elements. 
As is this one:\n\nPutting it all together\n\n\nCombining the Contact heading with the previous ordered list, and adding the remaining buttons:\n\n\n<h2>Contact</h2>\n<ol>\n <li><a href=\"sms:user@example.com\">\n <img src=\"ios7-messages-icon.png\" \n alt=\"\"/>\n txt message\n </a></li>\n <li><a href=\"fb-messenger://user-thread/4\">\n <img src=\"fb-messenger-icon.png\" \n alt=\"\"/>\n <abbr title=\"Facebook\">FB</abbr> message\n </a></li>\n <li><a href=\"aim:goim?screenname=tantekc&message=hi\">\n <img src=\"aim-icon.png\" \n alt=\"\"/>\n AIM chat\n </a></li>\n <li><a href=\"facetime:user@example.com\">\n <img src=\"facetime-icon.png\" \n alt=\"\"/>\n FaceTime call\n </a></li>\n <li><a href=\"skype:echo123?call\">\n <img src=\"skype-icon.png\" \n alt=\"\"/>\n Skype call\n </a></li>\n <li><a href=\"https://mobile.twitter.com/t/messages\">\n <img src=\"twitter-dm-icon.png\" \n alt=\"\"/>\n Twitter DM\n </a></li>\n</ol>\n\n\n\nIn this final code example I've highlighted (using orange bold tags), the key pieces you need to change to your own identifiers on each service.\n\n\n\nLive example once more, including heading:\n\n\nContact\n\n\ntxt message\n\nFB message\n\n AIM chat\n\n FaceTime call\n\n\n Skype call\n\n\n Twitter DM\n\n\n\n\nI dropped the Google Hangouts icon since that application lacks support for any URL schemes (as noted in the previous post). Also I've re-ordered a bit from the mockup, having found that I prefer FaceTime over Skype. Pick your own from among the documented URL schemes, and order them to your preference.\n\n\nNext Steps\n\n\nAll the essential structure is there, yet it clearly needs some CSS. There's plenty to fix from inconsistent image sizes (all but the Messages & FaceTime icons are from Apple's iTunes store web pages), to blue underlined link text. 
And there's plenty to clean up to approach the look of the mockup: from the clustered center-aligned image+text button layout, to the grid layout of the buttons, to white text on the gray rounded corner ordered list background.\n\nThat's all for the next post in this series.\n\n\n"
+ }
+ ],
+ "author": [
+ {
+ "value": "",
+ "properties": {
+ "name": [
+ "Tantek \u00c7elik"
+ ],
+ "photo": [
+ "http://tantek.com/logo.jpg"
+ ],
+ "url": [
+ "http://tantek.com/"
+ ]
+ },
+ "type": [
+ "h-card"
+ ]
+ }
+ ]
+ },
+ "type": [
+ "h-entry",
+ "h-as-article"
+ ]
+ }
+ ],
+ "rels": {
+ "author": [
+ "http://tantek.com/",
+ "http://tantek.com/"
+ ],
+ "next": [
+ "http://tantek.com/2014/120/t1/great-wdc14-talk-removed-twitter-follow-button-js"
+ ],
+ "home": [
+ "http://tantek.com/"
+ ],
+ "prev": [
+ "http://tantek.com/2014/118/t2/indiewebcamp-nyc-lunch-sponsor-dinner"
+ ],
+ "hub": [
+ "http://pubsubhubbub.appspot.com/"
+ ],
+ "webmention": [
+ "http://webmention.io/tantek.com/webmention"
+ ]
+ },
+ "alternates": [
+ {
+ "type": "application/atom+xml",
+ "url": "http://tantek.com/updates.atom",
+ "rel": "home"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tests/interpret/article_no_p-name.json b/tests/interpret/article_no_p-name.json
new file mode 100644
index 0000000..56caafb
--- /dev/null
+++ b/tests/interpret/article_no_p-name.json
+{
+ "alternates": [
+ {
+ "rel": "feed",
+ "type": "application/rss+xml",
+ "url": "http://werd.io/2014/egg-brooklyn?_t=rss"
+ },
+ {
+ "rel": "feed",
+ "type": "application/rss+xml",
+ "url": "http://werd.io/content/all?_t=rss"
+ }
+ ],
+ "items": [
+ {
+ "children": [
+ {
+ "properties": {
+ "name": [
+ ""
+ ],
+ "photo": [
+ "http://werd.io/gfx/users/default-00.png"
+ ],
+ "url": [
+ "\n"
+ ]
+ },
+ "type": [
+ "h-card"
+ ],
+ "value": "\n\n\n"
+ }
+ ],
+ "properties": {
+ "author": [
+ {
+ "properties": {
+ "name": [
+ "Ben Werdm\u00fcller"
+ ],
+ "photo": [
+ "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg"
+ ],
+ "url": [
+ "http://werd.io/profile/benwerd",
+ "http://werd.io/profile/benwerd",
+ "http://werd.io/profile/benwerd"
+ ]
+ },
+ "type": [
+ "h-card"
+ ],
+ "value": "\n\nBen Werdm\u00fcller\n\n\n"
+ }
+ ],
+ "content": [
+ {
+ "html": "\n<h2 class=\"p-photo\"><a href=\"http://werd.io/2014/egg-brooklyn\">Egg, Brooklyn</a></h2>\n<p style=\"text-align: center\">\n<a href=\"http://werd.io/file/541c3accbed7ded9797e59b2/IMG_20140919_101510.jpg\"><img class=\"u-photo\" src=\"http://werd.io/file/541c3accbed7ded9797e59bb/thumb.jpg\"/></a>\n</p>\n<p>Give me crayons and I will draw a rocketship. Fact.</p>\n",
+ "value": "\nEgg, Brooklyn\n\n\n\nGive me crayons and I will draw a rocketship. Fact.\n"
+ }
+ ],
+ "name": [
+ "Ben Werdm\u00fcller\n\n\n\n\u00a0\n\n\n\n\n2014-09-19T14:16:45+00:00\n\n\n\n\nEgg, Brooklyn\n\n\n\nGive me crayons and I will draw a rocketship. Fact.\n\n\n\n\n\n\n\n 1 star\n 1 comment\n\n\n\n\n\n\n\n\n\n\n\n\n\nsorry to break it to you, that is a squid.\nKyle Mahan,\n Sep 20 2014\n on kylewm.com\n\n\n\n\n\n\n\n\n\n\nDavid Walker\n liked this post\n \nSep 20 2014 on facebook.com\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n $(document).ready(function () {\n\n $('#extrafield').html('<input type=\"hidden\" name=\"validator\" value=\"http://werd.io/2014/egg-brooklyn\">');\n\n })\n \n\n\n\n Also on:\n flickr facebook"
+ ],
+ "photo": [
+ "Egg, Brooklyn",
+ "http://werd.io/file/541c3accbed7ded9797e59bb/thumb.jpg"
+ ],
+ "published": [
+ "2014-09-19T14:16:45+0000"
+ ],
+ "syndication": [
+ "https://www.flickr.com/photos/benwerd/15264900606/",
+ "https://facebook.com/10100928029673259"
+ ],
+ "url": [
+ "http://werd.io/2014/egg-brooklyn"
+ ]
+ },
+ "type": [
+ "h-entry"
+ ]
+ }
+ ],
+ "rels": {
+ "apple-touch-icon": [
+ "http://werd.io/gfx/logos/apple-icon-57x57.png",
+ "http://werd.io/gfx/logos/apple-icon-72x72.png",
+ "http://werd.io/gfx/logos/apple-icon-114x114.png",
+ "http://werd.io/gfx/logos/apple-icon-144x144.png",
+ "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg"
+ ],
+ "apple-touch-icon-precomposed": [
+ "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg"
+ ],
+ "author": [
+ "http://werd.io/humans.txt"
+ ],
+ "authorization_endpoint": [
+ "https://indieauth.com/auth"
+ ],
+ "feed": [
+ "http://werd.io/content/all"
+ ],
+ "http://webmention.org/": [
+ "http://werd.io/webmention/"
+ ],
+ "hub": [
+ "http://benwerd.superfeedr.com/"
+ ],
+ "icon": [
+ "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg"
+ ],
+ "micropub": [
+ "http://werd.io/micropub/endpoint"
+ ],
+ "openid.delegate": [
+ "http://werd.io/"
+ ],
+ "openid.server": [
+ "https://indieauth.com/openid"
+ ],
+ "permalink": [
+ "http://werd.io/2014/egg-brooklyn"
+ ],
+ "schema.DC": [
+ "http://purl.org/dc/elements/1.1/"
+ ],
+ "shortcut": [
+ "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg"
+ ],
+ "stylesheet": [
+ "http://werd.io/external/bootstrap/assets/css/bootstrap.css",
+ "http://werd.io/external/font-awesome/css/font-awesome.min.css",
+ "http://werd.io/external/bootstrap/assets/css/bootstrap-responsive.css",
+ "http://werd.io/css/default.css",
+ "http://cdn.leafletjs.com/leaflet-0.5/leaflet.css",
+ "http://werd.io/styles/site/",
+ "http://werd.io/Themes/Cherwell/css/default.css",
+ "http://werd.io/external/mediaelement/build/mediaelementplayer.css",
+ "http://werd.io/external/summernote/dist/summernote.css",
+ "http://werd.io/external/mention/recommended-styles.css"
+ ],
+ "syndication": [
+ "https://www.flickr.com/photos/benwerd/15264900606/",
+ "https://facebook.com/10100928029673259"
+ ],
+ "token_endpoint": [
+ "http://werd.io/indieauth/token"
+ ],
+ "webmention": [
+ "http://werd.io/webmention/"
+ ]
+ }
+}
index 0000000..8a6b419
--- /dev/null
+{
+ "items": [
+ {
+ "type": [
+ "h-entry"
+ ],
+ "properties": {
+ "name": [
+ "foo"
+ ],
+ "content": [
+ "Поч"
+ ]
+ }
+ }
+ ]
+}
index 0000000..759e89d
--- /dev/null
+{
+ "items": [
+ {
+ "properties": {
+ "name": [
+ "Test Article with Two Published Dates"
+ ],
+ "published": [
+ "2014-04-30T12:11:00-0800",
+ "2014-04-30T12:11:00-0800"
+ ],
+ "content": [
+ {
+ "html": "",
+ "value": ""
+ }
+ ],
+ "author": [
+ {
+ "value": "",
+ "properties": {
+ "name": [
+ "Aaron Parecki"
+ ],
+ "url": [
+ "http://aaronparecki.com/"
+ ]
+ },
+ "type": [
+ "h-card"
+ ]
+ }
+ ]
+ },
+ "type": [
+ "h-entry",
+ "h-as-article"
+ ]
+ }
+ ],
+ "rels": {
+ },
+ "alternates": [
+ {
+ }
+ ]
+}
index 0000000..940f232
--- /dev/null
+{
+ "items": [
+ {
+ "properties": {
+ "name": [
+ "I follow thee"
+ ],
+ "published": [
+ "2014-05-05T10:10:53-07:00"
+ ],
+ "author": [
+ {
+ "value": "Ryan Barrett",
+ "properties": {
+ "name": [
+ "Ryan Barrett"
+ ],
+ "photo": [
+ "https://secure.gravatar.com/avatar/947b5f3f323da0ef785b6f02d9c265d6?s=96&d=https%3A%2F%2Fsecure.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&r=G"
+ ],
+ "url": [
+ "http://snarfed.org/"
+ ]
+ },
+ "type": [
+ "h-card"
+ ]
+ }
+ ],
+ "follow-of": [
+ "http://other/person"
+ ],
+ "url": [
+ "https://snarfed.org/2014-05-05_follow"
+ ],
+ "content": [
+ {
+ "html": "I follow thee",
+ "value": "I follow thee"
+ }
+ ]
+ },
+ "type": [
+ "h-entry"
+ ]
+ }
+ ]
+}
diff --git a/tests/interpret/hwc-event.json b/tests/interpret/hwc-event.json
new file mode 100644
index 0000000..d39212c
--- /dev/null
+++ b/tests/interpret/hwc-event.json
+{
+ "alternates": [
+ {
+ "type": "application/rss+xml",
+ "url": "http://werd.io/2014/homebrew-website-club-4?_t=rss",
+ "rel": "feed"
+ },
+ {
+ "type": "application/rss+xml",
+ "url": "http://werd.io/content/all?_t=rss",
+ "rel": "feed"
+ }
+ ],
+ "items": [
+ {
+ "type": [
+ "h-card"
+ ],
+ "properties": {
+ "photo": [
+ "http://werd.io/file/52be39babed7deb701668dd8"
+ ],
+ "name": [
+ "Ben Werdm\u00fcller"
+ ],
+ "url": [
+ "http://werd.io/profile/benwerd",
+ "http://werd.io/profile/benwerd"
+ ]
+ }
+ },
+ {
+ "type": [
+ "h-event"
+ ],
+ "properties": {
+ "published": [
+ "2014-05-05T16:34:30+00:00"
+ ],
+ "end": [
+ "2014-05-07T19:30:00+00:00"
+ ],
+ "start": [
+ "2014-05-07T18:30:00+00:00"
+ ],
+ "content": [
+ {
+ "value": "\n \n \n Homebrew Website Club\n \n \n \n Discuss progress; meet up; make new friends. \n \n Location: Mozilla SF, 1st floor, 2 Harrison st. (at Embarcadero), San Francisco, CA \n \n \n Time: May 7th, 6:30pm\n \n \n Ends: May 7th, 7:30pm\n \n \n\n \nAre you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with like-minded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project ...\n\nSee the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.\n\nOriginally posted on indiewebcamp.com. There's also a companion event at Mozilla Portland.\n\nHere's the Facebook event, if you prefer.\n ",
+ "html": "\n <div>\n <h2 class=\"p-name\">\n <a class=\"u-url\" href=\"http://werd.io/2014/homebrew-website-club-4\">Homebrew Website Club</a>\n </h2>\n <div class=\"well\">\n <p class=\"p-summary\">\n Discuss progress; meet up; make new friends. </p>\n <p>\n Location: <span class=\"p-location\">Mozilla SF, 1st floor, 2 Harrison st. (at Embarcadero), San Francisco, CA </span>\n </p>\n <p>\n Time: <time class=\"dt-start\" datetime=\"2014-05-07T18:30:00+00:00\">May 7th, 6:30pm</time>\n </p>\n <p>\n Ends: <time class=\"dt-end\" datetime=\"2014-05-07T19:30:00+00:00\">May 7th, 7:30pm</time>\n </p>\n </div>\n\n \n<p>Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with like-minded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project ...</p>\n\n<p><a href=\"http://tantek.com/2013/332/b1/homebrew-website-club-newsletter\">See the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.</a></p>\n\n<p><a href=\"http://indiewebcamp.com/events/2014-05-07-homebrew-website-club\">Originally posted on indiewebcamp.com</a>. There's also a companion event at Mozilla Portland.</p>\n\n<p><a class=\"u-syndication\" href=\"https://www.facebook.com/events/1430990723825351/\" rel=\"syndication\">Here's the Facebook event, if you prefer</a>.</p>\n</div> "
+ }
+ ],
+ "location": [
+ "Mozilla SF, 1st floor, 2 Harrison st. (at Embarcadero), San Francisco, CA "
+ ],
+ "author": [
+ {
+ "value": "\n \n Ben Werdm\u00fcller\n \n ",
+ "type": [
+ "h-card"
+ ],
+ "properties": {
+ "photo": [
+ "http://werd.io/file/52be39babed7deb701668dd8"
+ ],
+ "logo": [
+ "http://werd.io/file/52be39babed7deb701668dd8"
+ ],
+ "name": [
+ "Ben Werdm\u00fcller"
+ ],
+ "url": [
+ "http://werd.io/profile/benwerd",
+ "http://werd.io/profile/benwerd"
+ ]
+ }
+ }
+ ],
+ "name": [
+ "\n Homebrew Website Club\n "
+ ],
+ "syndication": [
+ "https://www.facebook.com/events/1430990723825351/"
+ ],
+ "summary": [
+ "\n Discuss progress; meet up; make new friends. "
+ ],
+ "url": [
+ "http://werd.io/2014/homebrew-website-club-4",
+ "http://werd.io/2014/homebrew-website-club-4"
+ ]
+ }
+ }
+ ],
+ "rels": {
+ "http://webmention.org/": [
+ "http://werd.io/webmention/"
+ ],
+ "icon": [
+ "http://werd.io/file/52be39babed7deb701668dd8"
+ ],
+ "syndication": [
+ "https://www.facebook.com/events/1430990723825351/"
+ ],
+ "stylesheet": [
+ "http://werd.io/external/bootstrap/assets/css/bootstrap.css",
+ "http://werd.io/external/font-awesome/css/font-awesome.min.css",
+ "http://werd.io/external/bootstrap/assets/css/bootstrap-responsive.css",
+ "http://werd.io/css/default.css",
+ "http://werd.io/IdnoPlugins/Checkin/external/leaflet/leaflet.css",
+ "http://werd.io/styles/site/"
+ ],
+ "webmention": [
+ "http://werd.io/webmention/"
+ ],
+ "feed": [
+ "http://werd.io/content/all"
+ ],
+ "shortcut": [
+ "http://werd.io/file/52be39babed7deb701668dd8"
+ ],
+ "author": [
+ "http://werd.io/humans.txt"
+ ],
+ "apple-touch-icon-precomposed": [
+ "http://werd.io/file/52be39babed7deb701668dd8"
+ ],
+ "permalink": [
+ "http://werd.io/2014/homebrew-website-club-4"
+ ]
+ }
+}
\ No newline at end of file
diff --git a/tests/interpret/location_h-adr.json b/tests/interpret/location_h-adr.json
new file mode 100644
index 0000000..4520d46
--- /dev/null
+++ b/tests/interpret/location_h-adr.json
+{
+ "items": [
+ {
+ "type": [
+ "h-entry"
+ ],
+ "properties": {
+ "adr": [
+ {
+ "type": [
+ "h-adr"
+ ],
+ "properties": {
+ "street-address": ["17 Austerstræti"],
+ "locality": ["Reykjavík"],
+ "country-name": ["Iceland"],
+ "postal-code": ["107"],
+ "name": ["17 Austerstræti Reykjavík Iceland 107"]
+ }
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/tests/interpret/location_h-card.json b/tests/interpret/location_h-card.json
new file mode 100644
index 0000000..ec11887
--- /dev/null
+++ b/tests/interpret/location_h-card.json
+{
+ "items": [
+ {
+ "type": [
+ "h-entry"
+ ],
+ "properties": {
+ "location": [
+ {
+ "type": [
+ "h-card"
+ ],
+ "properties": {
+ "name": ["Timeless Coffee Roasters"],
+ "latitude": ["37.83"],
+ "longitude": ["-122.25"]
+ }
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/tests/interpret/location_h-geo.json b/tests/interpret/location_h-geo.json
new file mode 100644
index 0000000..6e9722c
--- /dev/null
+++ b/tests/interpret/location_h-geo.json
+{
+ "items": [
+ {
+ "type": [
+ "h-entry"
+ ],
+ "properties": {
+ "geo": [
+ {
+ "type": [
+ "h-geo"
+ ],
+ "properties": {
+ "altitude": ["123.0"],
+ "latitude": ["37.83"],
+ "longitude": ["-122.25"]
+ }
+ }
+ ]
+ }
+ }
+ ]
+}
index 0000000..940969b
--- /dev/null
+{
+ "items": [
+ {
+ "type": [
+ "h-entry"
+ ],
+ "properties": {
+ "latitude": ["37.83"],
+ "longitude": ["-122.25"]
+ }
+ }
+ ]
+}
index 0000000..a0f6b28
--- /dev/null
+{
+ "items": [
+ {
+ "properties": {
+ "author": [
+ {
+ "properties": {
+ "name": [
+ "Kyle Mahan"
+ ],
+ "photo": [
+ "https://kylewm.com/static/img/users/kyle.jpg"
+ ],
+ "url": [
+ "https://kylewm.com"
+ ]
+ },
+ "type": [
+ "h-card"
+ ],
+ "value": "Kyle Mahan"
+ }
+ ],
+ "category": [
+ "indieweb"
+ ],
+ "comment": [
+ {
+ "properties": {
+ "author": [
+ {
+ "properties": {
+ "name": [
+ "Aaron Parecki"
+ ],
+ "photo": [
+ "https://twitter.com/aaronpk/profile_image?size=original"
+ ],
+ "url": [
+ "http://aaronparecki.com"
+ ]
+ },
+ "type": [
+ "h-card"
+ ],
+ "value": "Aaron Parecki"
+ }
+ ],
+ "content": [
+ {
+ "html": "\n <a href=\"https://twitter.com/kylewmahan\">@kylewmahan</a> I usually click through a couple levels up looking to see if any of the URLs up the chain show comments <a href=\"https://twitter.com/search?q=%23indieweb\">#indieweb</a>\n ",
+ "value": "\n @kylewmahan I usually click through a couple levels up looking to see if any of the URLs up the chain show comments #indieweb\n "
+ }
+ ],
+ "name": [
+ "Aaron Parecki\n \n\n \n @kylewmahan I usually click through a couple levels up looking to see if any of the URLs up the chain show comments #indieweb\n \n \n on twitter.com"
+ ],
+ "published": [
+ "2015-10-05T17:07:20-07:00"
+ ],
+ "url": [
+ "https://twitter.com/aaronpk/status/651186942114050050"
+ ]
+ },
+ "type": [
+ "h-cite"
+ ],
+ "value": "Aaron Parecki\n \n\n \n @kylewmahan I usually click through a couple levels up looking to see if any of the URLs up the chain show comments #indieweb\n \n \n on twitter.com"
+ }
+ ],
+ "content": [
+ {
+ "html": "\n <p>big thing missing from my <a href=\"/tags/indieweb\">#indieweb</a> experience is being able to see other people\u2019s comments before replying. tough problem to solve.</p>\n \n ",
+ "value": "\n big thing missing from my #indieweb experience is being able to see other people\u2019s comments before replying. tough problem to solve.\n \n "
+ }
+ ],
+ "like": [
+ {
+ "properties": {
+ "author": [
+ {
+ "properties": {
+ "name": [
+ ""
+ ],
+ "photo": [
+ "https://kylewm.com/imageproxy?url=https%3A%2F%2Ftwitter.com%2Fbenwerd%2Fprofile_image%3Fsize%3Doriginal&size=48&sig=fde7ce5635f5ea132a2545ff5c7d3d33"
+ ],
+ "url": [
+ "https://twitter.com/benwerd"
+ ]
+ },
+ "type": [
+ "h-card"
+ ],
+ "value": ""
+ }
+ ],
+ "name": [
+ ""
+ ],
+ "url": [
+ "https://twitter.com/kylewmahan/status/651186266701107200"
+ ]
+ },
+ "type": [
+ "h-cite"
+ ],
+ "value": "https://twitter.com/kylewmahan/status/651186266701107200"
+ }
+ ],
+ "name": [
+ "big thing missing from my #indieweb experience is being able to see other people\u2019s comments before replying. tough problem to solve."
+ ],
+ "published": [
+ "2015-10-05T17:04:35-07:00"
+ ],
+ "shortlink": [
+ "https://kylewm.com/n/4d_1"
+ ],
+ "syndication": [
+ "https://twitter.com/kylewmahan/status/651186266701107200"
+ ],
+ "uid": [
+ "https://kylewm.com/2015/10/big-thing-missing-from-my-indieweb-experience-is"
+ ],
+ "url": [
+ "https://kylewm.com/2015/10/big-thing-missing-from-my-indieweb-experience-is",
+ "https://kylewm.com/n/4d_1"
+ ]
+ },
+ "type": [
+ "h-entry"
+ ]
+ }
+ ],
+ "rel-urls": {
+ "https://indieauth.com/auth": {
+ "rels": [
+ "authorization_endpoint"
+ ],
+ "text": ""
+ },
+ "https://indieauth.com/openid": {
+ "rels": [
+ "openid.server"
+ ],
+ "text": ""
+ },
+ "https://keybase.io/kylewm/key.asc": {
+ "rels": [
+ "pgpkey"
+ ],
+ "text": "",
+ "type": "application/pgp-keys"
+ },
+ "https://kylewm.com": {
+ "rels": [
+ "openid.delegate"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/_themes/boxy/style.css?version=2015-06-25": {
+ "rels": [
+ "stylesheet"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/everything": {
+ "rels": [
+ "feed"
+ ],
+ "text": "",
+ "type": "text/html"
+ },
+ "https://kylewm.com/foaf.rdf": {
+ "rels": [
+ "meta"
+ ],
+ "text": "",
+ "title": "Contact",
+ "type": "application/rdf+xml"
+ },
+ "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=114&sig=b57d1f32eb45988e4b1e7f5a53afd072": {
+ "rels": [
+ "apple-touch-icon"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=152&sig=cb27d9fb6b285da683bb869ba974ee53": {
+ "rels": [
+ "apple-touch-icon"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=60&sig=deebbb906749f01b98a4291e7b2cff7d": {
+ "rels": [
+ "apple-touch-icon"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=76&sig=7606f9576a5cdbfeac9fe773b19d5bf1": {
+ "rels": [
+ "apple-touch-icon"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/micropub": {
+ "rels": [
+ "micropub"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/static/img/users/kyle.jpg": {
+ "rels": [
+ "shortcut",
+ "icon",
+ "apple-touch-icon"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/static/pygments.css": {
+ "rels": [
+ "stylesheet"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/token": {
+ "rels": [
+ "token_endpoint"
+ ],
+ "text": ""
+ },
+ "https://kylewm.com/webmention": {
+ "rels": [
+ "webmention"
+ ],
+ "text": ""
+ },
+ "https://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css": {
+ "rels": [
+ "stylesheet"
+ ],
+ "text": ""
+ },
+ "https://twitter.com/kylewmahan/status/651186266701107200": {
+ "rels": [
+ "syndication"
+ ],
+ "text": ""
+ },
+ "https://webmention.io/webmention?forward=https://kylewm.com/webmention": {
+ "rels": [
+ "pingback"
+ ],
+ "text": ""
+ },
+ "ni:///sha-256;LXQj307VecrQ7BPxkMhuI-rM14CktmXjy16DjI0MMAE?ct=application/x-x509-user-cert": {
+ "rels": [
+ "me"
+ ],
+ "text": ""
+ }
+ },
+ "rels": {
+ "apple-touch-icon": [
+ "https://kylewm.com/static/img/users/kyle.jpg",
+ "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=60&sig=deebbb906749f01b98a4291e7b2cff7d",
+ "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=76&sig=7606f9576a5cdbfeac9fe773b19d5bf1",
+ "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=114&sig=b57d1f32eb45988e4b1e7f5a53afd072",
+ "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=152&sig=cb27d9fb6b285da683bb869ba974ee53"
+ ],
+ "authorization_endpoint": [
+ "https://indieauth.com/auth"
+ ],
+ "feed": [
+ "https://kylewm.com/everything"
+ ],
+ "icon": [
+ "https://kylewm.com/static/img/users/kyle.jpg"
+ ],
+ "me": [
+ "ni:///sha-256;LXQj307VecrQ7BPxkMhuI-rM14CktmXjy16DjI0MMAE?ct=application/x-x509-user-cert"
+ ],
+ "meta": [
+ "https://kylewm.com/foaf.rdf"
+ ],
+ "micropub": [
+ "https://kylewm.com/micropub"
+ ],
+ "openid.delegate": [
+ "https://kylewm.com"
+ ],
+ "openid.server": [
+ "https://indieauth.com/openid"
+ ],
+ "pgpkey": [
+ "https://keybase.io/kylewm/key.asc"
+ ],
+ "pingback": [
+ "https://webmention.io/webmention?forward=https://kylewm.com/webmention"
+ ],
+ "shortcut": [
+ "https://kylewm.com/static/img/users/kyle.jpg"
+ ],
+ "stylesheet": [
+ "https://kylewm.com/_themes/boxy/style.css?version=2015-06-25",
+ "https://kylewm.com/static/pygments.css",
+ "https://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css"
+ ],
+ "syndication": [
+ "https://twitter.com/kylewmahan/status/651186266701107200"
+ ],
+ "token_endpoint": [
+ "https://kylewm.com/token"
+ ],
+ "webmention": [
+ "https://kylewm.com/webmention"
+ ]
+ }
+}
index 0000000..4f63e14
--- /dev/null
+{
+ "rels": {},
+ "alternates": [],
+ "items": [{
+ "type": ["h-entry"],
+ "properties": {
+ "name": ["Example title"],
+ "content": [{
+ "value": "This is an example document",
+ "html": "This is an <img alt=\"alt text\" title=\"the title\" src=\"/static/img.jpg\"/> example document with <a href=\"relative_paths.html\">relative paths</a>."
+ }]
+ }
+ }]
+}
diff --git a/tests/interpret/reply_h-cite.json b/tests/interpret/reply_h-cite.json
new file mode 100644
index 0000000..3444e7c
--- /dev/null
+++ b/tests/interpret/reply_h-cite.json
+{
+ "items": [
+ {
+ "properties": {
+ "published": [
+ "2014-05-10T14:48:33-07:00"
+ ],
+ "author": [
+ {
+ "properties": {
+ "photo": [
+ "https://aaronparecki.com/images/aaronpk.png"
+ ],
+ "logo": [
+ "https://aaronparecki.com/images/aaronpk.png"
+ ],
+ "url": [
+ "http://aaronparecki.com/"
+ ],
+ "name": [
+ "Aaron Parecki"
+ ]
+ },
+ "value": "\n \n aaronparecki.com\n Aaron Parecki\n Aaron Parecki\n ",
+ "type": [
+ "h-card"
+ ]
+ }
+ ],
+ "name": [
+ "@thedatahive We're working on it ;-) http://indiewebcamp.com/generations"
+ ],
+ "url": [
+ "http://aaronparecki.com/replies/2014/05/10/1/indieweb"
+ ],
+ "shortlink": [
+ "http://aaron.pk/r4W01"
+ ],
+ "category": [
+ "indieweb "
+ ],
+ "syndication": [
+ "https://twitter.com/aaronpk/status/465247041078034432"
+ ],
+ "location": [
+ {
+ "properties": {
+ "latitude": [
+ "45.512284"
+ ],
+ "name": [
+ "Portland, Oregon, USA"
+ ],
+ "longitude": [
+ "-122.612955"
+ ]
+ },
+ "value": " Portland, Oregon, USA",
+ "type": [
+ "h-card"
+ ]
+ }
+ ],
+ "content": [
+ {
+ "value": "@thedatahive We're working on it ;-) http://indiewebcamp.com/generations",
+ "html": "<a href=\"http://twitter.com/thedatahive\">@thedatahive</a> We're working on it ;-) <a href=\"http://indiewebcamp.com/generations\"><span class=\"protocol\">http://</span>indiewebcamp.com/generations</a>"
+ }
+ ],
+ "in-reply-to": [
+ {
+ "properties": {
+ "author": [
+ {
+ "properties": {
+ "photo": [
+ "http://aaronparecki.com/images/nouns/user.svg"
+ ],
+ "logo": [
+ "http://aaronparecki.com/images/nouns/user.svg"
+ ],
+ "url": [
+ "http://datahiveconsulting.com/author/lynne/"
+ ],
+ "name": [
+ "Lynne Baer"
+ ]
+ },
+ "value": "\n \n \n \n datahiveconsulting.com/author/lynne\n Lynne Baer\n ",
+ "type": [
+ "h-card"
+ ]
+ }
+ ],
+ "name": [
+ "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ..."
+ ],
+ "url": [
+ "http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/"
+ ],
+ "content": [
+ {
+ "value": "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ...",
+ "html": "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ..."
+ }
+ ]
+ },
+ "value": " \n \n \n \n datahiveconsulting.com/author/lynne\n Lynne Baer\n \nLast week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ...permalink",
+ "type": [
+ "h-cite"
+ ]
+ }
+ ]
+ },
+ "type": [
+ "h-entry"
+ ]
+ }
+ ],
+ "rels": {
+ "pingback": [
+ "http://webmention.io/webmention?forward=http%3A%2F%2Faaronparecki.com%2Fwebmention.php"
+ ],
+ "icon": [
+ "/favicon.ico",
+ "/images/aaronpk-256.jpg"
+ ],
+ "search": [
+ "/opensearch.xml"
+ ],
+ "authorization_endpoint": [
+ "https://indieauth.com/auth"
+ ],
+ "apple-touch-icon-precomposed": [
+ "/images/aaronpk-256.jpg"
+ ],
+ "openid.server": [
+ "https://indieauth.com/openid"
+ ],
+ "author": [
+ "http://aaronparecki.com/",
+ "https://plus.google.com/117847912875913905493"
+ ],
+ "token_endpoint": [
+ "https://tokens.oauth.net/token"
+ ],
+ "prev": [
+ "/replies/2014/05/09/1/"
+ ],
+ "micropub": [
+ "https://aaronparecki.com/api/post"
+ ],
+ "webmention": [
+ "https://aaronparecki.com/webmention.php"
+ ],
+ "stylesheet": [
+ "/bootstrap-2.2.2/css/bootstrap.min.css",
+ "/bootstrap-2.2.2/css/bootstrap-responsive.min.css",
+ "/css/style.css?body=1",
+ "/css/font-awesome/css/font-awesome.min.css",
+ "/css/aaronpk.css"
+ ],
+ "openid.delegate": [
+ "https://aaronparecki.com/"
+ ],
+ "shortcut": [
+ "/favicon.ico",
+ "/images/aaronpk-256.jpg"
+ ],
+ "license": [
+ "http://creativecommons.org/licenses/by/3.0/"
+ ],
+ "tag": [
+ "/tag/indieweb"
+ ]
+ }
+}
index 0000000..6ab012d
--- /dev/null
+{
+ "items": [
+ {
+ "properties": {
+ "in-reply-to": [
+ "https://indiewebcamp.com/events/2015-03-25-homebrew-website-club",
+ "https://kylewm.com/2015/03/homebrew-website-club-2015-march-25"
+ ],
+ "invitee": [
+ {
+ "properties": {
+ "name": [
+ "Silona Bonewald"
+ ],
+ "photo": [
+ "https://graph.facebook.com/v2.2/10155109753190015/picture?type=large"
+ ],
+ "url": [
+ "https://www.facebook.com/10155109753190015"
+ ]
+ },
+ "type": [
+ "h-card"
+ ],
+ "value": "Silona Bonewald"
+ }
+ ],
+ "name": [
+ "invited"
+ ],
+ "uid": [
+ "tag:facebook.com,2013:1565113317092307_rsvp_10155109753190015"
+ ],
+ "url": [
+ "https://www.facebook.com/1565113317092307#10155109753190015"
+ ]
+ },
+ "type": [
+ "h-entry"
+ ]
+ }
+ ],
+ "rels": {}
+}
index 0000000..e5372cf
--- /dev/null
+{
+ "items": [
+ {
+ "children": [
+ {
+ "value": "\n\nI'm Ryan Barrett.\nI live, work, and play in\n San FranciscoCalifornia.\nI code, write, and post pictures here.\n\n\n\n\npublic@ryanb.org\npublic PGP key\n\n\n\n \n \n Home\n \n \n \n \n Search\n \n \n \n \n Archives\n \n \n \n \n Twitter\n \n \n \n \n Facebook\n \n \n \n \n Google+\n \n \n \n \n GitHub\n \n \n \n \n RSS Feed\n \n \n\n",
+ "properties": {
+ "name": [
+ "Ryan Barrett"
+ ],
+ "photo": [
+ "https://snarfed.org/ryan_profile_square_thumb.jpg"
+ ],
+ "url": [
+ "https://snarfed.org/"
+ ],
+ "key": [
+ "https://snarfed.org/pubkey.txt"
+ ],
+ "region": [
+ "California"
+ ],
+ "locality": [
+ "San Francisco"
+ ],
+ "email": [
+ "mailto:public@ryanb.org"
+ ]
+ },
+ "type": [
+ "h-card"
+ ]
+ }
+ ],
+ "properties": {
+ "name": [
+ "Homebrew Website Club",
+ "Homebrew Website Club"
+ ],
+ "published": [
+ "2014-05-05T10:10:53-07:00"
+ ],
+ "author": [
+ {
+ "value": " Ryan Barrett",
+ "properties": {
+ "name": [
+ "Ryan Barrett"
+ ],
+ "photo": [
+ "https://secure.gravatar.com/avatar/947b5f3f323da0ef785b6f02d9c265d6?s=96&d=https%3A%2F%2Fsecure.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&r=G"
+ ],
+ "url": [
+ "http://snarfed.org/"
+ ]
+ },
+ "type": [
+ "h-card"
+ ]
+ }
+ ],
+ "in-reply-to": [
+ "http://werd.io/2014/homebrew-website-club-4"
+ ],
+ "url": [
+ "https://snarfed.org/2014-05-05_homebrew-website-club-3"
+ ],
+ "content": [
+ {
+ "html": "\n\t\t\t<div class=\"e-content\"><p>RSVPs <data class=\"p-rsvp\" value=\"yes\">yes</data> to <a class=\"u-in-reply-to\" href=\"http://werd.io/2014/homebrew-website-club-4\">Homebrew Website Club</a></p>\n</div>\t\t\t\t\t",
+ "value": "\n\t\t\tRSVPs yes to Homebrew Website Club\n\t\t\t\t\t"
+ },
+ {
+ "html": "<p>RSVPs <data class=\"p-rsvp\" value=\"yes\">yes</data> to <a class=\"u-in-reply-to\" href=\"http://werd.io/2014/homebrew-website-club-4\">Homebrew Website Club</a></p>\n",
+ "value": "RSVPs yes to Homebrew Website Club\n"
+ }
+ ],
+ "rsvp": [
+ "yes"
+ ]
+ },
+ "type": [
+ "h-entry",
+ "h-as-article"
+ ]
+ }
+ ],
+ "rels": {
+ "nofollow": [
+ "https://snarfed.org/2014-05-05_homebrew-website-club-3#respond"
+ ],
+ "generator": [
+ "http://wordpress.org/"
+ ],
+ "prev": [
+ "https://snarfed.org/2014-05-05_9325"
+ ],
+ "stylesheet": [
+ "https://snarfed.org/w/wp-content/plugins/nextgen-gallery/products/photocrati_nextgen/modules/nextgen_gallery_display/static/nextgen_gallery_related_images.css?ver=3.9.1",
+ "https://snarfed.org/w/wp-content/plugins/jetpack/modules/subscriptions/subscriptions.css?ver=3.9.1",
+ "https://snarfed.org/w/wp-content/plugins/jetpack/modules/widgets/widgets.css?ver=20121003",
+ "https://snarfed.org/w/wp-content/plugins/jetpack/modules/carousel/jetpack-carousel.css?ver=20120629",
+ "https://snarfed.org/w/wp-content/plugins/jetpack/modules/tiled-gallery/tiled-gallery/tiled-gallery.css?ver=2012-09-21",
+ "https://snarfed.org/w/wp-includes/js/mediaelement/mediaelementplayer.min.css?ver=2.13.0",
+ "https://snarfed.org/w/wp-includes/js/mediaelement/wp-mediaelement.css?ver=3.9.1",
+ "https://snarfed.org/w/wp-content/themes/snarfed-ryu/style.css?ver=3.9.1",
+ "https://fonts.googleapis.com/css?family=Lato:100,300,400,700,900,100italic,300italic,400italic,700italic,900italic",
+ "https://fonts.googleapis.com/css?family=Playfair+Display:400,700,900,400italic,700italic,900italic&subset=latin,latin-ext"
+ ],
+ "canonical": [
+ "https://snarfed.org/2014-05-05_homebrew-website-club-3"
+ ],
+ "tag": [
+ "https://snarfed.org/category/indieweb_rsvp"
+ ],
+ "webmention": [
+ "https://snarfed.org/w/?webmention=endpoint"
+ ],
+ "shortcut": [
+ "https://snarfed.org/ryan_profile_square_thumb.jpg"
+ ],
+ "openid2.local_id": [
+ "http://www.google.com/profiles/heaven"
+ ],
+ "pingback": [
+ "https://snarfed.org/w/xmlrpc.php"
+ ],
+ "http://webmention.org/": [
+ "https://snarfed.org/w/?webmention=endpoint"
+ ],
+ "apple-touch-icon-precomposed": [
+ "https://snarfed.org/ryan_profile_square_thumb.jpg"
+ ],
+ "shortlink": [
+ "http://wp.me/p3EDAq-2qr"
+ ],
+ "author": [
+ "http://snarfed.org/"
+ ],
+ "publisher": [
+ "https://plus.google.com/103651231634018158746"
+ ],
+ "key": [
+ "https://snarfed.org/pubkey.txt"
+ ],
+ "designer": [
+ "http://theme.wordpress.com/"
+ ],
+ "icon": [
+ "https://snarfed.org/ryan_profile_square_thumb.jpg"
+ ],
+ "bookmark": [
+ "https://snarfed.org/2014-05-05_homebrew-website-club-3"
+ ],
+ "category": [
+ "https://snarfed.org/category/indieweb_rsvp"
+ ],
+ "home": [
+ "https://snarfed.org/",
+ "https://snarfed.org/"
+ ],
+ "me": [
+ "mailto:public@ryanb.org",
+ "https://twitter.com/schnarfed",
+ "https://www.facebook.com/snarfed.org",
+ "https://plus.google.com/+RyanBarrett",
+ "https://github.com/snarfed"
+ ],
+ "openid2.provider": [
+ "https://www.google.com/accounts/o8/ud?source=profiles"
+ ],
+ "profile": [
+ "http://gmpg.org/xfn/11"
+ ],
+ "next": [
+ "https://snarfed.org/2014-05-05_i-still-think-automattic-is-undervalued-at-1-16bn-httprecode-net20140505wordpress-parent-automattic-has-raised-160-million-now-valued-at-1-16-billion-post-money"
+ ]
+ },
+ "alternates": [
+ {
+ "url": "https://snarfed.org/feed",
+ "type": "application/rss+xml"
+ },
+ {
+ "url": "https://snarfed.org/comments/feed",
+ "type": "application/rss+xml"
+ },
+ {
+ "url": "https://snarfed.org/2014-05-05_homebrew-website-club-3/feed",
+ "type": "application/rss+xml"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tests/interpret/reply_u-in-reply-to.json b/tests/interpret/reply_u-in-reply-to.json
new file mode 100644
index 0000000..1599081
--- /dev/null
+++ b/tests/interpret/reply_u-in-reply-to.json
+{
+ "items": [
+ {
+ "children": [
+ {
+ "value": "\n\nI'm Ryan Barrett.\nI live, work, and play in\n San FranciscoCalifornia.\nI code, write, and post pictures here.\n\n\n\n\npublic@ryanb.org\npublic PGP key\n\n\n\n \n \n Home\n \n \n \n \n Search\n \n \n \n \n Archives\n \n \n \n \n Twitter\n \n \n \n \n Facebook\n \n \n \n \n Google+\n \n \n \n \n GitHub\n \n \n \n \n RSS Feed\n \n \n\n",
+ "properties": {
+ "name": [
+ "Ryan Barrett"
+ ],
+ "photo": [
+ "https://snarfed.org/ryan_profile_square_thumb.jpg"
+ ],
+ "url": [
+ "https://snarfed.org/"
+ ],
+ "key": [
+ "https://snarfed.org/pubkey.txt"
+ ],
+ "region": [
+ "California"
+ ],
+ "locality": [
+ "San Francisco"
+ ],
+ "email": [
+ "mailto:public@ryanb.org"
+ ]
+ },
+ "type": [
+ "h-card"
+ ]
+ }
+ ],
+ "properties": {
+ "name": [
+ "Re: Display likes in a facepile",
+ "Re: Display likes in a facepile"
+ ],
+ "published": [
+ "2014-03-09T22:48:22-07:00"
+ ],
+ "author": [
+ {
+ "value": " Ryan Barrett",
+ "properties": {
+ "name": [
+ "Ryan Barrett"
+ ],
+ "photo": [
+ "https://secure.gravatar.com/avatar/947b5f3f323da0ef785b6f02d9c265d6?s=96&d=https%3A%2F%2Fsecure.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&r=G"
+ ],
+ "url": [
+ "http://snarfed.org/"
+ ]
+ },
+ "type": [
+ "h-card"
+ ]
+ }
+ ],
+ "in-reply-to": [
+ "https://willnorris.com/2014/03/display-likes-in-a-facepile"
+ ],
+ "url": [
+ "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile"
+ ],
+ "content": [
+ {
+ "html": "\n\t\t\t<div class=\"e-content\"><p>oh man, so cool! thanks for doing this. can\u2019t wait to try it myself!</p>\n\n<p><a class=\"u-in-reply-to\" href=\"https://willnorris.com/2014/03/display-likes-in-a-facepile\"></a></p>\n</div>\t\t\t\t\t",
+ "value": "\n\t\t\toh man, so cool! thanks for doing this. can\u2019t wait to try it myself!\n\n\n\t\t\t\t\t"
+ },
+ {
+ "html": "<p>oh man, so cool! thanks for doing this. can\u2019t wait to try it myself!</p>\n\n<p><a class=\"u-in-reply-to\" href=\"https://willnorris.com/2014/03/display-likes-in-a-facepile\"></a></p>\n",
+ "value": "oh man, so cool! thanks for doing this. can\u2019t wait to try it myself!\n\n\n"
+ }
+ ]
+ },
+ "type": [
+ "h-entry",
+ "h-as-article"
+ ]
+ }
+ ],
+ "rels": {
+ "nofollow": [
+ "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile#respond"
+ ],
+ "generator": [
+ "http://wordpress.org/"
+ ],
+ "prev": [
+ "https://snarfed.org/2014-03-09_re-oh-in-duboce-park-theyre-entering-the-medical-tricorder-s"
+ ],
+ "stylesheet": [
+ "https://snarfed.org/w/wp-content/plugins/nextgen-gallery/products/photocrati_nextgen/modules/nextgen_gallery_display/static/nextgen_gallery_related_images.css?ver=3.9.1",
+ "https://snarfed.org/w/wp-content/plugins/jetpack/modules/subscriptions/subscriptions.css?ver=3.9.1",
+ "https://snarfed.org/w/wp-content/plugins/jetpack/modules/widgets/widgets.css?ver=20121003",
+ "https://snarfed.org/w/wp-content/plugins/jetpack/modules/carousel/jetpack-carousel.css?ver=20120629",
+ "https://snarfed.org/w/wp-content/plugins/jetpack/modules/tiled-gallery/tiled-gallery/tiled-gallery.css?ver=2012-09-21",
+ "https://snarfed.org/w/wp-includes/js/mediaelement/mediaelementplayer.min.css?ver=2.13.0",
+ "https://snarfed.org/w/wp-includes/js/mediaelement/wp-mediaelement.css?ver=3.9.1",
+ "https://snarfed.org/w/wp-content/themes/snarfed-ryu/style.css?ver=3.9.1",
+ "https://fonts.googleapis.com/css?family=Lato:100,300,400,700,900,100italic,300italic,400italic,700italic,900italic",
+ "https://fonts.googleapis.com/css?family=Playfair+Display:400,700,900,400italic,700italic,900italic&subset=latin,latin-ext"
+ ],
+ "canonical": [
+ "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile"
+ ],
+ "tag": [
+ "https://snarfed.org/category/indieweb"
+ ],
+ "webmention": [
+ "https://snarfed.org/w/?webmention=endpoint"
+ ],
+ "shortcut": [
+ "https://snarfed.org/ryan_profile_square_thumb.jpg"
+ ],
+ "openid2.local_id": [
+ "http://www.google.com/profiles/heaven"
+ ],
+ "pingback": [
+ "https://snarfed.org/w/xmlrpc.php"
+ ],
+ "http://webmention.org/": [
+ "https://snarfed.org/w/?webmention=endpoint"
+ ],
+ "apple-touch-icon-precomposed": [
+ "https://snarfed.org/ryan_profile_square_thumb.jpg"
+ ],
+ "shortlink": [
+ "http://wp.me/p3EDAq-2ak"
+ ],
+ "author": [
+ "http://snarfed.org/"
+ ],
+ "publisher": [
+ "https://plus.google.com/103651231634018158746"
+ ],
+ "key": [
+ "https://snarfed.org/pubkey.txt"
+ ],
+ "designer": [
+ "http://theme.wordpress.com/"
+ ],
+ "icon": [
+ "https://snarfed.org/ryan_profile_square_thumb.jpg"
+ ],
+ "bookmark": [
+ "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile"
+ ],
+ "category": [
+ "https://snarfed.org/category/indieweb"
+ ],
+ "home": [
+ "https://snarfed.org/",
+ "https://snarfed.org/"
+ ],
+ "me": [
+ "mailto:public@ryanb.org",
+ "https://twitter.com/schnarfed",
+ "https://www.facebook.com/snarfed.org",
+ "https://plus.google.com/+RyanBarrett",
+ "https://github.com/snarfed"
+ ],
+ "openid2.provider": [
+ "https://www.google.com/accounts/o8/ud?source=profiles"
+ ],
+ "profile": [
+ "http://gmpg.org/xfn/11"
+ ],
+ "next": [
+ "https://snarfed.org/2014-03-10_re-joining-the-indie-web-my-motivation"
+ ]
+ },
+ "alternates": [
+ {
+ "url": "https://snarfed.org/feed",
+ "type": "application/rss+xml"
+ },
+ {
+ "url": "https://snarfed.org/comments/feed",
+ "type": "application/rss+xml"
+ },
+ {
+ "url": "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile/feed",
+ "type": "application/rss+xml"
+ }
+ ]
+}
\ No newline at end of file
index 0000000..4ef8032
--- /dev/null
+{
+ "items": [
+ {
+ "properties": {
+ "name": [
+ {
+ "properties": {
+ "name": ["This is wrong"]
+ },
+ "value": "Rocky Raccoon",
+ "type": ["h-card"]
+ },
+ "This is also wrong"
+ ],
+ "url": [
+ {
+ "properties": {
+ "url": ["This is probably wrong"]
+ },
+ "value": "https://foo.bar/",
+ "type": ["h-event"]
+ },
+ "This is wrong too"
+ ],
+ "uid": ["https://foo.bar/"]
+ },
+ "type": [
+ "h-entry"
+ ]
+ }
+ ]
+}
index 0000000..125f4d0
--- /dev/null
+{
+ "items": [
+ {
+ "type": [
+ "h-card"
+ ],
+ "properties": {
+ "url": [
+ "https://tmichellemoore.com/"
+ ],
+ "uid": [
+ "https://tmichellemoore.com/"
+ ],
+ "photo": [
+ "https://tmichellemoore.com/pic.jpg"
+ ]
+ }
+ }
+ ]
+}
index 0000000..e8888ac
--- /dev/null
+{
+ "items": [
+ {
+ "type": [
+ "h-card"
+ ],
+ "properties": {
+ "url": [
+ "https://tmichellemoore.com/"
+ ],
+ "uid": [
+ "https://tmichellemoore.com/"
+ ],
+ "name": [
+ "Foo Foundation"
+ ],
+ "org": [
+ "Foo Foundation"
+ ]
+ }
+ }
+ ]
+}
index 0000000..58febe8
--- /dev/null
+{
+ "items": [
+ {
+ "properties": {
+ "content": [
+ {
+ "html": "some <br> html"
+ }
+ ]
+ },
+ "type": [
+ "h-entry"
+ ]
+ }
+ ]
+}
index 0000000..35b3da6
--- /dev/null
+{
+ "items": [
+ {
+ "children": [
+ {
+ "properties": {
+ "name": [
+ "@W3C"
+ ],
+ "url": [
+ "https://twitter.com/W3C"
+ ]
+ },
+ "type": [
+ "h-x-username"
+ ]
+ }
+ ],
+ "properties": {
+ "author": [
+ {
+ "properties": {
+ "name": [
+ "Tantek \u00c7elik"
+ ],
+ "photo": [
+ "http://tantek.com/logo.jpg"
+ ],
+ "url": [
+ "http://tantek.com/"
+ ]
+ },
+ "type": [
+ "h-card"
+ ],
+ "value": ""
+ }
+ ],
+ "content": [
+ {
+ "html": "<a class=\"auto-link figure\" href=\"https://igcdn-photos-b-a.akamaihd.net/hphotos-ak-xaf1/t51.2885-15/e35/12145332_1662314194043465_2009449288_n.jpg\"><img alt=\"a photo\" class=\"auto-embed u-photo\" src=\"https://igcdn-photos-b-a.akamaihd.net/hphotos-ak-xaf1/t51.2885-15/e35/12145332_1662314194043465_2009449288_n.jpg\"/></a> <a class=\"auto-link\" href=\"https://instagram.com/p/9XVBIRA9cj/\">https://instagram.com/p/9XVBIRA9cj/</a><br class=\"auto-break\"/><br class=\"auto-break\"/>Social Web session <a class=\"auto-link h-x-username\" href=\"https://twitter.com/W3C\">@W3C</a> #TPAC2015 in Sapporo, Hokkaido, Japan.",
+ "value": " https://instagram.com/p/9XVBIRA9cj/Social Web session @W3C #TPAC2015 in Sapporo, Hokkaido, Japan."
+ }
+ ],
+ "name": [
+ "https://instagram.com/p/9XVBIRA9cj/Social Web session @W3C #TPAC2015 in Sapporo, Hokkaido, Japan."
+ ],
+ "photo": [
+ "https://igcdn-photos-b-a.akamaihd.net/hphotos-ak-xaf1/t51.2885-15/e35/12145332_1662314194043465_2009449288_n.jpg"
+ ],
+ "published": [
+ "2015-10-27T19:48:00-0700"
+ ],
+ "syndication": [
+ "https://www.facebook.com/photo.php?fbid=10101948228396473",
+ "https://twitter.com/t/status/659200761427980288"
+ ],
+ "uid": [
+ "http://tantek.com/2015/300/t1/social-web-session-w3c-tpac2015"
+ ],
+ "updated": [
+ "2015-10-27T19:48:00-0700"
+ ],
+ "url": [
+ "http://tantek.com/2015/300/t1/social-web-session-w3c-tpac2015"
+ ]
+ },
+ "type": [
+ "h-entry"
+ ]
+ }
+ ],
+ "rel-urls": {
+ "http://tantek.com/": {
+ "rels": [
+ "author",
+ "home"
+ ],
+ "text": "tantek.com",
+ "title": "Tantek \u00c7elik"
+ },
+ "http://tantek.com/2015/298/t1/portable-planetarium-sciencehackday-best-hardware": {
+ "rels": [
+ "prev"
+ ],
+ "text": "\u2190",
+ "title": "View the previous (older) item in the stream."
+ },
+ "http://tantek.com/2015/301/f1": {
+ "rels": [
+ "next"
+ ],
+ "text": "\u2192",
+ "title": "View the next (newer) item in the stream"
+ },
+ "http://tantek.com/logo.jpg": {
+ "rels": [
+ "icon"
+ ],
+ "text": ""
+ },
+ "http://webmention.io/tantek.com/webmention": {
+ "rels": [
+ "webmention"
+ ],
+ "text": ""
+ },
+ "https://twitter.com/t/status/659200761427980288": {
+ "rels": [
+ "syndication"
+ ],
+ "text": "View \n on Twitter"
+ },
+ "https://www.facebook.com/photo.php?fbid=10101948228396473": {
+ "rels": [
+ "syndication"
+ ],
+ "text": "View on Facebook"
+ }
+ },
+ "rels": {
+ "author": [
+ "http://tantek.com/"
+ ],
+ "home": [
+ "http://tantek.com/"
+ ],
+ "icon": [
+ "http://tantek.com/logo.jpg"
+ ],
+ "next": [
+ "http://tantek.com/2015/301/f1"
+ ],
+ "prev": [
+ "http://tantek.com/2015/298/t1/portable-planetarium-sciencehackday-best-hardware"
+ ],
+ "syndication": [
+ "https://www.facebook.com/photo.php?fbid=10101948228396473",
+ "https://twitter.com/t/status/659200761427980288"
+ ],
+ "webmention": [
+ "http://webmention.io/tantek.com/webmention"
+ ]
+ }
+}
index 0000000..9bcd75f
--- /dev/null
+"""
+Test the authorship discovery algorithm. Credit for test cases to
+
+Sandeep Shetty https://github.com/sandeepshetty/authorship-test-cases
+
+"""
+
+import mf
+
+
+def load_test(testname, hentry_func=None):
+    """Run authorship discovery against a fixture in ``tests/authorship/``.
+
+    The fixture is parsed as if it were served from
+    ``http://example.com/<testname>``. When ``hentry_func`` is given it
+    receives the parsed document and its return value is handed to
+    ``mf.find_author`` as ``hentry``.
+    """
+    base = "http://example.com/"
+
+    def fetch_mf2(url):
+        # Map example.com URLs back onto fixture filenames; any other URL is
+        # used as the relative path unchanged.
+        relpath = url[len(base) :] if url.startswith(base) else url
+        with open("tests/authorship/" + relpath) as fixture:
+            return mf.parse(url=url, doc=fixture.read())
+
+    page_url = base + testname
+    parsed = fetch_mf2(page_url)
+    hentry = hentry_func
+    if hentry_func:
+        hentry = hentry_func(parsed)
+    return mf.find_author(parsed, page_url, hentry=hentry, fetch_mf2_func=fetch_mf2)
+
+
+def test_p_author_string():
+    """A plain-string ``author`` property is reported as the author's name."""
+    entry = {"type": ["h-entry"], "properties": {"author": ["John Doe"]}}
+    expected = {"name": "John Doe"}
+    assert mf.find_author({"items": [entry]}) == expected
+
+
+def test_h_entry_with_p_author_h_card():
+    """Check the author found for ``h-entry_with_p-author_h-card.html``."""
+    expected = {
+        "name": "John Doe",
+        "url": "http://example.com/johndoe/",
+        "photo": "http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm",
+    }
+    author = load_test("h-entry_with_p-author_h-card.html")
+    assert author == expected
+
+
+def test_h_entry_with_rel_author():
+    """Check the author found for ``h-entry_with_rel-author.html``."""
+    expected = {
+        "name": "John Doe",
+        "url": "http://example.com/h-card_with_u-url_that_is_also_rel-me.html",
+        "photo": "http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm",
+    }
+    author = load_test("h-entry_with_rel-author.html")
+    assert author == expected
+
+
+def test_h_entry_with_u_author():
+    """Check the author found for ``h-entry_with_u-author.html``."""
+    expected = {
+        "name": "John Doe",
+        "url": "http://example.com/h-card_with_u-url_equal_to_self.html",
+        "photo": "http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm",
+    }
+    author = load_test("h-entry_with_u-author.html")
+    assert author == expected
+
+
+def test_h_feed_with_p_author_h_card():
+    """Find the author for the second child of the h-feed fixture."""
+
+    def second_child(parsed):
+        feed = parsed["items"][0]
+        assert feed["type"] == ["h-feed"]
+        children = feed["children"]
+        assert len(children) == 3
+        return children[1]
+
+    expected = {
+        "name": "John Doe",
+        "url": "http://example.com/johndoe/",
+        "photo": "http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm",
+    }
+    author = load_test("h-feed_with_p-author_h-card.html", second_child)
+    assert author == expected
+
+
+def test_h_feed_with_u_author():
+    """Find the author for the third child of the h-feed fixture."""
+
+    def third_child(parsed):
+        feed = parsed["items"][0]
+        assert feed["type"] == ["h-feed"]
+        children = feed["children"]
+        assert len(children) == 3
+        return children[2]
+
+    expected = {
+        "name": "John Doe",
+        "url": "http://example.com/h-card_with_u-url_equal_to_u-uid_equal_to_self.html",
+        "photo": "http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm",
+    }
+    author = load_test("h-feed_with_u-author.html", third_child)
+    assert author == expected
index 0000000..5f35e72
--- /dev/null
+import copy
+
+import mf
+
+# Minimal parsed document shared by the tests in this module: an author
+# h-card followed by an h-entry. Tests deep-copy it before adding references.
+_AUTHOR_CARD = {
+    "properties": {
+        "name": ["Author"],
+        "photo": ["http://example.com/author_img.jpg"],
+        "url": ["http://example.com"],
+    },
+    "type": ["h-card"],
+    "value": "Author LastName",
+}
+
+_ENTRY = {
+    "properties": {
+        "content": [{"html": "some content", "value": "some content"}],
+        "name": ["some title"],
+        "published": ["2014-05-07T17:15:44+00:00"],
+        "url": ["http://example.com/reply/2014/05/07/1"],
+    },
+    "type": ["h-entry"],
+}
+
+TEST_BLOB = {
+    "alternates": [],
+    "items": [_AUTHOR_CARD, _ENTRY],
+    "rels": {},
+}
+
+
+def test_no_reference():
+    """A post that references none of the target URLs gets no classification."""
+    blob = copy.deepcopy(TEST_BLOB)
+    assert mf.classify_comment(blob, ("http://example.com",)) == []
+
+    # Add references that do not match the target. Property values live under
+    # "properties" in parsed microformats JSON; setting "in-reply-to" directly
+    # on the item (as this test used to) left the entry's properties untouched.
+    blob["items"][1]["properties"]["in-reply-to"] = [
+        "http://werd.io/2014/homebrew-website-club-4",
+        "https://www.facebook.com/events/1430990723825351/",
+    ]
+    assert mf.classify_comment(blob, ("http://example.com",)) == []
+
+    # no target url
+    assert mf.classify_comment(blob, ()) == []
+
+
+def test_rsvps():
+    """An RSVP that replies to one of our URLs is both an rsvp and a reply."""
+    targets = ("http://mydoma.in/short", "http://mydomain.com/my-post")
+    blob = copy.deepcopy(TEST_BLOB)
+
+    entry_props = blob["items"][1]["properties"]
+    entry_props["in-reply-to"] = ["http://mydomain.com/my-post"]
+    entry_props["rsvp"] = ["yes"]
+
+    assert mf.classify_comment(blob, targets) == ["rsvp", "reply"]
+
+
+def test_invites():
+    """A reply carrying an ``invitee`` is both an invite and a reply."""
+    targets = ("http://mydoma.in/short", "http://mydomain.com/my-post")
+    invitee = {
+        "name": "Kyle Mahan",
+        "url": "https://kylewm.com",
+    }
+    blob = copy.deepcopy(TEST_BLOB)
+
+    entry_props = blob["items"][1]["properties"]
+    entry_props["in-reply-to"] = ["http://mydomain.com/my-post"]
+    entry_props["invitee"] = [invitee]
+
+    assert mf.classify_comment(blob, targets) == ["invite", "reply"]
+
+
+def test_likes():
+    """A like-of one of our URLs is a like, even alongside a reply to another site."""
+    targets = ("http://mydoma.in/short", "http://mydomain.com/my-post")
+    blob = copy.deepcopy(TEST_BLOB)
+
+    # the reply points elsewhere; only the like points at a target
+    entry_props = blob["items"][1]["properties"]
+    entry_props["in-reply-to"] = ["http://someoneelse.com/post"]
+    entry_props["like-of"] = ["http://mydomain.com/my-post"]
+
+    assert mf.classify_comment(blob, targets) == ["like"]
+
+
+def test_reposts():
+    """A repost-of one of our URLs is a repost, even alongside a like of another site."""
+    targets = ("http://mydoma.in/short", "http://mydomain.com/my-post")
+    blob = copy.deepcopy(TEST_BLOB)
+
+    # the like points elsewhere; only the repost points at a target
+    entry_props = blob["items"][1]["properties"]
+    entry_props["repost-of"] = ["http://mydomain.com/my-post"]
+    entry_props["like-of"] = ["http://someoneelse.com/post"]
+
+    assert mf.classify_comment(blob, targets) == ["repost"]
+
+
+def test_multireply():
+    """Our post is still classified as replied-to when it is one of several
+    ``in-reply-to`` targets in a multireply."""
+    blob = copy.deepcopy(TEST_BLOB)
+
+    # add some references
+    blob["items"][1]["properties"].update(
+        {
+            "in-reply-to": [
+                "http://someoneelse.com/post",
+                "http://mydomain.com/my-post",
+                "http://athirddomain.org/permalink",
+            ],
+        }
+    )
+
+    # The trailing comma matters: without it the parentheses are only grouping
+    # and a bare string is passed, unlike the tuples every other test passes.
+    targets = ("http://mydomain.com/my-post",)
+    assert mf.classify_comment(blob, targets) == ["reply"]
+
+
+def test_multimodal():
+    """One mention can carry several classifications; all must be reported.
+
+    Also uses the ``reply-to`` and ``like`` property names rather than
+    ``in-reply-to`` and ``like-of``.
+    """
+    targets = ("http://mydoma.in/short", "http://mydomain.com/my-post")
+    blob = copy.deepcopy(TEST_BLOB)
+
+    entry_props = blob["items"][1]["properties"]
+    entry_props["reply-to"] = ["http://noone.im/"]
+    entry_props["repost-of"] = [
+        "http://someoneelse.com",
+        "http://mydomain.com/my-post",
+    ]
+    entry_props["like"] = [
+        "http://mydoma.in/short",
+        "http://someoneelse.com/post",
+    ]
+
+    # order of the result is not asserted, only its contents
+    found = sorted(mf.classify_comment(blob, targets))
+    assert found == ["like", "repost"]
+
+
+def test_h_cite():
+    """References given as embedded objects (e.g. ``class="p-in-reply-to
+    h-cite"``) are matched through their ``url`` property."""
+    # NOTE(review): "type" is a plain string here, while the JSON fixtures use
+    # a list for "type" -- confirm whether that is intentional.
+    cite = {
+        "type": "h-cite",
+        "properties": {
+            "url": ["http://mydomain.com/my-post"],
+        },
+    }
+    blob = copy.deepcopy(TEST_BLOB)
+    blob["items"][1]["properties"]["in-reply-to"] = [cite]
+
+    targets = ("http://mydomain.com/my-post",)
+    assert mf.classify_comment(blob, targets) == ["reply"]
index 0000000..9a47710
--- /dev/null
+from datetime import date, datetime, timedelta
+
+import pytest
+from mf import parse_dt
+
+
+def test_none():
+    """``parse_dt(None)`` passes ``None`` through."""
+    parsed = parse_dt(None)
+    assert parsed is None
+
+
+def test_parse_dates():
+    """Date-only strings compare equal to the expected ``date``; malformed
+    ones raise ``ValueError``."""
+    valid = [
+        ("2014-04-27", date(2014, 4, 27)),
+        ("2014-9-2", date(2014, 9, 2)),
+        ("1982-11-24", date(1982, 11, 24)),
+    ]
+    for text, expected in valid:
+        assert parse_dt(text) == expected
+
+    invalid = [
+        "2014-24-11",  # day/month switched
+        "14-09-27",  # 2-character year
+    ]
+    for text in invalid:
+        with pytest.raises(ValueError):
+            parse_dt(text)
+
+
+def test_parse_dts_no_tz():
+    """Datetimes without a timezone compare equal to naive ``datetime`` values."""
+    valid = [
+        # tantek.com -- no seconds, no timezone
+        ("2014-05-09T17:53", datetime(2014, 5, 9, 17, 53)),
+        # same as above without 'T'
+        ("2014-05-09 17:53", datetime(2014, 5, 9, 17, 53)),
+        # Homebrew Website Club
+        ("2014-04-23T18:30", datetime(2014, 4, 23, 18, 30)),
+    ]
+    for text, expected in valid:
+        assert parse_dt(text) == expected
+
+    invalid = [
+        "2012-09-01T12",  # hour only
+        "2014-04-23T30:90",  # invalid hour minute
+    ]
+    for text in invalid:
+        with pytest.raises(ValueError):
+            parse_dt(text)
+
+
+def test_parse_dts():
+    """Datetimes with a numeric or ``Z`` offset keep their wall-clock time
+    and UTC offset; named timezones raise ``ValueError``."""
+
+    def assert_with_tz(dt, naive, offset):
+        """Assert that ``dt`` has wall-clock time ``naive`` and UTC offset ``offset``."""
+        assert naive == dt.replace(tzinfo=None)
+        assert offset == dt.utcoffset()
+
+    cases = [
+        # waterpigs.co.uk -- utc time
+        (
+            "2014-05-10T10:48:28+00:00",
+            datetime(2014, 5, 10, 10, 48, 28),
+            timedelta(hours=0),
+        ),
+        # same as above with Zulu time
+        (
+            "2014-05-10T10:48:28Z",
+            datetime(2014, 5, 10, 10, 48, 28),
+            timedelta(hours=0),
+        ),
+        # snarfed.org -- pacific time
+        (
+            "2014-05-05T09:59:08-07:00",
+            datetime(2014, 5, 5, 9, 59, 8),
+            timedelta(hours=-7),
+        ),
+        # same as above, no colon in tz
+        (
+            "2014-05-05T09:59:08-0700",
+            datetime(2014, 5, 5, 9, 59, 8),
+            timedelta(hours=-7),
+        ),
+    ]
+    for text, naive, offset in cases:
+        assert_with_tz(parse_dt(text), naive, offset)
+
+    # cannot read timezones by name
+    with pytest.raises(ValueError):
+        parse_dt("2013-07-04T11:22 PST")
index 0000000..9c0e3e1
--- /dev/null
+"""
+Test the interpret module, the unification of the other utility methods.
+
+Uses test cases from around the indieweb.
+
+"""
+
+import json
+from datetime import date, datetime, timedelta
+
+import mf
+
+
+def load_test(testname):
+    """Load the JSON fixture ``tests/interpret/<testname>.json``.
+
+    Returns the decoded JSON document. The path is relative to the current
+    working directory.
+    """
+    # Use a context manager so the handle is closed deterministically, and
+    # read as UTF-8 (the JSON interchange encoding) rather than whatever the
+    # locale default happens to be.
+    with open(f"tests/interpret/{testname}.json", encoding="utf-8") as fixture:
+        return json.load(fixture)
+
+
+def test_event():
+    """Interpret the Homebrew Website Club event fixture (from werd.io)."""
+    fixture = load_test("hwc-event")
+    event = mf.interpret(fixture, "http://werd.io/2014/homebrew-website-club-4")
+
+    assert event["type"] == "event"
+    assert event["name"] == "Homebrew Website Club"
+    assert "Are you building your own website?" in event["content"]
+
+    # start and end are compared as wall-clock time plus UTC offset
+    start = event["start"]
+    assert start.replace(tzinfo=None) == datetime(2014, 5, 7, 18, 30)
+    assert start.utcoffset() == timedelta(hours=0)
+    end = event["end"]
+    assert end.replace(tzinfo=None) == datetime(2014, 5, 7, 19, 30)
+    assert end.utcoffset() == timedelta(hours=0)
+
+    expected_location = {
+        "name": "Mozilla SF, 1st floor, 2 Harrison st. (at Embarcadero), San Francisco, CA ",
+    }
+    assert event["location"] == expected_location
+
+
+def test_reply_h_cite():
+    """Interpret a reply that embeds its reply-context (from aaronparecki.com)."""
+    targets = [
+        "http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/",
+        "http://datahiveconsulting.com/2014/04/10",
+    ]
+    reply = mf.interpret_comment(
+        load_test("reply_h-cite"),
+        "http://aaronparecki.com/replies/2014/05/10/1/indieweb",
+        targets,
+    )
+
+    assert reply["type"] == "entry"
+    assert not reply.get("name")
+    assert "We're working on it ;-)" in reply.get("content")
+    published = reply["published"]
+    assert published.replace(tzinfo=None) == datetime(2014, 5, 10, 14, 48, 33)
+    assert published.utcoffset() == timedelta(hours=-7)
+    assert reply["comment_type"] == ["reply"]
+
+    # the cited post's "content" and "content-plain" are the same excerpt
+    excerpt = "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ..."
+    expected_context = {
+        "type": "cite",
+        "author": {
+            "name": "Lynne Baer",
+            "photo": "http://aaronparecki.com/images/nouns/user.svg",
+            "url": "http://datahiveconsulting.com/author/lynne/",
+        },
+        "content": excerpt,
+        "content-plain": excerpt,
+        "url": "http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/",
+        "syndication": [],
+    }
+    assert reply["in-reply-to"] == [expected_context]
+    assert reply["syndication"] == [
+        "https://twitter.com/aaronpk/status/465247041078034432"
+    ]
+
+
+def test_u_in_reply_to():
+    """Interpret a reply with a plain ``u-in-reply-to`` link (from snarfed.org)."""
+    target = "https://willnorris.com/2014/03/display-likes-in-a-facepile"
+    reply = mf.interpret_comment(
+        load_test("reply_u-in-reply-to"),
+        "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile",
+        [target],
+    )
+
+    assert reply["type"] == "entry"
+    assert reply["name"] == "Re: Display likes in a facepile"
+    assert "oh man, so cool!" in reply.get("content")
+    published = reply["published"]
+    assert published.replace(tzinfo=None) == datetime(2014, 3, 9, 22, 48, 22)
+    assert published.utcoffset() == timedelta(hours=-7)
+    # a bare link is reported as a dict holding only its url
+    assert reply["in-reply-to"] == [{"url": target}]
+    assert reply["comment_type"] == ["reply"]
+
+
+def test_reply_rsvp():
+    """Interpret an RSVP reply fixture: both comment types and the rsvp value."""
+    rsvp = mf.interpret_comment(
+        load_test("reply_rsvp"),
+        "https://snarfed.org/2014-05-05_homebrew-website-club-3",
+        ["http://werd.io/2014/homebrew-website-club-4"],
+    )
+
+    assert rsvp["type"] == "entry"
+    assert rsvp["name"] == "Homebrew Website Club"
+    assert '<a class="u-in-reply-to"' in rsvp.get("content")
+    published = rsvp["published"]
+    assert published.replace(tzinfo=None) == datetime(2014, 5, 5, 10, 10, 53)
+    assert published.utcoffset() == timedelta(hours=-7)
+    assert rsvp["comment_type"] == ["rsvp", "reply"]
+    assert rsvp["rsvp"] == "yes"
+
+def test_reply_invite():
+    """Interpret an invitation fixture: comment types and the invitee list."""
+    expected_invitee = {
+        "name": "Silona Bonewald",
+        "url": "https://www.facebook.com/10155109753190015",
+        "photo": "https://graph.facebook.com/v2.2/10155109753190015/picture?type=large",
+    }
+    invite = mf.interpret_comment(
+        load_test("reply_invite"),
+        "https://www.facebook.com/1565113317092307#10155109753190015",
+        ["https://kylewm.com/2015/03/homebrew-website-club-2015-march-25"],
+    )
+
+    assert invite["name"] == "invited"
+    assert invite["comment_type"] == ["invite", "reply"]
+    assert invite["invitees"] == [expected_invitee]
+
+
+def test_comment_and_like():
+    """Interpret a note fixture that embeds one comment and one like."""
+    note = mf.interpret(
+        load_test("note_with_comment_and_like"),
+        "https://kylewm.com/2015/10/big-thing-missing-from-my-indieweb-experience-is",
+    )
+    assert note["type"] == "entry"
+
+    # the single embedded comment
+    assert len(note["comment"]) == 1
+    comment = note["comment"][0]
+    assert comment["type"] == "cite"
+    assert comment["author"] == {
+        "name": "Aaron Parecki",
+        "photo": "https://twitter.com/aaronpk/profile_image?size=original",
+        "url": "http://aaronparecki.com",
+    }
+    expected_html = '<a href="https://twitter.com/kylewmahan">@kylewmahan</a> I usually click through a couple levels up looking to see if any of the URLs up the chain show comments <a href="https://twitter.com/search?q=%23indieweb">#indieweb</a>'
+    assert comment["content"] == expected_html
+
+    # the single embedded like; its author has an empty name
+    assert len(note["like"]) == 1
+    like = note["like"][0]
+    assert like["type"] == "cite"
+    assert like["author"] == {
+        "name": "",
+        "url": "https://twitter.com/benwerd",
+        "photo": "https://kylewm.com/imageproxy?url=https%3A%2F%2Ftwitter.com%2Fbenwerd%2Fprofile_image%3Fsize%3Doriginal&size=48&sig=fde7ce5635f5ea132a2545ff5c7d3d33",
+    }
+
+
+def test_article_naive_datetime():
+    """Published/updated times without a timezone come back as naive datetimes."""
+    article = mf.interpret(
+        load_test("article_naive_datetime"),
+        "http://tantek.com/2014/120/b1/markup-people-focused-mobile-communication",
+    )
+    expected_time = datetime(2014, 4, 30, 12, 11)
+
+    assert article["type"] == "entry"
+    assert article["name"] == "Markup For People Focused Mobile Communication"
+    assert "<h2>Action labels not app names</h2>" in article["content"]
+    assert article["published"] == expected_time
+    assert article["updated"] == expected_time
+
+
+def test_article_two_published_dates():
+    """Regression test: a post with two dt-published values.
+
+    The two values used to be concatenated, which made the datetime
+    uninterpretable and raised an exception. Only the first one should be
+    used.
+    """
+    article = mf.interpret(load_test("article_two_published_dates"), "article.html")
+
+    assert article["type"] == "entry"
+    assert article["name"] == "Test Article with Two Published Dates"
+    published = article["published"]
+    assert published.replace(tzinfo=None) == datetime(2014, 4, 30, 12, 11, 0)
+    assert published.utcoffset() == timedelta(hours=-8)
+
+
+def test_convert_relative_paths():
+    """Relative ``src``/``href`` values in content come back as absolute URLs
+    when interpreting with ``base_href="../"``."""
+    expected_html = 'This is an <img alt="alt text" title="the title" src="http://example.com/static/img.jpg"/> example document with <a href="http://example.com/relative_paths.html">relative paths</a>.'
+    fixture = load_test("relative_paths")
+    interpreted = mf.interpret(fixture, "http://example.com/blog/", base_href="../")
+    assert interpreted["content"] == expected_html
+
+
+def test_no_p_name():
+    """For the ``article_no_p-name`` fixture the result has content but no name."""
+    article = mf.interpret(load_test("article_no_p-name"), "http://example.com")
+    assert "Give me crayons and I will draw a rocketship." in article["content"]
+    assert "name" not in article
+
+
+def test_p_content():
+    """A plain-string ``content`` value (p-content rather than the usual
+    e-content) must be interpreted without raising."""
+    author_card = {
+        "properties": {
+            "name": ["Kyle"],
+            "url": ["https://kylewm.com"],
+        },
+        "type": ["h-card"],
+        "value": "Kyle",
+    }
+    entry = {
+        "properties": {
+            "author": [author_card],
+            # a bare string instead of an {"html": ..., "value": ...} dict
+            "content": ["Thanks for hosting!"],
+            "in-reply-to": [
+                "https://snarfed.org/2014-06-16_homebrew-website-club-at-quip"
+            ],
+            "name": [
+                "I'm attending\n Homebrew Website Club at Quip\n Thanks for hosting!\n Kyle"
+            ],
+            "rsvp": ["yes"],
+        },
+        "type": ["h-entry"],
+    }
+    parsed = {
+        "items": [entry],
+        "rel-urls": {},
+        "rels": {},
+    }
+
+    interpreted = mf.interpret(parsed, "http://kylewm.com/test/rsvp.html")
+    assert "Thanks for hosting!" == interpreted.get("content")
+
+
+def test_unusual_properties():
+    """Check name, url and uid for the ``unusual_properties`` fixture."""
+    interpreted = mf.interpret(load_test("unusual_properties"), "https://example.com/")
+    expectations = [
+        ("name", "Rocky Raccoon"),
+        ("url", "https://foo.bar/"),
+        ("uid", "https://foo.bar/"),
+    ]
+    for key, expected in expectations:
+        assert expected == interpreted.get(key)
+
+
+def test_h_feed_excludes_rel_syndication():
+    """Page-scoped rel=syndication must not leak into individual feed entries.
+
+    The document below (incorrectly) lists every entry's syndication link in
+    the page-level ``rels`` as well. Each interpreted entry must report only
+    its own links.
+    """
+    # Kept as tuples and copied with list() at every use, so the expected
+    # values never share a list object with the input document.
+    first_links = (
+        "https://twitter.com/example_com/123456",
+        "https://www.facebook.com/example.com/123456",
+    )
+    second_links = (
+        "https://twitter.com/example_com/7891011",
+        "https://www.facebook.com/example.com/7891011",
+    )
+
+    first_post = {
+        "type": ["h-entry"],
+        "properties": {
+            "name": ["First Post"],
+            "url": ["http://example.com/first-post"],
+            "content": [
+                {
+                    "html": "This is the body of the first post",
+                    "value": "This is the body of the first post",
+                }
+            ],
+            "syndication": list(first_links),
+        },
+    }
+    second_post = {
+        "type": ["h-event"],
+        "properties": {
+            "name": ["Second Post"],
+            "url": ["http://example.com/second-post"],
+            "content": [
+                {
+                    "html": "This is the body of the second post",
+                    "value": "This is the body of the second post",
+                }
+            ],
+            "syndication": list(second_links),
+        },
+    }
+    parsed = {
+        "items": [first_post, second_post],
+        "rels": {
+            "syndication": [
+                "https://twitter.com/example_com/123456",
+                "https://twitter.com/example_com/7891011",
+                "https://www.facebook.com/example.com/123456",
+                "https://www.facebook.com/example.com/7891011",
+            ],
+        },
+    }
+
+    feed = mf.interpret_feed(parsed, "http://example.com")
+    assert feed["entries"][0]["syndication"] == list(first_links)
+    assert feed["entries"][1]["syndication"] == list(second_links)
+
+
+def test_location_hcard():
+    """Location discovery when the location is an h-card.
+
+    https://indieweb.org/location#How_to_determine_the_location_of_a_microformat
+    """
+    expected = {
+        "name": "Timeless Coffee Roasters",
+        "latitude": "37.83",
+        "longitude": "-122.25",
+    }
+    interpreted = mf.interpret(load_test("location_h-card"), "http://example.com/")
+    assert interpreted["location"] == expected
+
+
+def test_location_geo():
+    """Location discovery when the location is an h-geo."""
+    expected = {
+        "altitude": "123.0",
+        "latitude": "37.83",
+        "longitude": "-122.25",
+    }
+    interpreted = mf.interpret(load_test("location_h-geo"), "http://example.com/")
+    assert interpreted["location"] == expected
+
+
+def test_location_geo_url():
+    """Location discovery from a ``geo:`` URL in the ``geo`` property.
+
+    http://microformats.org/wiki/microformats2#h-card
+    https://tools.ietf.org/html/rfc5870
+    """
+    entry = {
+        "type": ["h-entry"],
+        "properties": {
+            "geo": ["geo:48.2010,16.3695,183;crs=wgs84;u=40"],
+        },
+    }
+    parsed = {"items": [entry]}
+
+    # with an altitude and extra ;parameters
+    with_altitude = mf.interpret(parsed, "http://example.com/")
+    assert with_altitude["location"] == {
+        "altitude": "183",
+        "latitude": "48.2010",
+        "longitude": "16.3695",
+    }
+
+    # latitude and longitude only
+    parsed["items"][0]["properties"]["geo"] = ["geo:48.2010,16.3695"]
+    without_altitude = mf.interpret(parsed, "http://example.com/")
+    assert without_altitude["location"] == {
+        "latitude": "48.2010",
+        "longitude": "16.3695",
+    }
+
+
+def test_location_adr():
+ """Test the location algorithm with an h-adr."""
+ parsed = load_test("location_h-adr")
+ result = mf.interpret(parsed, "http://example.com/")
+ assert result["location"] == {
+ "street-address": "17 Austerstræti",
+ "locality": "Reykjavík",
+ "country-name": "Iceland",
+ "postal-code": "107",
+ "name": "17 Austerstræti Reykjavík Iceland 107",
+ }
+
+
+def test_location_top_level():
+ """Test the location algorithm with top level properties."""
+ parsed = load_test("location_top_level")
+ result = mf.interpret(parsed, "http://example.com/")
+ assert result["location"] == {
+ "latitude": "37.83",
+ "longitude": "-122.25",
+ }
index 0000000..ef767ce
--- /dev/null
+import sys
+
+import mf
+
+
+def test_is_name_a_title():
+ for name, content, expected in [
+ # simple
+ ("this is the content", "this is the content", False),
+ ("This is a title", "This is some content", True),
+ # common case with no explicit p-name
+ ("nonsensethe contentnonsense", "the content", False),
+ # ignore case, punctuation
+ ("the content", "ThE cONTeNT...", False),
+ # test bytestrings
+ (b"This is a title", b"This is some content", True),
+ ]:
+ assert expected == mf.is_name_a_title(name, content)
index 0000000..edaa7ba
--- /dev/null
+"""
+Tests for post_type_discovery
+
+"""
+
+import json
+
+import mf
+
+
+def test_post_type_discovery():
+ for test, implied_type in [
+ ("interpret/hwc-event", "event"),
+ ("interpret/reply_h-cite", "reply"),
+ ("interpret/reply_u-in-reply-to", "reply"),
+ ("interpret/reply_rsvp", "rsvp"),
+ ("interpret/note_with_comment_and_like", "note"),
+ ("interpret/article_naive_datetime", "article"),
+ ("interpret/article_non_ascii_content", "article"),
+ ("interpret/follow", "follow"),
+ ("posttype/tantek_photo", "photo"),
+ ("posttype/only_html_content", "note"),
+ ("posttype/hcard_no_name", "person"),
+ ("posttype/hcard_org", "org"),
+ # TODO add more tests
+ ]:
+ parsed = json.load(open("tests/" + test + ".json"))
+ types = (
+ ["h-card"] if implied_type in ("person", "org") else ["h-entry", "h-event"]
+ )
+ entry = mf.find_first_entry(parsed, types)
+ assert implied_type == mf.post_type_discovery(entry)
index 0000000..1921b76
--- /dev/null
+"""
+Test representative h-card parsing
+
+"""
+
+import mf
+
+
+def test_url_matches_uid():
+ p = {
+ "rels": {},
+ "items": [
+ {
+ "type": ["h-card"],
+ "properties": {
+ "url": ["http://foo.com/bar", "http://tilde.club/~foobar"],
+ "name": ["Bad"],
+ },
+ },
+ {
+ "type": ["h-card"],
+ "properties": {
+ "url": ["http://foo.com/bar", "http://tilde.club/~foobar"],
+ "uid": ["http://foo.com/bar"],
+ "name": ["Good"],
+ },
+ },
+ ],
+ }
+ hcard = mf.representative_hcard(p, "http://foo.com/bar")
+ assert hcard
+ assert hcard["properties"]["name"][0] == "Good"
+
+ # removing the uid should prevent us from finding the h-card
+ del p["items"][1]["properties"]["uid"]
+ hcard = mf.representative_hcard(p, "http://foo.com/bar")
+ assert not hcard
+
+
+def test_nested_hcard():
+ p = {
+ "rels": {},
+ "items": [
+ {
+ "type": ["h-card"],
+ "properties": {
+ "url": ["http://foo.com/bar", "http://tilde.club/~foobar"],
+ "name": ["Bad"],
+ },
+ },
+ {
+ "type": ["h-entry"],
+ "children": [
+ {
+ "type": ["h-card"],
+ "properties": {
+ "url": ["http://foo.com/bar", "http://tilde.club/~foobar"],
+ "uid": ["http://foo.com/bar"],
+ "name": ["Good"],
+ },
+ },
+ ],
+ },
+ ],
+ }
+ hcard = mf.representative_hcard(p, "http://foo.com/bar")
+ assert hcard
+ assert hcard["properties"]["name"][0] == "Good"
+
+
+def test_url_matches_rel_me():
+ # rel-me points to identity hosted on about.me
+ p = {
+ "rels": {
+ "me": ["http://about.me/foobar"],
+ },
+ "items": [
+ {
+ "type": ["h-card"],
+ "properties": {
+ "url": ["http://tilde.club/~foobar"],
+ "name": ["Bad"],
+ },
+ },
+ {
+ "type": ["h-card"],
+ "properties": {
+ "url": ["http://about.me/foobar", "http://tilde.club/~foobar"],
+ "name": ["Good"],
+ },
+ },
+ ],
+ }
+ hcard = mf.representative_hcard(p, "http://foo.com/bar")
+ assert hcard
+ assert hcard["properties"]["name"][0] == "Good"
+
+
+def test_one_matching_url():
+ p = {
+ "rels": {},
+ "items": [
+ {
+ "type": ["h-card"],
+ "properties": {
+ "url": ["http://tilde.club/~foobar"],
+ "name": ["Bad"],
+ },
+ },
+ {
+ "type": ["h-card"],
+ "properties": {
+ "url": ["http://foo.com/bar", "http://tilde.club/~foobar"],
+ "name": ["Good"],
+ },
+ },
+ ],
+ }
+ hcard = mf.representative_hcard(p, "http://foo.com/bar")
+ assert hcard
+ assert hcard["properties"]["name"][0] == "Good"
+
+ p["items"].append(
+ {
+ "type": ["h-card"],
+ "properties": {
+ "url": ["http://foo.com/bar", "http://flickr.com/photos/foobar"],
+ "name": ["Too Many Cooks"],
+ },
+ }
+ )
+ hcard = mf.representative_hcard(p, "http://foo.com/bar")
+ assert not hcard
+
+
+def test_hcard_as_a_property():
+ """h-card is the p-author of the primary h-feed"""
+ p = {
+ "rels": {},
+ "items": [
+ {
+ "type": ["h-feed"],
+ "properties": {
+ "author": [
+ {
+ "type": ["h-card"],
+ "properties": {
+ "name": ["Elliot Alderson"],
+ "url": ["http://foo.com/bar"],
+ },
+ }
+ ]
+ },
+ }
+ ],
+ }
+ hcard = mf.representative_hcard(p, "http://foo.com/bar")
+ assert hcard
+ assert hcard["properties"]["name"][0] == "Elliot Alderson"