Skip to content

Commit

Permalink
Support CDDL marking and linking
Browse files Browse the repository at this point in the history
This makes Bikeshed process CDDL blocks à la `<pre class=cddl>` as described
in #2072 to:
- add highlighting (done by Pygments, the code merely sets the right class)
- wrap terms in `<dfn>` and `<a>` automatically, making it possible to
define them in, or reference them from, the rest of the prose.

Most of the logic is copied from the logic used to process IDL blocks.

As opposed to IDL definitions, CDDL definitions are not exported by default as
most CDDL definitions only apply to the underlying spec.

Support for CDDL definitions means 4 new definition types get introduced:
- `cddl-type`: roughly the equivalent of an IDL interface. Type definitions do
not have a `data-dfn-for` attribute.
- `cddl-key`: roughly the equivalent of an IDL attribute. Key definitions
always have a `data-dfn-for` attribute that links back to a CDDL type.
- `cddl-value`: roughly the equivalent of an enum-value in IDL. Value
definitions always have a `data-dfn-for` attribute that links back to a CDDL
type or to a CDDL key.
- `cddl-parameter`: used for generic parameter names (noting that no known spec
uses CDDL generics for the time being).

The definition types are prefixed with `cddl-` to avoid collisions with
types used for other purposes (e.g., `value` in CSS).

The code also collects CDDL definitions to produce a CDDL index at the end of
the spec. To accommodate specs like WebDriver BiDi that define two sets of
CDDL (remote end and local end), a mechanism gets added to associate CDDL
definitions with a given module:
1. The CDDL module must be defined with a `<dfn>` with a `data-cddl-module`
attribute set to a shortname for the CDDL module
2. CDDL blocks must add a `data-cddl-module` attribute set to a comma-separated
list of CDDL module shortnames they belong to. If a CDDL block does not have
that attribute, the code considers it to be defined in all CDDL modules.

The index is split per module, using the `<dfn>` text as title for each module.

Note: Even when modules are used, CDDL definitions in a spec are part of the
same namespace, meaning a `foo` rule cannot be defined differently within a
single spec for two different CDDL modules.

CDDL parsing is done through a hand-made CDDL parser that follows the CDDL
grammar defined in RFC 8610, currently sitting under:
 https://github.com/tidoust/cddlparser

To ease authoring, a new shorthand notation gets introduced to reference CDDL:
`{^foo^}` is an autolink to a CDDL definition. FWIW, the shorthand was chosen
to mean "shortcut to CDDL code" on the grounds that:
- `{}` indicates a code block (for IDL)
- `^` means "cut" in CDDL

CDDL type definitions can become somewhat convoluted, so the code does not
attempt to be too smart and won't autolink definitions that are too complex.
  • Loading branch information
tidoust committed Dec 3, 2024
1 parent c79ddb7 commit 17ea0ee
Show file tree
Hide file tree
Showing 21 changed files with 6,184 additions and 143 deletions.
4 changes: 4 additions & 0 deletions bikeshed/Spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
biblio,
boilerplate,
caniuse,
cddl,
conditional,
constants,
datablocks,
Expand Down Expand Up @@ -288,13 +289,15 @@ def processDocument(self) -> Spec:
u.checkVarHygiene(self)
u.processIssuesAndExamples(self)
idl.markupIDL(self)
cddl.markupCDDL(self)
u.inlineRemoteIssues(self)
u.addImageSize(self)

# Handle all the links
u.processBiblioLinks(self)
u.processDfns(self)
u.processIDL(self)
u.processCDDL(self)
dfns.annotateDfns(self)
u.formatArgumentdefTables(self)
u.formatElementdefTables(self)
Expand All @@ -307,6 +310,7 @@ def processDocument(self) -> Spec:
boilerplate.addReferencesSection(self)
boilerplate.addPropertyIndex(self)
boilerplate.addIDLSection(self)
boilerplate.addCDDLSection(self)
boilerplate.addIssuesSection(self)
boilerplate.addCustomBoilerplate(self)
headings.processHeadings(self, "all") # again
Expand Down
51 changes: 51 additions & 0 deletions bikeshed/boilerplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,57 @@ def addIDLSection(doc: t.SpecT) -> None:
del el.attrib["id"]
h.addClass(doc, container, "highlight")

def addCDDLSection(doc: t.SpecT) -> None:
    """
    Append a "CDDL Index" section that gathers the spec's normative CDDL blocks.

    Nothing is emitted when the document has no normative ``pre.cddl`` /
    ``xmp.cddl`` block, or when no "cddl-index" fill container is available.

    When the document declares CDDL modules (``<dfn data-cddl-module=...>``),
    one ``<h3>`` + ``<pre class=cddl>`` pair is produced per module that has at
    least one matching block; otherwise a single untitled ``<pre>`` is produced.
    """
    allCddlBlocks = [x for x in h.findAll("pre.cddl, xmp.cddl", doc) if h.isNormative(doc, x)]
    if len(allCddlBlocks) == 0:
        return
    html = getFillContainer("cddl-index", doc=doc, default=True)
    if html is None:
        return

    h.appendChild(
        html,
        h.E.h2({"class": "no-num no-ref", "id": h.safeID(doc, "cddl-index")}, _t("CDDL Index")),
    )

    # Specs such as WebDriver BiDi define two sets of CDDL definitions for
    # the local and remote ends of the protocol. The convention is that
    # these modules need to have a dfn with a "data-cddl-module" attribute
    # that contains the module's shortname (the dfn itself provides the label
    # for the index). CDDL blocks reference one or more modules through a
    # "data-cddl-module" attribute.
    # When modules are defined, CDDL blocks that do not reference a module
    # are considered to apply to all modules. In particular, they do not create
    # a "default" module
    #
    # The label is read with h.textContent() rather than the element's `.text`:
    # `.text` only holds the text that precedes the first child element and is
    # None when the dfn starts with markup (e.g. <dfn><code>...</code></dfn>),
    # which would make the `.capitalize()` call below raise.
    cddlModules = [
        (x.get("data-cddl-module") or "", h.textContent(x))
        for x in h.findAll("dfn[data-cddl-module]", doc)
    ]
    if len(cddlModules) == 0:
        # No module declared: a single anonymous module holds every block.
        cddlModules = [("", "")]

    for shortname, label in cddlModules:
        cddlBlocks = []
        for block in allCddlBlocks:
            forModules = [x.strip() for x in block.get("data-cddl-module", "").split(",")]
            # A block without a (non-empty) data-cddl-module belongs to all modules.
            if (len(forModules) == 1 and forModules[0] == "") or shortname in forModules:
                cddlBlocks.append(block)
        if len(cddlBlocks) == 0:
            continue
        if shortname != "":
            h.appendChild(
                html,
                h.E.h3(
                    {"class": "no-num no-ref", "id": h.safeID(doc, "cddl-module-" + shortname)},
                    _t(label.capitalize()),
                ),
            )
        container = h.appendChild(html, h.E.pre({"class": "cddl"}))
        for block in cddlBlocks:
            if h.hasClass(doc, block, "extract"):
                continue
            # Work on a copy so the block stays in place in the body of the spec.
            blockCopy = copy.deepcopy(block)
            h.appendContents(container, blockCopy)
            h.appendChild(container, "\n")
        # The copies duplicate IDs that already exist in the document: turn
        # copied dfns into links back to the original and drop every copied ID.
        for el in h.findAll("[id]", container):
            if el.tag == "dfn":
                el.tag = "a"
                el.set("href", "#" + el.get("id", ""))
            del el.attrib["id"]
        h.addClass(doc, container, "highlight")

def addTOCSection(doc: t.SpecT) -> None:
toc = getFillContainer("table-of-contents", doc=doc, default=False)
Expand Down
197 changes: 197 additions & 0 deletions bikeshed/cddl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
from __future__ import annotations

import os
import sys
from typing import get_args

from . import config, h, t
from . import messages as m

# TODO: Drop once cddlparser gets published as a Pypi package
basepath = os.path.dirname(os.path.realpath(__file__))
cddlpath = os.path.abspath(os.path.join(basepath, '..', '..'))
sys.path.append(cddlpath)
import cddlparser

class CDDLMarker(cddlparser.ast.Marker):
    '''
    Marker that wraps CDDL definitions in <cddl> elements and CDDL references
    in <a> elements while the AST gets serialized, so that the regular
    cross-referencing logic can later turn them into dfns and autolinks.
    '''

    def serializeValue(self, prefix: str, value: str, suffix: str, node: cddlparser.ast.CDDLNode) -> str:
        '''
        Serialize a literal value; text and bytes literals that can be tied
        to an enclosing definition are wrapped in a <cddl> element.
        '''
        literal = prefix + value + suffix
        if node.type not in ("text", "bytes"):
            # Only text and bytes literals are candidates for markup
            return literal

        owner = node.parentNode
        if isinstance(owner, cddlparser.ast.Memberkey) and node.type == "text":
            # A literal text string also gives rise to a type
            # see RFC 8610, section 3.5.1:
            # https://datatracker.ietf.org/doc/html/rfc8610#section-3.5.1
            scope = self._getFor(owner.parentNode)
            if scope is None:
                # Cannot easily link member key back to a definition
                return literal
            # Name of created type should not include the quotes
            return '<cddl data-cddl-type="key" data-cddl-for="{}" data-lt="{}">{}</cddl>'.format(h.escapeAttr(scope), h.escapeAttr(value), literal)

        if isinstance(owner, cddlparser.ast.Operator) and owner.controller == node:
            # Probably a ".default" value. It may be possible to link the value
            # back to an enumeration but it's equally possible that this is just
            # a string that's not defined anywhere. Let's ignore.
            return literal

        scope = self._getFor(node)
        if scope is None:
            return literal
        return '<cddl data-cddl-type="value" data-cddl-for="{}" data-lt="{}">{}</cddl>'.format(h.escapeAttr(scope), h.escapeAttr(literal), literal)

    def serializeName(self, name: str, node: cddlparser.ast.CDDLNode) -> str:
        '''
        Serialize a type name, either as a definition (<cddl>), as a link to
        a definition (<a>), or as plain text when there is nothing to link to.
        '''
        # The node is a Typename. Such a node may appear in a Rule, a Type,
        # a Reference, a Memberkey, a GroupEntry, or GenericParameters
        owner = node.parentNode

        if isinstance(owner, cddlparser.ast.Rule):
            # Rule definition
            extendsBase = owner.assign.type in (cddlparser.Tokens.TCHOICEALT, cddlparser.Tokens.GCHOICEALT)
            if extendsBase:
                # The definition extends a base definition
                return '<a data-link-type="cddl" data-link-for="/">{}</a>'.format(name)
            return '<cddl data-cddl-type="type" data-lt="{}">{}</cddl>'.format(h.escapeAttr(name), name)

        if isinstance(owner, cddlparser.ast.Memberkey):
            # Member definition
            if not owner.hasColon:
                # The key is actually a reference to a type
                if name in get_args(cddlparser.ast.PreludeType):
                    # From the CDDL prelude, nothing to link to
                    return name
                return '<a data-link-type="cddl" data-link-for="/">{}</a>'.format(name)
            scope = self._getFor(owner.parentNode)
            if scope is None:
                # Cannot easily link member key back to a definition
                return name
            return '<cddl data-cddl-type="key" data-cddl-for="{}" data-lt="{}">{}</cddl>'.format(h.escapeAttr(scope), h.escapeAttr(name), name)

        if isinstance(owner, cddlparser.ast.GenericParameters):
            genericType = owner.parentNode
            assert isinstance(genericType, cddlparser.ast.Typename)
            return '<cddl data-cddl-type="parameter" data-cddl-for="{}" data-lt="{}">{}</cddl>'.format(h.escapeAttr(genericType.name), h.escapeAttr(name), name)

        if name in get_args(cddlparser.ast.PreludeType):
            # Do not link types that come from the CDDL prelude
            # defined in RFC 8610
            return name

        return '<a data-link-type="cddl" data-link-for="/">{}</a>'.format(name)

    def _getFor(self, node: cddlparser.ast.CDDLNode) -> str | None:
        '''
        Compute the "for" value of the node by walking up its ancestors:
        the name of the enclosing rule, followed by "/key" for each keyed
        group entry crossed on the way. Returns None when no such value can
        be computed.
        '''
        ancestor = node.parentNode
        while ancestor is not None:
            if isinstance(ancestor, cddlparser.ast.Rule):
                # Something defined in a rule
                return ancestor.name.name
            if isinstance(ancestor, cddlparser.ast.GroupEntry) and ancestor.key is not None:
                # A type in a member key definition
                outerFor = self._getFor(ancestor.parentNode)
                if outerFor is None:
                    return None
                keyType = ancestor.key.type
                if isinstance(keyType, cddlparser.ast.Value) and keyType.type == "text":
                    return outerFor + "/" + keyType.value
                if isinstance(keyType, cddlparser.ast.Typename):
                    return outerFor + "/" + keyType.name
                # Any other kind of key is too complex to link back
                return None
            ancestor = ancestor.parentNode
        return None

def markupCDDL(doc: t.SpecT) -> None:
    '''
    Parse the CDDL blocks of the document and replace the contents of the
    normative ones with the marked-up serialization produced by CDDLMarker.

    Blocks flagged with a "data-no-cddl" attribute are left alone. Parsing or
    serialization failures are reported through m.die(). Every selected
    block, normative or not, gets the "highlight" class.
    '''
    selector = "pre.cddl:not([data-no-cddl]), xmp.cddl:not([data-no-cddl])"
    cddlEls = h.findAll(selector, doc)

    marker = CDDLMarker()
    for el in cddlEls:
        if h.isNormative(doc, el):
            text = h.textContent(el)
            try:
                tree = cddlparser.parse(text)
                markedUp = tree.serialize(marker)
                h.replaceContents(el, h.parseHTML(markedUp))
            except Exception as err:
                m.die(f"{err}\nInvalid CDDL block (first 100 characters):\n{text[0:100]}{'...' if len(text) > 100 else ''}")
        h.addClass(doc, el, "highlight")
        doc.extraJC.addCDDLHighlighting()

def markupCDDLBlock(pre: t.ElementT, doc: t.SpecT) -> set[t.ElementT]:
    '''
    Convert the <cddl> elements found in `pre` into "dfn" or links.

    For each <cddl> element, a local definition of the same CDDL type is
    looked up through doc.refs.getRef() for each of its linking texts and
    "for" values:
    - when none is found, the element itself becomes the <dfn>;
    - otherwise the element becomes an <a> pointing at that definition, and
      the element's linking texts get merged into the definition's "data-lt".

    Returns the set of pre-existing definitions whose "data-lt" was updated.
    '''
    localDfns: set[t.ElementT] = set()
    forcedDfns: list[str] = []
    # NOTE(review): the "<interface>" suffix (11 characters) looks like a
    # leftover from the IDL logic this function was copied from - confirm
    # which suffix, if any, makes sense for CDDL.
    for x in (h.treeAttr(pre, "data-dfn-force") or "").split():
        x = x.strip()
        if x.endswith("<interface>"):
            x = x[:-11]
        forcedDfns.append(x)
    for el in h.findAll("cddl", pre):
        # Prefix CDDL types with "cddl-" to avoid confusion with other
        # types (notably CSS ones such as "value")
        # NOTE(review): the concatenation raises a TypeError when the
        # "data-cddl-type" attribute is missing, before the assert is reached.
        cddlType = "cddl-" + el.get("data-cddl-type")
        assert isinstance(cddlType, str)
        url = None
        forceDfn = False
        ref = None
        cddlText = None
        # Try each linking text in turn and stop at the first one that
        # resolves to a local definition. `cddlText` is deliberately read
        # again after the loop: it then holds the linking text that matched
        # (or the last one tried when nothing matched).
        for cddlText in (el.get("data-lt") or "").split("|"):
            # NOTE(review): cddlType always starts with "cddl-", so this
            # comparison with "interface" cannot be true and forceDfn stays
            # False. Looks inherited from the IDL code - confirm the intent.
            if cddlType == "interface" and cddlText in forcedDfns:
                forceDfn = True
            # No "for" value means a single lookup with linkFor=None.
            linkFors = t.cast("list[str|None]", config.splitForValues(el.get("data-cddl-for", ""))) or [None]
            for linkFor in linkFors:
                ref = doc.refs.getRef(
                    cddlType,
                    cddlText,
                    linkFor=linkFor,
                    status="local",
                    el=el,
                    error=False,
                )
                if ref:
                    url = ref.url
                    break
            if ref:
                break
        if url is None or forceDfn:
            # No existing definition: the <cddl> element becomes the dfn.
            el.tag = "dfn"
            el.set("data-dfn-type", cddlType)
            del el.attrib["data-cddl-type"]
            if el.get("data-cddl-for"):
                el.set("data-dfn-for", el.get("data-cddl-for") or "")
                del el.attrib["data-cddl-for"]
        else:
            # Copy over the auto-generated linking text to the manual dfn.
            dfn = h.find(url, doc)
            # NOTE(review): h.find() is handed a URL rather than a selector.
            # Presumably this works because the url of a local ref is a bare
            # "#id" fragment, which is also a valid ID selector - confirm.
            assert dfn is not None
            lts = combineCDDLLinkingTexts(el.get("data-lt"), dfn.get("data-lt"))
            dfn.set("data-lt", lts)
            localDfns.add(dfn)

            # Reset the <cddl> element to be a link to the manual dfn.
            el.tag = "a"
            el.set("data-link-type", cddlType)
            el.set("data-lt", cddlText)
            del el.attrib["data-cddl-type"]
            if el.get("data-cddl-for"):
                el.set("data-link-for", el.get("data-cddl-for") or "")
                del el.attrib["data-cddl-for"]
            if el.get("id"):
                # Drop any ID carried by the element now that it is a link.
                # NOTE(review): the CDDLMarker in this file does not emit
                # "id" attributes itself; this cleanup looks inherited from
                # the IDL code.
                del el.attrib["id"]
    return localDfns

def combineCDDLLinkingTexts(t1: str | None, t2: str | None) -> str:
t1s = (t1 or "").split("|")
t2s = (t2 or "").split("|")
for lt in t2s:
if lt not in t1s:
t1s.append(lt)
return "|".join(t1s)
1 change: 1 addition & 0 deletions bikeshed/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .dfnTypes import (
adjustKey,
anchorishElements,
cddlTypes,
cssTypes,
dfnClassToType,
dfnElements,
Expand Down
11 changes: 8 additions & 3 deletions bikeshed/config/dfnTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,17 @@
"facetdef": "facet",
"http-headerdef": "http-header",
"permissiondef": "permission",
"cddl-type": "cddl-type",
"cddl-key": "cddl-key",
"cddl-parameter": "cddl-parameter",
"cddl-value": "cddl-value",
}


dfnTypes = frozenset(list(dfnClassToType.values()) + ["dfn"])
maybeTypes = frozenset(["value", "type", "at-rule", "function", "selector"])
cssTypes = frozenset(["property", "value", "at-rule", "descriptor", "type", "function", "selector"])
markupTypes = frozenset(["element", "element-attr", "element-state", "attr-value"])
cddlTypes = frozenset(["cddl-type", "cddl-key", "cddl-parameter", "cddl-value"])
idlTypes = frozenset(
[
"event",
Expand Down Expand Up @@ -83,7 +87,7 @@
idlNameTypes = frozenset(["interface", "namespace", "dictionary", "enum", "typedef", "callback"])
functionishTypes = frozenset(["function", "method", "constructor", "stringifier"])
idlMethodTypes = frozenset(["method", "constructor", "stringifier", "idl", "idl-name"])
linkTypes = dfnTypes | frozenset(["propdesc", "functionish", "idl", "idl-name", "element-sub", "maybe", "biblio"])
linkTypes = dfnTypes | frozenset(["propdesc", "functionish", "idl", "idl-name", "element-sub", "maybe", "biblio", "cddl"])
typesUsingFor = frozenset(
[
"descriptor",
Expand Down Expand Up @@ -168,11 +172,12 @@ def adjustKey(text: str, type: str) -> tuple[str, str]:
"functionish": functionishTypes,
"idl": idlTypes,
"idl-name": idlNameTypes,
"cddl": cddlTypes,
"element-sub": frozenset(["element-attr", "element-state"]),
"maybe": maybeTypes,
"dfn": frozenset(["dfn"]),
"biblio": frozenset(["biblio"]),
"codelike": frozenset(["element", "element-attr", "element-state", "attr-value"]) | idlTypes,
"codelike": frozenset(["element", "element-attr", "element-state", "attr-value"]) | idlTypes | cddlTypes,
"all": linkTypes,
}
for dfnType in dfnClassToType.values():
Expand Down
4 changes: 2 additions & 2 deletions bikeshed/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def __init__(self) -> None:
self.mailingList: str | None = None
self.mailingListArchives: str | None = None
self.markupShorthands: config.BoolSet = config.BoolSet(
["css", "dfn", "biblio", "markup", "http", "idl", "algorithm"],
["css", "dfn", "biblio", "markup", "http", "idl", "cddl", "algorithm"],
)
self.maxToCDepth: int | float | None = float("inf")
self.metadataInclude: config.BoolSet = config.BoolSet(default=True)
Expand Down Expand Up @@ -747,7 +747,7 @@ def parseMarkupShorthands(key: str, val: str, lineNum: str | int | None) -> conf
# TODO: Just call parseBoolistList instead
vals = [v.strip() for v in val.lower().split(",")]
ret = config.BoolSet(default=False)
validCategories = frozenset(["css", "markup", "dfn", "biblio", "http", "idl", "markdown", "algorithm"])
validCategories = frozenset(["css", "markup", "dfn", "biblio", "http", "idl", "cddl", "markdown", "algorithm"])
for v in vals:
pieces = v.split()
if len(pieces) != 2:
Expand Down
Loading

0 comments on commit 17ea0ee

Please sign in to comment.