254 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			254 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import annotations
 | |
| 
 | |
| import re
 | |
| from functools import lru_cache
 | |
| from itertools import chain, count
 | |
| from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
 | |
| 
 | |
| try:
 | |
|     from lxml import etree
 | |
| except ImportError:
 | |
|     # lxml is required for subsetting SVG, but we prefer to delay the import error
 | |
|     # until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table)
 | |
|     etree = None
 | |
| 
 | |
| from fontTools import ttLib
 | |
| from fontTools.subset.util import _add_method
 | |
| from fontTools.ttLib.tables.S_V_G_ import SVGDocument
 | |
| 
 | |
| 
 | |
# Only ``subset_glyphs`` is exported by a star-import of this module; the
# remaining functions defined here are left out of ``__all__``.
__all__ = ["subset_glyphs"]
 | |
| 
 | |
| 
 | |
# Matches element ids of the exact form "glyph<decimal digits>"; group 1
# captures the digits (the glyph index).
GID_RE = re.compile(r"^glyph(\d+)$")

# Prefix -> namespace URI mapping handed to every XPath expression compiled
# in this module, so that expressions can use the "svg:" and "xlink:" prefixes.
NAMESPACES = dict(
    svg="http://www.w3.org/2000/svg",
    xlink="http://www.w3.org/1999/xlink",
)

# Fully-qualified attribute key for xlink:href, in "{namespace-uri}name" form.
XLINK_HREF = "{" + NAMESPACES["xlink"] + "}href"
 | |
| 
 | |
| 
 | |
# TODO(anthrotype): Replace with functools.cache once we are 3.9+
@lru_cache(maxsize=None)
def xpath(path):
    """Return a compiled XPath evaluator for the expression *path*.

    The expression is compiled with the module-level ``NAMESPACES`` prefix
    map. Results are memoised on the expression string, so the same compiled
    object is reused every time the expression is applied to a new element.
    """
    compiled = etree.XPath(path, namespaces=NAMESPACES)
    return compiled
 | |
| 
 | |
| 
 | |
def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
    """Map each ``id`` attribute value to the SVG element that carries it.

    All elements in the SVG namespace that have an ``id`` are selected no
    matter where they are, including the root element itself; see
    https://github.com/fonttools/fonttools/issues/2548

    If several elements share the same id, the last one selected wins.
    """
    find_elements_with_id = xpath("//svg:*[@id]")
    by_id: Dict[str, etree.Element] = {}
    for element in find_elements_with_id(tree):
        by_id[element.attrib["id"]] = element
    return by_id
 | |
| 
 | |
| 
 | |
def parse_css_declarations(style_attr: str) -> Dict[str, str]:
    """Parse the text of an SVG ``style`` attribute into a property mapping.

    The text is split on ``;`` into declarations, and each declaration is
    split on its *first* ``:`` into a property name and a value. Splitting on
    the first colon only means that values which themselves contain a colon,
    e.g. ``filter: url(http://example.com/f.svg#blur)``, are accepted rather
    than rejected as malformed. Surrounding whitespace is stripped from both
    names and values; blank declarations (such as the one following a
    trailing ``;``) are ignored. When a property is repeated, the last
    occurrence wins.

    Note that values containing a literal ``;`` are not supported: the text
    is split on every semicolon before the declarations are examined.

    References:
        https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
        https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations

    Args:
        style_attr: the raw value of a ``style`` attribute.

    Returns:
        A dict mapping property names to their values.

    Raises:
        ValueError: if a non-blank declaration contains no ``:`` separator.
    """
    result: Dict[str, str] = {}
    for declaration in style_attr.split(";"):
        property_name, separator, value = declaration.partition(":")
        if separator:
            result[property_name.strip()] = value.strip()
        elif declaration.strip():
            # non-blank text without any colon is not a declaration
            raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
    return result
 | |
| 
 | |
| 
 | |
def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
    """Yield the ids referenced by *tree* or by any of its SVG descendants.

    Two kinds of local reference are recognised:

    - ``xlink:href="#id"`` (as used by <use> and gradient templates);
    - ``url(#id)`` values of ``fill`` or ``clip-path``, given either as
      attributes or as declarations inside a ``style`` attribute. A property
      declared in ``style`` takes precedence over the attribute of the same
      name.

    The same id may be yielded more than once. Empty references (a bare
    ``#`` or ``url(#)``) are skipped.

    Raises:
        ValueError: propagated from ``parse_css_declarations`` when a visited
            element has a malformed ``style`` attribute.
    """
    # TODO(anthrotype): Check we aren't missing other supported kinds of reference
    find_svg_elements_with_references = xpath(
        ".//svg:*[ "
        "starts-with(@xlink:href, '#') "
        "or starts-with(@fill, 'url(#') "
        "or starts-with(@clip-path, 'url(#') "
        # Match 'url(#' anywhere in the style text: requiring ':url(#' would
        # miss the common spelling with whitespace after the colon, e.g.
        # style="fill: url(#gradient)". The exact property is checked below.
        "or contains(@style, 'url(#') "
        "]",
    )
    # the XPath only selects descendants, so the element itself is added here
    for el in chain([tree], find_svg_elements_with_references(tree)):
        ref_id = href_local_target(el)
        if ref_id is not None:
            yield ref_id

        attrs = el.attrib
        if "style" in attrs:
            # overlay the style declarations on top of the plain attributes
            attrs = {**dict(attrs), **parse_css_declarations(attrs["style"])}
        for attr in ("fill", "clip-path"):
            value = attrs.get(attr)
            if value is None:
                continue
            if value.startswith("url(#") and value.endswith(")"):
                # strip the leading 'url(#' and the trailing ')'
                ref_id = value[5:-1]
                # ignore an empty 'url(#)': it points at nothing, and it is
                # not our job to validate the svg
                if ref_id:
                    yield ref_id
 | |
| 
 | |
| 
 | |
def closure_element_ids(
    elements: Dict[str, etree.Element], element_ids: Set[str]
) -> None:
    """Grow *element_ids* in place to its closure under id references.

    Starting from the ids initially in *element_ids*, every id that can be
    reached through references from the corresponding elements (as reported
    by ``iter_referenced_ids``) is added, repeatedly, until no new id turns
    up. Ids with no entry in *elements* are dangling references and are
    ignored; it is not our job to validate the svg.
    """
    frontier = element_ids
    while frontier:
        discovered: Set[str] = set()
        for current_id in frontier:
            if current_id in elements:
                discovered.update(iter_referenced_ids(elements[current_id]))
        # only ids not seen before need to be explored in the next round
        frontier = discovered - element_ids
        element_ids |= frontier
 | |
| 
 | |
| 
 | |
def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
    """Prune *el* down to the retained ids; return whether *el* was kept.

    An element is kept when its own id is in *retained_ids* (its whole
    subtree is then kept untouched, without recursing), or when at least one
    of its children is kept. Otherwise the element is removed from its
    parent, if it has one, and False is returned.
    """
    if el.attrib.get("id") in retained_ids:
        return True

    # Every child must be visited, even after one has already been kept,
    # because the recursive call has the side effect of dropping the
    # elements that end up empty.
    child_kept = False
    for child in el:
        if subset_elements(child, retained_ids):
            child_kept = True
    if child_kept:
        return True

    # none of the children survived, so they have all removed themselves
    assert len(el) == 0
    parent = el.getparent()
    if parent is not None:
        parent.remove(el)
    return False
 | |
| 
 | |
| 
 | |
def remap_glyph_ids(
    svg: etree.Element, glyph_index_map: Dict[int, int]
) -> Dict[str, str]:
    """Rename ``id="glyph{gid}"`` elements according to *glyph_index_map*.

    Given the {old_gid: new_gid} map, each element whose id has the form
    ``glyph{old_gid}`` gets its id rewritten in place:

    - to ``glyph{new_gid}`` when the old index is in the map (elements whose
      index does not change are left alone);
    - to a dot-prefixed id otherwise, since the element then corresponds to
      a glyph that was excluded from the font's subset; it is renamed to
      avoid clashes with the new GIDs or other element ids.

    Returns:
        The {old_id: new_id} mapping of the ids that were actually changed.
    """
    elements_by_id = group_elements_by_id(svg)
    renamed: Dict[str, str] = {}
    for current_id, element in elements_by_id.items():
        match = GID_RE.match(current_id)
        if match is None:
            continue

        old_gid = int(match.group(1))
        new_gid = glyph_index_map.get(old_gid)
        if new_gid == old_gid:
            # same index before and after: nothing to rename
            continue

        if new_gid is None:
            replacement = f".{current_id}"
            suffix = count(1)
            # keep appending numeric suffixes until the id is not one of the
            # ids originally present in the document
            while replacement in elements_by_id:
                replacement = f"{replacement}.{next(suffix)}"
        else:
            replacement = f"glyph{new_gid}"

        renamed[current_id] = replacement
        element.attrib["id"] = replacement

    return renamed
 | |
| 
 | |
| 
 | |
def href_local_target(el: etree.Element) -> Optional[str]:
    """Return the id targeted by the ``xlink:href`` attribute of *el*.

    Only local references of the form ``#some-id`` are recognised. None is
    returned when the attribute is absent, does not start with ``#``, or
    consists of the ``#`` alone.
    """
    if XLINK_HREF not in el.attrib:
        return None
    href = el.attrib[XLINK_HREF]
    if len(href) > 1 and href.startswith("#"):
        # drop the leading #
        return href[1:]
    return None
 | |
| 
 | |
| 
 | |
def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
    """Repoint ``xlink:href="#glyph..."`` links at the renamed glyph ids.

    Every SVG descendant of *svg* whose ``xlink:href`` starts with ``#glyph``
    is looked up in *id_map* ({old_id: new_id}); when its target was renamed,
    the attribute is rewritten in place to ``#{new_id}``. Links whose target
    is not in the map are left untouched.
    """
    find_glyph_links = xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")
    for link in find_glyph_links(svg):
        target = href_local_target(link)
        # the XPath only selects hrefs starting with '#glyph'
        assert target is not None
        if target not in id_map:
            continue
        link.attrib[XLINK_HREF] = f"#{id_map[target]}"
 | |
| 
 | |
| 
 | |
def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
    """Yield sorted, non-overlapping (min, max) ranges of consecutive integers.

    Duplicates in *ints* are ignored and the input order does not matter.
    Both ends of each range are inclusive; an isolated value ``v`` is
    reported as ``(v, v)``. Nothing is yielded for an empty input.
    """
    ordered = sorted(set(ints))
    if not ordered:
        return

    low = high = ordered[0]
    for value in ordered[1:]:
        if value == high + 1:
            # still consecutive: extend the current run
            high = value
            continue
        yield (low, high)
        low = high = value
    yield (low, high)
 | |
| 
 | |
| 
 | |
@_add_method(ttLib.getTableClass("SVG "))
def subset_glyphs(self, s) -> bool:
    """Subset the documents of this 'SVG ' table to the glyphs kept by *s*.

    Each document in ``self.docList`` whose glyph range intersects
    ``s.glyphs`` is parsed and pruned down to the elements for the retained
    glyphs plus everything they reference. Unless ``s.options.retain_gids``
    is set, ``glyph{gid}`` ids and the links pointing at them are renumbered
    to the new glyph indices. One record is emitted for each run of
    consecutive new glyph indices, and ``self.docList`` is replaced with the
    resulting records.

    Returns:
        True if at least one SVG document is left after subsetting.

    Raises:
        ImportError: if lxml could not be imported.
    """
    if etree is None:
        raise ImportError("No module named 'lxml', required to subset SVG")

    # glyph names (before subsetting)
    orig_glyph_names: List[str] = s.orig_glyph_order
    # map from glyph names to original glyph indices
    name_to_orig_gid: Dict[str, int] = s.reverseOrigGlyphMap
    # map from original to new glyph indices (after subsetting)
    old_to_new_gid: Dict[int, int] = s.glyph_index_map

    kept_docs: List[SVGDocument] = []
    for doc in self.docList:
        doc_glyph_names = {
            orig_glyph_names[gid]
            for gid in range(doc.startGlyphID, doc.endGlyphID + 1)
        }
        retained_names = doc_glyph_names.intersection(s.glyphs)
        if not retained_names:
            # no intersection: we can drop the whole record
            continue

        parser = etree.XMLParser(
            # Disable libxml2 security restrictions to support very deep trees.
            # Without this we would get an error like this:
            # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
            # when parsing big fonts e.g. noto-emoji-picosvg.ttf.
            huge_tree=True,
            # ignore blank text as it's not meaningful in OT-SVG; it also prevents
            # dangling tail text after removing an element when pretty_print=True
            remove_blank_text=True,
            # don't replace entities; we don't expect any in OT-SVG and they may
            # be abused for XXE attacks
            resolve_entities=False,
        )
        # encode because fromstring dislikes xml encoding decl if input is str.
        # SVG xml encoding must be utf-8 as per OT spec.
        svg = etree.fromstring(doc.data.encode("utf-8"), parser=parser)

        elements_by_id = group_elements_by_id(svg)
        retained_gids = {name_to_orig_gid[name] for name in retained_names}
        retained_ids = {f"glyph{gid}" for gid in retained_gids}
        closure_element_ids(elements_by_id, retained_ids)

        if not subset_elements(svg, retained_ids):
            # nothing was kept in this document
            continue

        if not s.options.retain_gids:
            renamed_ids = remap_glyph_ids(svg, old_to_new_gid)
            update_glyph_href_links(svg, renamed_ids)

        serialized = etree.tostring(svg, pretty_print=s.options.pretty_svg)
        new_data = serialized.decode("utf-8")

        # the same document text is shared by every run of consecutive gids
        new_gids = (old_to_new_gid[gid] for gid in retained_gids)
        kept_docs.extend(
            SVGDocument(new_data, first, last, doc.compressed)
            for first, last in ranges(new_gids)
        )

    self.docList = kept_docs

    return bool(self.docList)
 |