440 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			440 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import logging
 | |
| import os
 | |
| from collections import defaultdict, namedtuple
 | |
| from dataclasses import dataclass
 | |
| from functools import cached_property, reduce
 | |
| from itertools import chain
 | |
| from math import log2
 | |
| from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple
 | |
| 
 | |
| from fontTools.config import OPTIONS
 | |
| from fontTools.misc.intTools import bit_count, bit_indices
 | |
| from fontTools.ttLib import TTFont
 | |
| from fontTools.ttLib.tables import otBase, otTables
 | |
| 
 | |
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Option entry for the GPOS compression level, looked up in the fontTools
# OPTIONS registry under "<this module's name>:COMPRESSION_LEVEL".
COMPRESSION_LEVEL = OPTIONS[f"{__name__}:COMPRESSION_LEVEL"]

# Kept because ufo2ft depends on it, to be removed once ufo2ft uses the config instead
# https://github.com/fonttools/fonttools/issues/2592
GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
# String form of the option's default value; this is the level used when the
# environment variable above is not set.
GPOS_COMPACT_MODE_DEFAULT = str(COMPRESSION_LEVEL.default)
 | |
| 
 | |
| 
 | |
def _compression_level_from_env() -> int:
    """Return the compression level configured through the environment.

    When the deprecated ``GPOS_COMPACT_MODE_ENV_KEY`` environment variable is
    set, a ``DeprecationWarning`` is emitted and its value is used; otherwise
    ``GPOS_COMPACT_MODE_DEFAULT`` is used.

    Returns:
        The level as an integer between 0 and 9.

    Raises:
        ValueError: if the value is not exactly one ASCII digit.
    """
    if GPOS_COMPACT_MODE_ENV_KEY not in os.environ:
        raw_level = GPOS_COMPACT_MODE_DEFAULT
    else:
        import warnings

        warnings.warn(
            f"'{GPOS_COMPACT_MODE_ENV_KEY}' environment variable is deprecated. "
            "Please set the 'fontTools.otlLib.optimize.gpos:COMPRESSION_LEVEL' option "
            "in TTFont.cfg.",
            DeprecationWarning,
        )
        raw_level = os.environ[GPOS_COMPACT_MODE_ENV_KEY]

    # Only a single ASCII digit is accepted (so "10", "", "-1" are rejected).
    is_single_digit = len(raw_level) == 1 and raw_level in "0123456789"
    if not is_single_digit:
        raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={raw_level}")
    return int(raw_level)
 | |
| 
 | |
| 
 | |
def compact(font: TTFont, level: int) -> TTFont:
    """Compact the pair-positioning lookups of ``font``'s GPOS table in place.

    Args:
        font: The font to process. It is modified in place.
        level: Compression level, forwarded unchanged to the lookup
            compaction helpers.

    Returns:
        The same ``font`` object that was passed in.
    """
    # Ideal plan:
    #  1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable
    #     https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable
    #  2. Extract glyph-glyph kerning and class-kerning from all present subtables
    #  3. Regroup into different subtable arrangements
    #  4. Put back into the lookup
    #
    # Actual implementation:
    #  2. Only class kerning is optimized currently
    #  3. If the input kerning is already in several subtables, the subtables
    #     are not grouped together first; instead each subtable is treated
    #     independently, so currently this step is:
    #     Split existing subtables into more smaller subtables
    gpos = font.get("GPOS")

    # If the font does not contain a GPOS table, there is nothing to do.
    if gpos is None:
        return font

    for lookup in gpos.table.LookupList.Lookup:
        if lookup.LookupType == 2:
            compact_lookup(font, level, lookup)
        elif (
            lookup.LookupType == 9
            # An extension lookup without any subtable has nothing to inspect
            # or compact; check for emptiness first so that indexing
            # SubTable[0] cannot raise IndexError.
            and lookup.SubTable
            and lookup.SubTable[0].ExtensionLookupType == 2
        ):
            compact_ext_lookup(font, level, lookup)

    return font
 | |
| 
 | |
| 
 | |
def compact_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
    """Replace the subtables of a pair-positioning lookup by compacted ones.

    The lookup is modified in place: both its ``SubTable`` list and its
    ``SubTableCount`` are updated from the result of ``compact_pair_pos``.
    """
    compacted = compact_pair_pos(font, level, lookup.SubTable)
    lookup.SubTable, lookup.SubTableCount = compacted, len(compacted)
 | |
| 
 | |
| 
 | |
def compact_ext_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
    """Compact the subtables wrapped by an extension lookup, in place.

    The wrapped subtables are unwrapped, run through ``compact_pair_pos``,
    and each resulting subtable is wrapped again in a fresh Format 1
    ``ExtensionPos`` before being stored back on the lookup.
    """
    unwrapped = [wrapper.ExtSubTable for wrapper in lookup.SubTable]

    rewrapped = []
    for inner in compact_pair_pos(font, level, unwrapped):
        wrapper = otTables.ExtensionPos()
        wrapper.Format = 1
        wrapper.ExtSubTable = inner
        rewrapped.append(wrapper)

    lookup.SubTable = rewrapped
    lookup.SubTableCount = len(rewrapped)
 | |
| 
 | |
| 
 | |
def compact_pair_pos(
    font: TTFont, level: int, subtables: Sequence[otTables.PairPos]
) -> Sequence[otTables.PairPos]:
    """Return the compacted replacement for a list of PairPos subtables.

    Args:
        font: The font the subtables belong to.
        level: Compression level, forwarded to ``compact_class_pairs``.
        subtables: The subtables to process, in lookup order.

    Returns:
        A new list in which every Format 2 subtable is replaced by the
        subtables returned by ``compact_class_pairs`` and every other
        subtable is kept as is, preserving the relative order.
    """
    new_subtables = []
    for subtable in subtables:
        if subtable.Format == 2:
            new_subtables.extend(compact_class_pairs(font, level, subtable))
        else:
            # Not doing anything to Format 1 (yet?). Subtables with any other
            # format are not understood here; keep them untouched as well
            # rather than silently dropping them from the lookup.
            new_subtables.append(subtable)
    return new_subtables
 | |
| 
 | |
| 
 | |
def compact_class_pairs(
    font: TTFont, level: int, subtable: otTables.PairPos
) -> List[otTables.PairPos]:
    """Split one class-based PairPos subtable into several smaller ones.

    The non-zero class pairs of ``subtable`` are collected into a mapping
    keyed by (sorted class1 glyphs, sorted class2 glyphs), clustered by
    ``cluster_pairs_by_class2_coverage_custom_cost`` using ``level`` as the
    compression level, and each cluster is built back into a subtable.
    """
    from fontTools.otlLib.builder import buildPairPosClassesSubtable

    # First glyphs come from the Coverage table; glyphs missing from
    # ClassDef1 fall into class 0.
    glyphs_by_class1: DefaultDict[int, List[str]] = defaultdict(list)
    class1_of_glyph = subtable.ClassDef1.classDefs
    for glyph in subtable.Coverage.glyphs:
        glyphs_by_class1[class1_of_glyph.get(glyph, 0)].append(glyph)

    # Second glyphs are exactly those listed in ClassDef2.
    glyphs_by_class2: DefaultDict[int, List[str]] = defaultdict(list)
    for glyph, class_id in subtable.ClassDef2.classDefs.items():
        glyphs_by_class2[class_id].append(glyph)

    all_pairs = {}
    for row, class1_record in enumerate(subtable.Class1Record):
        for column, class2_record in enumerate(class1_record.Class2Record):
            if not is_really_zero(class2_record):
                key = (
                    tuple(sorted(glyphs_by_class1[row])),
                    tuple(sorted(glyphs_by_class2[column])),
                )
                all_pairs[key] = (
                    getattr(class2_record, "Value1", None),
                    getattr(class2_record, "Value2", None),
                )

    grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(font, all_pairs, level)
    return [
        buildPairPosClassesSubtable(group, font.getReverseGlyphMap())
        for group in grouped_pairs
    ]
 | |
| 
 | |
| 
 | |
def is_really_zero(class2: otTables.Class2Record) -> bool:
    """Tell whether a Class2Record carries no effective adjustment.

    Returns True when each of ``Value1`` and ``Value2`` is either missing,
    ``None``, or reports an effective format of 0.
    """
    for attribute in ("Value1", "Value2"):
        value = getattr(class2, attribute, None)
        if value is not None and value.getEffectiveFormat() != 0:
            return False
    return True
 | |
| 
 | |
| 
 | |
# Kerning pairs of one subtable: maps (class1 glyph names, class2 glyph names)
# to the (first glyph, second glyph) value records for that class pair.
Pairs = Dict[
    Tuple[Tuple[str, ...], Tuple[str, ...]],
    Tuple[otBase.ValueRecord, otBase.ValueRecord],
]
 | |
| 
 | |
| 
 | |
| # Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958
 | |
def _getClassRanges(glyphIDs: Iterable[int]):
    """Group glyph IDs into runs of consecutive values.

    Args:
        glyphIDs: A non-empty iterable of glyph IDs, in any order.

    Returns:
        A tuple ``(ranges, min_glyph_id, max_glyph_id)`` where ``ranges`` is
        a list of ``[start, end]`` lists (both ends inclusive) in increasing
        order.

    Raises:
        IndexError: if ``glyphIDs`` is empty.
    """
    ordered = sorted(glyphIDs)
    run_start = previous = ordered[0]
    ranges = []
    for glyph_id in ordered[1:]:
        if glyph_id != previous + 1:
            # The current run is broken: close it and open a new one.
            ranges.append([run_start, previous])
            run_start = glyph_id
        previous = glyph_id
    ranges.append([run_start, previous])
    return ranges, ordered[0], ordered[-1]
 | |
| 
 | |
| 
 | |
| # Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989
 | |
def _classDef_bytes(
    class_data: List[Tuple[List[Tuple[int, int]], int, int]],
    class_ids: List[int],
    coverage=False,
):
    """Estimate the byte size of a ClassDef table holding the given classes.

    Args:
        class_data: Per-class ``(ranges, min_glyph_id, max_glyph_id)``
            entries, indexed by class id.
        class_ids: Indices into ``class_data`` of the classes to encode.
        coverage: Unused.

    Returns:
        0 when ``class_ids`` is empty, otherwise the smaller of the
        ClassDef Format 1 and Format 2 sizes.
    """
    if not class_ids:
        return 0

    selected = [class_data[class_id] for class_id in class_ids]
    range_count = sum(len(entry[0]) for entry in selected)
    min_glyph_id = min(entry[1] for entry in selected)
    max_glyph_id = max(entry[2] for entry in selected)

    # Format 1 stores one class value per glyph ID of the whole span.
    glyph_span = max_glyph_id - min_glyph_id + 1
    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1
    format1_bytes = 6 + glyph_span * 2
    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2
    format2_bytes = 4 + range_count * 6
    return min(format1_bytes, format2_bytes)
 | |
| 
 | |
| 
 | |
# Read-only data shared by all the Cluster objects of one clustering run.
_CLUSTERING_CONTEXT_FIELDS = (
    # One big-int bitmask per class1: bit j is set when a pair exists with class2 j
    "lines",
    # Sorted list of class1 glyph-name tuples, indexed like "lines"
    "all_class1",
    # Per class1: (ranges, min_glyph_id, max_glyph_id)
    "all_class1_data",
    # Per class2: (ranges, min_glyph_id, max_glyph_id)
    "all_class2_data",
    # Byte size of one value record for the first glyph of a pair
    "valueFormat1_bytes",
    # Byte size of one value record for the second glyph of a pair
    "valueFormat2_bytes",
)
ClusteringContext = namedtuple("ClusteringContext", _CLUSTERING_CONTEXT_FIELDS)
 | |
| 
 | |
| 
 | |
@dataclass
class Cluster:
    """A set of class1 lines considered for storage in one subtable.

    The cluster is identified by ``indices_bitmask``; the bits set in it are
    turned into line indices with ``bit_indices``, and each index refers to
    one entry of ``ctx.lines`` / ``ctx.all_class1``. The properties estimate
    the number of bytes needed to encode those lines as a single PairPos
    Format 2 subtable.
    """

    # Data shared by all clusters of the same clustering run.
    ctx: ClusteringContext
    # Bitmask selecting the lines (class1 entries) that belong to this cluster.
    indices_bitmask: int

    @cached_property
    def indices(self):
        """Indices of the lines in this cluster, from ``indices_bitmask``."""
        return bit_indices(self.indices_bitmask)

    @cached_property
    def column_indices(self):
        """Indices of the class2 columns used by at least one line."""
        # Indices of columns that have a 1 in at least 1 line
        #   => binary OR all the lines
        bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices))
        return bit_indices(bitmask)

    @property
    def width(self):
        """Number of class2 columns to encode, including Class2=0."""
        # Add 1 because Class2=0 cannot be used but needs to be encoded.
        return len(self.column_indices) + 1

    @cached_property
    def cost(self):
        """Estimated total byte size of the subtable for this cluster."""
        return (
            # 2 bytes to store the offset to this subtable in the Lookup table above
            2
            # Contents of the subtable
            # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
            # uint16	posFormat	Format identifier: format = 2
            + 2
            # Offset16	coverageOffset	Offset to Coverage table, from beginning of PairPos subtable.
            + 2
            + self.coverage_bytes
            # uint16	valueFormat1	ValueRecord definition — for the first glyph of the pair (may be zero).
            + 2
            # uint16	valueFormat2	ValueRecord definition — for the second glyph of the pair (may be zero).
            + 2
            # Offset16	classDef1Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
            + 2
            + self.classDef1_bytes
            # Offset16	classDef2Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
            + 2
            + self.classDef2_bytes
            # uint16	class1Count	Number of classes in classDef1 table — includes Class 0.
            + 2
            # uint16	class2Count	Number of classes in classDef2 table — includes Class 0.
            + 2
            # Class1Record	class1Records[class1Count]	Array of Class1 records, ordered by classes in classDef1.
            + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes)
            * len(self.indices)
            * self.width
        )

    @property
    def coverage_bytes(self):
        """Estimated byte size of the Coverage table (best of both formats)."""
        format1_bytes = (
            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
            # uint16	coverageFormat	Format identifier — format = 1
            # uint16	glyphCount	Number of glyphs in the glyph array
            4
            # uint16	glyphArray[glyphCount]	Array of glyph IDs — in numerical order
            + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2
        )
        ranges = sorted(
            chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices)
        )
        merged_range_count = 0
        last = None
        for start, end in ranges:
            # A RangeRecord is needed for the very first range and for every
            # range that does not directly continue the previous one. Ranges
            # of different classes that touch each other are merged.
            if last is None or start != last + 1:
                merged_range_count += 1
            last = end
        format2_bytes = (
            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
            # uint16	coverageFormat	Format identifier — format = 2
            # uint16	rangeCount	Number of RangeRecords
            4
            # RangeRecord	rangeRecords[rangeCount]	Array of glyph ranges — ordered by startGlyphID.
            # uint16	startGlyphID	First glyph ID in the range
            # uint16	endGlyphID	Last glyph ID in the range
            # uint16	startCoverageIndex	Coverage Index of first glyph ID in range
            + merged_range_count * 6
        )
        return min(format1_bytes, format2_bytes)

    @property
    def classDef1_bytes(self):
        """Estimated byte size of the ClassDef table for the first glyphs."""
        # We can skip encoding one of the Class1 definitions, and use
        # Class1=0 to represent it instead, because Class1 is gated by the
        # Coverage definition. Use Class1=0 for the highest byte savings.
        # Going through all options takes too long, pick the biggest class
        # = what happens in otlLib.builder.ClassDefBuilder.classes()
        biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i]))
        return _classDef_bytes(
            self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index]
        )

    @property
    def classDef2_bytes(self):
        """Estimated byte size of the ClassDef table for the second glyphs."""
        # All Class2 need to be encoded because we can't use Class2=0
        return _classDef_bytes(self.ctx.all_class2_data, self.column_indices)
 | |
| 
 | |
| 
 | |
def cluster_pairs_by_class2_coverage_custom_cost(
    font: TTFont,
    pairs: Pairs,
    compression: int = 5,
) -> List[Pairs]:
    """Partition class kerning pairs into groups, one per future subtable.

    Every class1 starts in its own cluster; the two clusters whose merge is
    cheapest (according to ``Cluster.cost``) are merged repeatedly until the
    stop criterion derived from ``compression`` is met or a single cluster
    remains.

    Args:
        font: Font providing the glyph name to glyph ID mapping.
        pairs: The class pairs to split, keyed by (class1, class2).
        compression: Trade-off between size and number of subtables; higher
            values accept more subtables. 9 stops merging as soon as the
            best available merge would increase the estimated size.

    Returns:
        A list of ``Pairs`` dicts, one per final cluster, each holding the
        pairs whose class1 belongs to that cluster. When ``pairs`` is empty,
        a one-element list containing ``pairs`` itself.
    """
    if not pairs:
        # The subtable was actually empty?
        return [pairs]

    # Sorted for reproducibility/determinism
    all_class1 = sorted(set(pair[0] for pair in pairs))
    all_class2 = sorted(set(pair[1] for pair in pairs))

    # Use Python's big ints for binary vectors representing each line
    # (bit i of a line is set when the pair with all_class2[i] exists)
    lines = [
        sum(
            1 << i if (class1, class2) in pairs else 0
            for i, class2 in enumerate(all_class2)
        )
        for class1 in all_class1
    ]

    # Map glyph names to ids and work with ints throughout for ClassDef formats
    name_to_id = font.getReverseGlyphMap()
    # Each entry in the arrays below is (ranges, min_glyph_id, max_glyph_id)
    all_class1_data = [
        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1
    ]
    all_class2_data = [
        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2
    ]

    # Combine the effective formats of all the values on each side; every
    # bit set in the combined format is counted as 2 bytes per value record.
    format1 = 0
    format2 = 0
    for pair, value in pairs.items():
        format1 |= value[0].getEffectiveFormat() if value[0] else 0
        format2 |= value[1].getEffectiveFormat() if value[1] else 0
    valueFormat1_bytes = bit_count(format1) * 2
    valueFormat2_bytes = bit_count(format2) * 2

    ctx = ClusteringContext(
        lines,
        all_class1,
        all_class1_data,
        all_class2_data,
        valueFormat1_bytes,
        valueFormat2_bytes,
    )

    # Clusters are cached by bitmask so that the same merge candidate is not
    # rebuilt (and its cost not recomputed) on every pass of the loop below.
    cluster_cache: Dict[int, Cluster] = {}

    def make_cluster(indices: int) -> Cluster:
        # Return the cached cluster for this bitmask, creating it if needed.
        cluster = cluster_cache.get(indices, None)
        if cluster is not None:
            return cluster
        cluster = Cluster(ctx, indices)
        cluster_cache[indices] = cluster
        return cluster

    def merge(cluster: Cluster, other: Cluster) -> Cluster:
        # The merged cluster contains the lines of both clusters.
        return make_cluster(cluster.indices_bitmask | other.indices_bitmask)

    # Agglomerative clustering by hand, checking the cost gain of the new
    # cluster against the previously separate clusters
    # Start with 1 cluster per line
    # cluster = set of lines = new subtable
    clusters = [make_cluster(1 << i) for i in range(len(lines))]

    # Cost of 1 cluster with everything
    # `(1 << len) - 1` gives a bitmask full of 1's of length `len`
    cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost
    log.debug(f"        len(clusters) = {len(clusters)}")

    while len(clusters) > 1:
        # Look at every pair of clusters and remember the merge with the
        # lowest cost change; on ties the first pair found is kept.
        lowest_cost_change = None
        best_cluster_index = None
        best_other_index = None
        best_merged = None
        for i, cluster in enumerate(clusters):
            for j, other in enumerate(clusters[i + 1 :]):
                merged = merge(cluster, other)
                cost_change = merged.cost - cluster.cost - other.cost
                if lowest_cost_change is None or cost_change < lowest_cost_change:
                    lowest_cost_change = cost_change
                    best_cluster_index = i
                    # j is relative to the slice starting at i + 1
                    best_other_index = i + 1 + j
                    best_merged = merged
        assert lowest_cost_change is not None
        assert best_cluster_index is not None
        assert best_other_index is not None
        assert best_merged is not None

        # If the best merge we found is still taking down the file size, then
        # there's no question: we must do it, because it's beneficial in both
        # ways (lower file size and lower number of subtables).  However, if the
        # best merge we found is not reducing file size anymore, then we need to
        # look at the other stop criteria = the compression factor.
        if lowest_cost_change > 0:
            # Stop criteria: check whether we should keep merging.
            # Compute size reduction brought by splitting
            cost_after_splitting = sum(c.cost for c in clusters)
            # size_reduction so that after = before * (1 - size_reduction)
            # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2
            size_reduction = 1 - cost_after_splitting / cost_before_splitting

            # Force more merging by taking into account the compression number.
            # Target behaviour: compression number = 1 to 9, default 5 like gzip
            #   - 1 = accept to add 1 subtable to reduce size by 50%
            #   - 5 = accept to add 5 subtables to reduce size by 50%
            # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691
            # Given the size reduction we have achieved so far, compute how many
            # new subtables are acceptable.
            max_new_subtables = -log2(1 - size_reduction) * compression
            log.debug(
                f"            len(clusters) = {len(clusters):3d}    size_reduction={size_reduction:5.2f}    max_new_subtables={max_new_subtables}",
            )
            if compression == 9:
                # Override level 9 to mean: create any number of subtables
                max_new_subtables = len(clusters)

            # If we have managed to take the number of new subtables below the
            # threshold, then we can stop.
            if len(clusters) <= max_new_subtables + 1:
                break

        # No reason to stop yet, do the merge and move on to the next.
        # best_other_index > best_cluster_index, so deleting the former does
        # not shift the position of the latter.
        del clusters[best_other_index]
        clusters[best_cluster_index] = best_merged

    # All clusters are final; turn bitmasks back into the "Pairs" format
    pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
    for pair, values in pairs.items():
        pairs_by_class1[pair[0]][pair] = values
    pairs_groups: List[Pairs] = []
    for cluster in clusters:
        pairs_group: Pairs = dict()
        for i in cluster.indices:
            class1 = all_class1[i]
            pairs_group.update(pairs_by_class1[class1])
        pairs_groups.append(pairs_group)
    return pairs_groups
 |