You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			533 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			533 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
"""
 | 
						|
A python interface to Adobe Font Metrics Files.
 | 
						|
 | 
						|
Although a number of other Python implementations exist, and may be more
 | 
						|
complete than this, it was decided not to go with them because they were
 | 
						|
either:
 | 
						|
 | 
						|
1) copyrighted or used a non-BSD compatible license
 | 
						|
2) had too many dependencies and a free standing lib was needed
 | 
						|
3) did more than needed and it was easier to write afresh rather than
 | 
						|
   figure out how to get just what was needed.
 | 
						|
 | 
						|
It is pretty easy to use, and has no external dependencies:
 | 
						|
 | 
						|
>>> import matplotlib as mpl
 | 
						|
>>> from pathlib import Path
 | 
						|
>>> afm_path = Path(mpl.get_data_path(), 'fonts', 'afm', 'ptmr8a.afm')
 | 
						|
>>>
 | 
						|
>>> from matplotlib.afm import AFM
 | 
						|
>>> with afm_path.open('rb') as fh:
 | 
						|
...     afm = AFM(fh)
 | 
						|
>>> afm.string_width_height('What the heck?')
 | 
						|
(6220.0, 694)
 | 
						|
>>> afm.get_fontname()
 | 
						|
'Times-Roman'
 | 
						|
>>> afm.get_kern_dist('A', 'f')
 | 
						|
0
 | 
						|
>>> afm.get_kern_dist('A', 'y')
 | 
						|
-92.0
 | 
						|
>>> afm.get_bbox_char('!')
 | 
						|
[130, -9, 238, 676]
 | 
						|
 | 
						|
As in the Adobe Font Metrics File Format Specification, all dimensions
 | 
						|
are given in units of 1/1000 of the scale factor (point size) of the font
 | 
						|
being used.
 | 
						|
"""
 | 
						|
 | 
						|
from collections import namedtuple
 | 
						|
import logging
 | 
						|
import re
 | 
						|
 | 
						|
from ._mathtext_data import uni2type1
 | 
						|
 | 
						|
 | 
						|
_log = logging.getLogger(__name__)
 | 
						|
 | 
						|
 | 
						|
def _to_int(x):
 | 
						|
    # Some AFM files have floats where we are expecting ints -- there is
 | 
						|
    # probably a better way to handle this (support floats, round rather than
 | 
						|
    # truncate).  But I don't know what the best approach is now and this
 | 
						|
    # change to _to_int should at least prevent Matplotlib from crashing on
 | 
						|
    # these.  JDH (2009-11-06)
 | 
						|
    return int(float(x))
 | 
						|
 | 
						|
 | 
						|
def _to_float(x):
 | 
						|
    # Some AFM files use "," instead of "." as decimal separator -- this
 | 
						|
    # shouldn't be ambiguous (unless someone is wicked enough to use "," as
 | 
						|
    # thousands separator...).
 | 
						|
    if isinstance(x, bytes):
 | 
						|
        # Encoding doesn't really matter -- if we have codepoints >127 the call
 | 
						|
        # to float() will error anyways.
 | 
						|
        x = x.decode('latin-1')
 | 
						|
    return float(x.replace(',', '.'))
 | 
						|
 | 
						|
 | 
						|
def _to_str(x):
 | 
						|
    return x.decode('utf8')
 | 
						|
 | 
						|
 | 
						|
def _to_list_of_ints(s):
 | 
						|
    s = s.replace(b',', b' ')
 | 
						|
    return [_to_int(val) for val in s.split()]
 | 
						|
 | 
						|
 | 
						|
def _to_list_of_floats(s):
 | 
						|
    return [_to_float(val) for val in s.split()]
 | 
						|
 | 
						|
 | 
						|
def _to_bool(s):
 | 
						|
    if s.lower().strip() in (b'false', b'0', b'no'):
 | 
						|
        return False
 | 
						|
    else:
 | 
						|
        return True
 | 
						|
 | 
						|
 | 
						|
def _parse_header(fh):
 | 
						|
    """
 | 
						|
    Read the font metrics header (up to the char metrics) and returns
 | 
						|
    a dictionary mapping *key* to *val*.  *val* will be converted to the
 | 
						|
    appropriate python type as necessary; e.g.:
 | 
						|
 | 
						|
        * 'False'->False
 | 
						|
        * '0'->0
 | 
						|
        * '-168 -218 1000 898'-> [-168, -218, 1000, 898]
 | 
						|
 | 
						|
    Dictionary keys are
 | 
						|
 | 
						|
      StartFontMetrics, FontName, FullName, FamilyName, Weight,
 | 
						|
      ItalicAngle, IsFixedPitch, FontBBox, UnderlinePosition,
 | 
						|
      UnderlineThickness, Version, Notice, EncodingScheme, CapHeight,
 | 
						|
      XHeight, Ascender, Descender, StartCharMetrics
 | 
						|
    """
 | 
						|
    header_converters = {
 | 
						|
        b'StartFontMetrics': _to_float,
 | 
						|
        b'FontName': _to_str,
 | 
						|
        b'FullName': _to_str,
 | 
						|
        b'FamilyName': _to_str,
 | 
						|
        b'Weight': _to_str,
 | 
						|
        b'ItalicAngle': _to_float,
 | 
						|
        b'IsFixedPitch': _to_bool,
 | 
						|
        b'FontBBox': _to_list_of_ints,
 | 
						|
        b'UnderlinePosition': _to_float,
 | 
						|
        b'UnderlineThickness': _to_float,
 | 
						|
        b'Version': _to_str,
 | 
						|
        # Some AFM files have non-ASCII characters (which are not allowed by
 | 
						|
        # the spec).  Given that there is actually no public API to even access
 | 
						|
        # this field, just return it as straight bytes.
 | 
						|
        b'Notice': lambda x: x,
 | 
						|
        b'EncodingScheme': _to_str,
 | 
						|
        b'CapHeight': _to_float,  # Is the second version a mistake, or
 | 
						|
        b'Capheight': _to_float,  # do some AFM files contain 'Capheight'? -JKS
 | 
						|
        b'XHeight': _to_float,
 | 
						|
        b'Ascender': _to_float,
 | 
						|
        b'Descender': _to_float,
 | 
						|
        b'StdHW': _to_float,
 | 
						|
        b'StdVW': _to_float,
 | 
						|
        b'StartCharMetrics': _to_int,
 | 
						|
        b'CharacterSet': _to_str,
 | 
						|
        b'Characters': _to_int,
 | 
						|
    }
 | 
						|
    d = {}
 | 
						|
    first_line = True
 | 
						|
    for line in fh:
 | 
						|
        line = line.rstrip()
 | 
						|
        if line.startswith(b'Comment'):
 | 
						|
            continue
 | 
						|
        lst = line.split(b' ', 1)
 | 
						|
        key = lst[0]
 | 
						|
        if first_line:
 | 
						|
            # AFM spec, Section 4: The StartFontMetrics keyword
 | 
						|
            # [followed by a version number] must be the first line in
 | 
						|
            # the file, and the EndFontMetrics keyword must be the
 | 
						|
            # last non-empty line in the file.  We just check the
 | 
						|
            # first header entry.
 | 
						|
            if key != b'StartFontMetrics':
 | 
						|
                raise RuntimeError('Not an AFM file')
 | 
						|
            first_line = False
 | 
						|
        if len(lst) == 2:
 | 
						|
            val = lst[1]
 | 
						|
        else:
 | 
						|
            val = b''
 | 
						|
        try:
 | 
						|
            converter = header_converters[key]
 | 
						|
        except KeyError:
 | 
						|
            _log.error("Found an unknown keyword in AFM header (was %r)", key)
 | 
						|
            continue
 | 
						|
        try:
 | 
						|
            d[key] = converter(val)
 | 
						|
        except ValueError:
 | 
						|
            _log.error('Value error parsing header in AFM: %s, %s', key, val)
 | 
						|
            continue
 | 
						|
        if key == b'StartCharMetrics':
 | 
						|
            break
 | 
						|
    else:
 | 
						|
        raise RuntimeError('Bad parse')
 | 
						|
    return d
 | 
						|
 | 
						|
 | 
						|
CharMetrics = namedtuple('CharMetrics', 'width, name, bbox')
 | 
						|
CharMetrics.__doc__ = """
 | 
						|
    Represents the character metrics of a single character.
 | 
						|
 | 
						|
    Notes
 | 
						|
    -----
 | 
						|
    The fields do currently only describe a subset of character metrics
 | 
						|
    information defined in the AFM standard.
 | 
						|
    """
 | 
						|
CharMetrics.width.__doc__ = """The character width (WX)."""
 | 
						|
CharMetrics.name.__doc__ = """The character name (N)."""
 | 
						|
CharMetrics.bbox.__doc__ = """
 | 
						|
    The bbox of the character (B) as a tuple (*llx*, *lly*, *urx*, *ury*)."""
 | 
						|
 | 
						|
 | 
						|
def _parse_char_metrics(fh):
 | 
						|
    """
 | 
						|
    Parse the given filehandle for character metrics information and return
 | 
						|
    the information as dicts.
 | 
						|
 | 
						|
    It is assumed that the file cursor is on the line behind
 | 
						|
    'StartCharMetrics'.
 | 
						|
 | 
						|
    Returns
 | 
						|
    -------
 | 
						|
    ascii_d : dict
 | 
						|
         A mapping "ASCII num of the character" to `.CharMetrics`.
 | 
						|
    name_d : dict
 | 
						|
         A mapping "character name" to `.CharMetrics`.
 | 
						|
 | 
						|
    Notes
 | 
						|
    -----
 | 
						|
    This function is incomplete per the standard, but thus far parses
 | 
						|
    all the sample afm files tried.
 | 
						|
    """
 | 
						|
    required_keys = {'C', 'WX', 'N', 'B'}
 | 
						|
 | 
						|
    ascii_d = {}
 | 
						|
    name_d = {}
 | 
						|
    for line in fh:
 | 
						|
        # We are defensively letting values be utf8. The spec requires
 | 
						|
        # ascii, but there are non-compliant fonts in circulation
 | 
						|
        line = _to_str(line.rstrip())  # Convert from byte-literal
 | 
						|
        if line.startswith('EndCharMetrics'):
 | 
						|
            return ascii_d, name_d
 | 
						|
        # Split the metric line into a dictionary, keyed by metric identifiers
 | 
						|
        vals = dict(s.strip().split(' ', 1) for s in line.split(';') if s)
 | 
						|
        # There may be other metrics present, but only these are needed
 | 
						|
        if not required_keys.issubset(vals):
 | 
						|
            raise RuntimeError('Bad char metrics line: %s' % line)
 | 
						|
        num = _to_int(vals['C'])
 | 
						|
        wx = _to_float(vals['WX'])
 | 
						|
        name = vals['N']
 | 
						|
        bbox = _to_list_of_floats(vals['B'])
 | 
						|
        bbox = list(map(int, bbox))
 | 
						|
        metrics = CharMetrics(wx, name, bbox)
 | 
						|
        # Workaround: If the character name is 'Euro', give it the
 | 
						|
        # corresponding character code, according to WinAnsiEncoding (see PDF
 | 
						|
        # Reference).
 | 
						|
        if name == 'Euro':
 | 
						|
            num = 128
 | 
						|
        elif name == 'minus':
 | 
						|
            num = ord("\N{MINUS SIGN}")  # 0x2212
 | 
						|
        if num != -1:
 | 
						|
            ascii_d[num] = metrics
 | 
						|
        name_d[name] = metrics
 | 
						|
    raise RuntimeError('Bad parse')
 | 
						|
 | 
						|
 | 
						|
def _parse_kern_pairs(fh):
 | 
						|
    """
 | 
						|
    Return a kern pairs dictionary; keys are (*char1*, *char2*) tuples and
 | 
						|
    values are the kern pair value.  For example, a kern pairs line like
 | 
						|
    ``KPX A y -50``
 | 
						|
 | 
						|
    will be represented as::
 | 
						|
 | 
						|
      d[ ('A', 'y') ] = -50
 | 
						|
 | 
						|
    """
 | 
						|
 | 
						|
    line = next(fh)
 | 
						|
    if not line.startswith(b'StartKernPairs'):
 | 
						|
        raise RuntimeError('Bad start of kern pairs data: %s' % line)
 | 
						|
 | 
						|
    d = {}
 | 
						|
    for line in fh:
 | 
						|
        line = line.rstrip()
 | 
						|
        if not line:
 | 
						|
            continue
 | 
						|
        if line.startswith(b'EndKernPairs'):
 | 
						|
            next(fh)  # EndKernData
 | 
						|
            return d
 | 
						|
        vals = line.split()
 | 
						|
        if len(vals) != 4 or vals[0] != b'KPX':
 | 
						|
            raise RuntimeError('Bad kern pairs line: %s' % line)
 | 
						|
        c1, c2, val = _to_str(vals[1]), _to_str(vals[2]), _to_float(vals[3])
 | 
						|
        d[(c1, c2)] = val
 | 
						|
    raise RuntimeError('Bad kern pairs parse')
 | 
						|
 | 
						|
 | 
						|
CompositePart = namedtuple('CompositePart', 'name, dx, dy')
 | 
						|
CompositePart.__doc__ = """
 | 
						|
    Represents the information on a composite element of a composite char."""
 | 
						|
CompositePart.name.__doc__ = """Name of the part, e.g. 'acute'."""
 | 
						|
CompositePart.dx.__doc__ = """x-displacement of the part from the origin."""
 | 
						|
CompositePart.dy.__doc__ = """y-displacement of the part from the origin."""
 | 
						|
 | 
						|
 | 
						|
def _parse_composites(fh):
 | 
						|
    """
 | 
						|
    Parse the given filehandle for composites information return them as a
 | 
						|
    dict.
 | 
						|
 | 
						|
    It is assumed that the file cursor is on the line behind 'StartComposites'.
 | 
						|
 | 
						|
    Returns
 | 
						|
    -------
 | 
						|
    dict
 | 
						|
        A dict mapping composite character names to a parts list. The parts
 | 
						|
        list is a list of `.CompositePart` entries describing the parts of
 | 
						|
        the composite.
 | 
						|
 | 
						|
    Examples
 | 
						|
    --------
 | 
						|
    A composite definition line::
 | 
						|
 | 
						|
      CC Aacute 2 ; PCC A 0 0 ; PCC acute 160 170 ;
 | 
						|
 | 
						|
    will be represented as::
 | 
						|
 | 
						|
      composites['Aacute'] = [CompositePart(name='A', dx=0, dy=0),
 | 
						|
                              CompositePart(name='acute', dx=160, dy=170)]
 | 
						|
 | 
						|
    """
 | 
						|
    composites = {}
 | 
						|
    for line in fh:
 | 
						|
        line = line.rstrip()
 | 
						|
        if not line:
 | 
						|
            continue
 | 
						|
        if line.startswith(b'EndComposites'):
 | 
						|
            return composites
 | 
						|
        vals = line.split(b';')
 | 
						|
        cc = vals[0].split()
 | 
						|
        name, _num_parts = cc[1], _to_int(cc[2])
 | 
						|
        pccParts = []
 | 
						|
        for s in vals[1:-1]:
 | 
						|
            pcc = s.split()
 | 
						|
            part = CompositePart(pcc[1], _to_float(pcc[2]), _to_float(pcc[3]))
 | 
						|
            pccParts.append(part)
 | 
						|
        composites[name] = pccParts
 | 
						|
 | 
						|
    raise RuntimeError('Bad composites parse')
 | 
						|
 | 
						|
 | 
						|
def _parse_optional(fh):
 | 
						|
    """
 | 
						|
    Parse the optional fields for kern pair data and composites.
 | 
						|
 | 
						|
    Returns
 | 
						|
    -------
 | 
						|
    kern_data : dict
 | 
						|
        A dict containing kerning information. May be empty.
 | 
						|
        See `._parse_kern_pairs`.
 | 
						|
    composites : dict
 | 
						|
        A dict containing composite information. May be empty.
 | 
						|
        See `._parse_composites`.
 | 
						|
    """
 | 
						|
    optional = {
 | 
						|
        b'StartKernData': _parse_kern_pairs,
 | 
						|
        b'StartComposites':  _parse_composites,
 | 
						|
        }
 | 
						|
 | 
						|
    d = {b'StartKernData': {},
 | 
						|
         b'StartComposites': {}}
 | 
						|
    for line in fh:
 | 
						|
        line = line.rstrip()
 | 
						|
        if not line:
 | 
						|
            continue
 | 
						|
        key = line.split()[0]
 | 
						|
 | 
						|
        if key in optional:
 | 
						|
            d[key] = optional[key](fh)
 | 
						|
 | 
						|
    return d[b'StartKernData'], d[b'StartComposites']
 | 
						|
 | 
						|
 | 
						|
class AFM:
 | 
						|
 | 
						|
    def __init__(self, fh):
 | 
						|
        """Parse the AFM file in file object *fh*."""
 | 
						|
        self._header = _parse_header(fh)
 | 
						|
        self._metrics, self._metrics_by_name = _parse_char_metrics(fh)
 | 
						|
        self._kern, self._composite = _parse_optional(fh)
 | 
						|
 | 
						|
    def get_bbox_char(self, c, isord=False):
 | 
						|
        if not isord:
 | 
						|
            c = ord(c)
 | 
						|
        return self._metrics[c].bbox
 | 
						|
 | 
						|
    def string_width_height(self, s):
 | 
						|
        """
 | 
						|
        Return the string width (including kerning) and string height
 | 
						|
        as a (*w*, *h*) tuple.
 | 
						|
        """
 | 
						|
        if not len(s):
 | 
						|
            return 0, 0
 | 
						|
        total_width = 0
 | 
						|
        namelast = None
 | 
						|
        miny = 1e9
 | 
						|
        maxy = 0
 | 
						|
        for c in s:
 | 
						|
            if c == '\n':
 | 
						|
                continue
 | 
						|
            wx, name, bbox = self._metrics[ord(c)]
 | 
						|
 | 
						|
            total_width += wx + self._kern.get((namelast, name), 0)
 | 
						|
            l, b, w, h = bbox
 | 
						|
            miny = min(miny, b)
 | 
						|
            maxy = max(maxy, b + h)
 | 
						|
 | 
						|
            namelast = name
 | 
						|
 | 
						|
        return total_width, maxy - miny
 | 
						|
 | 
						|
    def get_str_bbox_and_descent(self, s):
 | 
						|
        """Return the string bounding box and the maximal descent."""
 | 
						|
        if not len(s):
 | 
						|
            return 0, 0, 0, 0, 0
 | 
						|
        total_width = 0
 | 
						|
        namelast = None
 | 
						|
        miny = 1e9
 | 
						|
        maxy = 0
 | 
						|
        left = 0
 | 
						|
        if not isinstance(s, str):
 | 
						|
            s = _to_str(s)
 | 
						|
        for c in s:
 | 
						|
            if c == '\n':
 | 
						|
                continue
 | 
						|
            name = uni2type1.get(ord(c), f"uni{ord(c):04X}")
 | 
						|
            try:
 | 
						|
                wx, _, bbox = self._metrics_by_name[name]
 | 
						|
            except KeyError:
 | 
						|
                name = 'question'
 | 
						|
                wx, _, bbox = self._metrics_by_name[name]
 | 
						|
            total_width += wx + self._kern.get((namelast, name), 0)
 | 
						|
            l, b, w, h = bbox
 | 
						|
            left = min(left, l)
 | 
						|
            miny = min(miny, b)
 | 
						|
            maxy = max(maxy, b + h)
 | 
						|
 | 
						|
            namelast = name
 | 
						|
 | 
						|
        return left, miny, total_width, maxy - miny, -miny
 | 
						|
 | 
						|
    def get_str_bbox(self, s):
 | 
						|
        """Return the string bounding box."""
 | 
						|
        return self.get_str_bbox_and_descent(s)[:4]
 | 
						|
 | 
						|
    def get_name_char(self, c, isord=False):
 | 
						|
        """Get the name of the character, i.e., ';' is 'semicolon'."""
 | 
						|
        if not isord:
 | 
						|
            c = ord(c)
 | 
						|
        return self._metrics[c].name
 | 
						|
 | 
						|
    def get_width_char(self, c, isord=False):
 | 
						|
        """
 | 
						|
        Get the width of the character from the character metric WX field.
 | 
						|
        """
 | 
						|
        if not isord:
 | 
						|
            c = ord(c)
 | 
						|
        return self._metrics[c].width
 | 
						|
 | 
						|
    def get_width_from_char_name(self, name):
 | 
						|
        """Get the width of the character from a type1 character name."""
 | 
						|
        return self._metrics_by_name[name].width
 | 
						|
 | 
						|
    def get_height_char(self, c, isord=False):
 | 
						|
        """Get the bounding box (ink) height of character *c* (space is 0)."""
 | 
						|
        if not isord:
 | 
						|
            c = ord(c)
 | 
						|
        return self._metrics[c].bbox[-1]
 | 
						|
 | 
						|
    def get_kern_dist(self, c1, c2):
 | 
						|
        """
 | 
						|
        Return the kerning pair distance (possibly 0) for chars *c1* and *c2*.
 | 
						|
        """
 | 
						|
        name1, name2 = self.get_name_char(c1), self.get_name_char(c2)
 | 
						|
        return self.get_kern_dist_from_name(name1, name2)
 | 
						|
 | 
						|
    def get_kern_dist_from_name(self, name1, name2):
 | 
						|
        """
 | 
						|
        Return the kerning pair distance (possibly 0) for chars
 | 
						|
        *name1* and *name2*.
 | 
						|
        """
 | 
						|
        return self._kern.get((name1, name2), 0)
 | 
						|
 | 
						|
    def get_fontname(self):
 | 
						|
        """Return the font name, e.g., 'Times-Roman'."""
 | 
						|
        return self._header[b'FontName']
 | 
						|
 | 
						|
    @property
 | 
						|
    def postscript_name(self):  # For consistency with FT2Font.
 | 
						|
        return self.get_fontname()
 | 
						|
 | 
						|
    def get_fullname(self):
 | 
						|
        """Return the font full name, e.g., 'Times-Roman'."""
 | 
						|
        name = self._header.get(b'FullName')
 | 
						|
        if name is None:  # use FontName as a substitute
 | 
						|
            name = self._header[b'FontName']
 | 
						|
        return name
 | 
						|
 | 
						|
    def get_familyname(self):
 | 
						|
        """Return the font family name, e.g., 'Times'."""
 | 
						|
        name = self._header.get(b'FamilyName')
 | 
						|
        if name is not None:
 | 
						|
            return name
 | 
						|
 | 
						|
        # FamilyName not specified so we'll make a guess
 | 
						|
        name = self.get_fullname()
 | 
						|
        extras = (r'(?i)([ -](regular|plain|italic|oblique|bold|semibold|'
 | 
						|
                  r'light|ultralight|extra|condensed))+$')
 | 
						|
        return re.sub(extras, '', name)
 | 
						|
 | 
						|
    @property
 | 
						|
    def family_name(self):
 | 
						|
        """The font family name, e.g., 'Times'."""
 | 
						|
        return self.get_familyname()
 | 
						|
 | 
						|
    def get_weight(self):
 | 
						|
        """Return the font weight, e.g., 'Bold' or 'Roman'."""
 | 
						|
        return self._header[b'Weight']
 | 
						|
 | 
						|
    def get_angle(self):
 | 
						|
        """Return the fontangle as float."""
 | 
						|
        return self._header[b'ItalicAngle']
 | 
						|
 | 
						|
    def get_capheight(self):
 | 
						|
        """Return the cap height as float."""
 | 
						|
        return self._header[b'CapHeight']
 | 
						|
 | 
						|
    def get_xheight(self):
 | 
						|
        """Return the xheight as float."""
 | 
						|
        return self._header[b'XHeight']
 | 
						|
 | 
						|
    def get_underline_thickness(self):
 | 
						|
        """Return the underline thickness as float."""
 | 
						|
        return self._header[b'UnderlineThickness']
 | 
						|
 | 
						|
    def get_horizontal_stem_width(self):
 | 
						|
        """
 | 
						|
        Return the standard horizontal stem width as float, or *None* if
 | 
						|
        not specified in AFM file.
 | 
						|
        """
 | 
						|
        return self._header.get(b'StdHW', None)
 | 
						|
 | 
						|
    def get_vertical_stem_width(self):
 | 
						|
        """
 | 
						|
        Return the standard vertical stem width as float, or *None* if
 | 
						|
        not specified in AFM file.
 | 
						|
        """
 | 
						|
        return self._header.get(b'StdVW', None)
 |