You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			136 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			136 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)
but missing from Python.  See https://github.com/fonttools/fonttools/issues/236 for details."""
 | 
						|
 | 
						|
import codecs
 | 
						|
import encodings
 | 
						|
 | 
						|
 | 
						|
class ExtendCodec(codecs.Codec):
    """Codec that extends a base encoding with extra byte <-> character pairs.

    Conversion is delegated to ``base_encoding``.  Whenever the base codec
    reports an error, :meth:`error` (registered as an error handler under the
    codec's own name) looks the offending data up in ``mapping`` (decoding) or
    in its inverse (encoding) before giving up.
    """

    def __init__(self, name, base_encoding, mapping):
        """Build the codec and register its error handler.

        Args:
            name: Name of the extended encoding; also used as the name of the
                error handler registered with ``codecs.register_error``.
            base_encoding: Name of the encoding that does the bulk of the work.
            mapping: Dict from ``bytes`` sequences to the ``str`` each decodes to.
        """
        self.name = name
        self.base_encoding = base_encoding
        self.mapping = mapping
        self.reverse = {v: k for k, v in mapping.items()}
        # Longest string that can appear as a key of ``self.reverse``;
        # ``default=0`` keeps an empty mapping from raising ValueError.
        self.max_len = max((len(v) for v in mapping.values()), default=0)
        self.info = codecs.CodecInfo(
            name=self.name, encode=self.encode, decode=self.decode
        )
        codecs.register_error(name, self.error)

    def _map(self, mapper, output_type, exc_type, input, errors):
        """Shared implementation of :meth:`encode` and :meth:`decode`.

        Args:
            mapper: ``codecs.encode`` or ``codecs.decode``.
            output_type: ``bytes`` when encoding, ``str`` when decoding.
            exc_type: Exception class ``mapper`` raises for unconvertible data.
            input: The data to convert.
            errors: Name of the error handler requested by the caller.

        Returns:
            A ``(converted, consumed_length)`` tuple, where ``consumed_length``
            is always the length of the original ``input``.

        Raises:
            LookupError: If ``errors`` does not name a registered handler.
            exc_type: If the requested handler re-raises (e.g. ``"strict"``).
        """
        base_error_handler = codecs.lookup_error(errors)
        length = len(input)
        out = output_type()
        while input:
            # first try to use self.error as the error handler
            try:
                part = mapper(input, self.base_encoding, errors=self.name)
                out += part
                break  # All converted
            except exc_type as e:
                # else convert the correct part, handle error as requested and continue
                out += mapper(input[: e.start], self.base_encoding, self.name)
                replacement, pos = base_error_handler(e)
                if not isinstance(replacement, output_type):
                    # When encoding, handlers such as "replace", "ignore" or
                    # "xmlcharrefreplace" hand back a str; it has to be encoded
                    # before it can be appended to the bytes output.
                    replacement = mapper(replacement, self.base_encoding, self.name)
                out += replacement
                input = input[pos:]
        return out, length

    def encode(self, input, errors="strict"):
        """Encode the str ``input``; return ``(bytes, len(input))``."""
        return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)

    def decode(self, input, errors="strict"):
        """Decode the bytes ``input``; return ``(str, len(input))``."""
        return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)

    def error(self, e):
        """Error handler that resolves failures through the extra mapping.

        Tries successively longer slices of ``e.object`` starting at
        ``e.start`` and returns ``(replacement, resume_position)`` for the
        first slice found in the mapping.  If none matches, ``e`` is re-raised
        with its ``encoding`` attribute set to this codec's name.
        """
        if isinstance(e, UnicodeDecodeError):
            for end in range(e.start + 1, e.end + 1):
                s = e.object[e.start : end]
                if s in self.mapping:
                    return self.mapping[s], end
        elif isinstance(e, UnicodeEncodeError):
            # e.end is not used here: candidates are bounded by the longest
            # string present in the reverse mapping instead.
            for end in range(e.start + 1, e.start + self.max_len + 1):
                s = e.object[e.start : end]
                if s in self.reverse:
                    return self.reverse[s], end
        e.encoding = self.name
        raise e
 | 
						|
 | 
						|
 | 
						|
# Extended encodings known to this module, keyed by encoding name.  Each value
# is a ``(base encoding, extra mapping)`` pair; the extra mapping goes from a
# single byte to the character it stands for in the extended encoding.
_extended_encodings = {
    "x_mac_japanese_ttx": (
        "shift_jis",
        {
            b"\xfc": "\u007c",  # VERTICAL LINE
            b"\x7e": "\u007e",  # TILDE
            b"\x80": "\u005c",  # REVERSE SOLIDUS
            b"\xa0": "\u00a0",  # NO-BREAK SPACE
            b"\xfd": "\u00a9",  # COPYRIGHT SIGN
            b"\xfe": "\u2122",  # TRADE MARK SIGN
            b"\xff": "\u2026",  # HORIZONTAL ELLIPSIS
        },
    ),
    "x_mac_trad_chinese_ttx": (
        "big5",
        {
            b"\x80": "\u005c",  # REVERSE SOLIDUS
            b"\xa0": "\u00a0",  # NO-BREAK SPACE
            b"\xfd": "\u00a9",  # COPYRIGHT SIGN
            b"\xfe": "\u2122",  # TRADE MARK SIGN
            b"\xff": "\u2026",  # HORIZONTAL ELLIPSIS
        },
    ),
    "x_mac_korean_ttx": (
        "euc_kr",
        {
            b"\x80": "\u00a0",  # NO-BREAK SPACE
            b"\x81": "\u20a9",  # WON SIGN
            b"\x82": "\u2014",  # EM DASH
            b"\x83": "\u00a9",  # COPYRIGHT SIGN
            b"\xfe": "\u2122",  # TRADE MARK SIGN
            b"\xff": "\u2026",  # HORIZONTAL ELLIPSIS
        },
    ),
    "x_mac_simp_chinese_ttx": (
        "gb2312",
        {
            b"\x80": "\u00fc",  # LATIN SMALL LETTER U WITH DIAERESIS
            b"\xa0": "\u00a0",  # NO-BREAK SPACE
            b"\xfd": "\u00a9",  # COPYRIGHT SIGN
            b"\xfe": "\u2122",  # TRADE MARK SIGN
            b"\xff": "\u2026",  # HORIZONTAL ELLIPSIS
        },
    ),
}

# ExtendCodec instances already built by search_function, keyed by encoding name.
_cache = {}
 | 
						|
 | 
						|
 | 
						|
def search_function(name):
    """Codec search function for the extended ``*_ttx`` encodings.

    Args:
        name: Encoding name as passed in by the codec registry.

    Returns:
        The ``codecs.CodecInfo`` of the matching ``ExtendCodec`` (built on
        first use and cached in ``_cache``), or ``None`` when ``name`` is not
        one of the extended encodings or no usable base encoding is available.
    """
    name = encodings.normalize_encoding(name)  # Rather undocumented...
    if name not in _extended_encodings:
        return None

    codec = _cache.get(name)
    if codec is None:
        fallback_encoding, mapping = _extended_encodings[name]
        assert name[-4:] == "_ttx"
        # Python 2 didn't have any of the encodings that we are implementing
        # in this file.  Python 3 added aliases for the East Asian ones, mapping
        # them "temporarily" to the same base encoding as us, with a comment
        # suggesting that full implementation will appear some time later.
        # As such, try the Python version of the x_mac_... first, if that is found,
        # use *that* as our base encoding.  This would make our encoding upgrade
        # to the full encoding when and if Python finally implements that.
        # http://bugs.python.org/issue24041
        for candidate in (name[:-4], fallback_encoding):
            try:
                codecs.lookup(candidate)
            except LookupError:
                continue
            codec = _cache[name] = ExtendCodec(name, candidate, mapping)
            break
        else:
            # Neither candidate exists: report "not found" to the registry
            # instead of failing with a KeyError on the cache lookup.
            return None

    return codec.info
 | 
						|
 | 
						|
 | 
						|
# Register at import time so codecs.lookup() can resolve the extended *_ttx encodings.
codecs.register(search_function)
 |