lib.glyphs

View Source

  1import re
  2import unicodedata
  3from typing import Literal
  4from loguru import logger
  5from pyuca import Collator
  6from icecream import ic
  7
  8from lib import helpers
  9from .external import adobeGlyphList
 10
# Shared pyuca collator instance; sortByUnicode uses it to build sort keys.
collator = Collator()
 12
 13
 14def toChar(value: str, strict=False) -> str:
 15    """Converts a glyph name or value to its corresponding character.
 16
 17    Args:
 18        value: The glyph name or value to convert.
 19        strict: If True, only returns a character if conversion is successful.
 20
 21    Returns:
 22        The corresponding character, or the original value with suffix if not strict.
 23    """
 24    if value is None:
 25        return None
 26
 27    # List: Assume they’re names that all map to the same char => pick first
 28    value = helpers.pickFirst(value)
 29
 30    # Remove snakeCase
 31    value = toNormalCase(value)
 32
 33    if isCharSingle(value):
 34        return value
 35    else:
 36        if isSuffixed(value):
 37            value, suffix = toParts(value)
 38        else:
 39            suffix = ""
 40
 41        quad = toUni(value, "quad")
 42
 43        try:
 44            if quad:
 45                return chr(int(quad, 16)) + suffix
 46            else:
 47                if not strict:
 48                    return value + suffix
 49        except Exception as e:
 50            logger.warning("[toChar {}] {}", value, e)
 51
 52
 53def toCharBase(value: str) -> str:
 54    """Returns the base character for a given value."""
 55    value = toPartBase(value)
 56    return toChar(value)
 57
 58
 59def toParts(value: str) -> tuple | None:
 60    """Splits a value into its base and suffix parts if suffixed.
 61
 62    Args:
 63        value: The value to split.
 64
 65    Returns:
 66        A tuple of (base, suffix) if suffixed, otherwise None.
 67    """
 68    if not isinstance(value, str):
 69        return None
 70
 71    # Matches foo.bar and also foo.bar.bar
 72    match = re.compile(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$").search(value)
 73    if match:
 74        return match.group("base"), "." + match.group("suffix")
 75
 76
 77def toPartBase(value: str) -> str:
 78    """Returns the base part of a value, removing any suffix."""
 79    # List: Assume they’re names that all map to the same char => pick first
 80    value = helpers.pickFirst(value)
 81
 82    if not isinstance(value, str):
 83        return None
 84
 85    if isSuffixed(value):
 86        value, _ = toParts(value)
 87
 88    return value
 89
 90
 91def toName(value: str, strict=False) -> str:
 92    """Converts a value to its Adobe Glyph List name.
 93
 94    Args:
 95        value: The value to convert.
 96        strict: If True, only returns a name if conversion is successful.
 97
 98    Returns:
 99        The glyph name, or the original value with suffix if not strict.
100    """
101    if isSuffixed(value):
102        value, suffix = toParts(value)
103    else:
104        suffix = ""
105
106    try:
107        # Remove snakeCase
108        value = toNormalCase(value)
109
110        if not isUniQuad(value):
111            quad = toUni(value, "quad")
112        else:
113            quad = value
114
115        # Can be str or str[]
116        name = adobeGlyphList.UV2AGL[quad]
117        if isinstance(name, list):
118            return [n + suffix for n in name]
119        else:
120            return name + suffix
121    except:
122        try:
123            if not strict:
124                return value + suffix
125        except:
126            logger.warning("[Cannon convert toName] {}", value)
127
128
def toNameBase(value: str) -> str:
    """Look up the glyph name of a value after discarding its suffix.

    The value is reduced to its base with toPartBase and that base is then
    handed to toName.
    """
    return toName(toPartBase(value))
133
134
def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str | None:
    """Converts a value to its Unicode codepoint string.

    Args:
        value: The value to convert: a glyph name, a single character, a
            'uniXXXX' string or a bare codepoint string. A list is reduced
            to one entry with helpers.pickFirst.
        mode: 'quad' for the bare hex codepoint, 'full' for 'uniXXXX' format.

    Returns:
        The Unicode codepoint string with any suffix of the input
        re-attached, or None if conversion fails.
    """
    # List: Assume they’re names that all map to the same char => pick first
    value = helpers.pickFirst(value)

    if not isinstance(value, str):
        return None

    if isSuffixed(value):
        value, suffix = toParts(value)
    else:
        suffix = ""

    if isName(value):
        quad = adobeGlyphList.AGL2UV[value]
    elif isChar(value):
        value = toChar(value, strict=True)
        if not value:
            return None
        quad = f"{ord(value):04X}"
    elif value.startswith("uni"):
        # Strip only the leading prefix and insist on hex digits, so that an
        # ordinary word such as "universal" is rejected instead of yielding
        # the bogus codepoint string "versal".
        quad = value[len("uni"):]
        if not re.fullmatch(r"[0-9A-Fa-f]+", quad):
            logger.trace("[Cannot Convert to Uni] {}", value)
            return None
    elif isUni(value):
        # Already a bare quad.
        quad = value
    else:
        logger.trace("[Cannot Convert to Uni] {}", value)
        return None

    valueUni = "uni" + quad if mode == "full" else quad
    return valueUni + suffix
172
173
def toSnakeCase(value: str) -> str:
    """Insert underscores between the characters of a short glyph name.

    Only names whose base is two or three ASCII letters/digits are
    converted; a suffix after the first dot is kept as is. Values that are
    not glyph names are passed through toName first, which may produce a
    list of names. In that case a list of the convertible names is returned.

    Example:
        `ffj` => `f_f_j`

    Returns:
        The converted name (or list of names), or None when the value is
        not a string or nothing could be converted.
    """

    def _underscored(name: str) -> str:
        # Bare two/three character name.
        plain = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})$", name)
        # Same, followed by a dot and a suffix.
        withSuffix = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$", name)

        if plain:
            return "_".join(plain.group("char"))
        if withSuffix:
            return "_".join(withSuffix.group("char")) + "." + withSuffix.group("rest")
        return None

    if not isinstance(value, str):
        return None

    name = value if isName(value) else toName(value)

    # Name can be str or str[]
    if not isinstance(name, list):
        return _underscored(name)

    converted = helpers.removeNone([_underscored(n) for n in name])
    return converted if converted else None
208
209
def toNormalCase(value: str) -> str:
    """Drop every underscore from a value that isSnakeCase accepts.

    Any other value is handed back untouched.

    Example:
        `f_f_j.liga` => `ffj.liga`
    """
    return value.replace("_", "") if isSnakeCase(value) else value
220
221
def isChar(value: str) -> bool:
    """Tell whether a value, ignoring any suffix, is one single character."""
    parts = toParts(value)
    base = parts[0] if parts else value
    return isCharSingle(base)
228
229
def isCharSingle(value: str) -> bool:
    """Tell whether a value is a string of exactly one character."""
    if not isinstance(value, str):
        return False
    return len(value) == 1
233
234
def isName(value: str) -> bool:
    """Tell whether a value is one of the keys of adobeGlyphList.AGL2UV."""
    knownNames = adobeGlyphList.AGL2UV.keys()
    return value in knownNames
238
239
240def isUni(value: str) -> str | bool:
241    """Checks if value is a Unicode codepoint string.
242
243    Args:
244        value: The value to check.
245
246    Returns:
247        'full' if value is in 'uniXXXX' format,
248        'quad' if value is a 4-digit hex,
249        False otherwise.
250    """
251    if not isinstance(value, str):
252        return None
253
254    match = re.compile("^(?P<prefix>uni)?(?P<quad>[A-Z0-9]{4})$").search(value)
255    if match:
256        isPrefix = match.group("prefix")
257        isQuad = match.group("quad")
258        isFull = isPrefix and isQuad
259        if isFull:
260            return "full"
261        elif isQuad:
262            return "quad"
263
264    return False
265
266
def isUniFull(value: str) -> bool:
    """Tell whether isUni classifies the value as 'full' (e.g. 'uniXXXX')."""
    kind = isUni(value)
    return kind == "full"
270
271
def isUniQuad(value: str) -> bool:
    """Tell whether isUni classifies the value as 'quad' (e.g. 'XXXX')."""
    kind = isUni(value)
    return kind == "quad"
275
276
def isSuffixed(value: str) -> bool:
    """Tell whether toParts can split the value (e.g. 'name.suffix')."""
    return toParts(value) is not None
280
281
def isSnakeCase(value: str) -> bool:
    """Returns True if value is in snake_case format (contains underscores).

    A lone "_" is the underscore character itself and does not count.
    Non-string input yields False rather than None, so the result is
    always a bool.
    """
    if not isinstance(value, str):
        return False

    # "_" is underscore, not snakeCase
    return "_" in value and len(value) > 1
287
288
def getCategory(char):
    """Returns the Unicode category or custom category for a character.

    Custom categories are derived from the suffix ('osf'/'tf' give
    'Nd<Suffix>', 'dnom'/'numr' give 'No<Suffix>'), from the glyph name
    ('superior', 'inferior', 'circle') and from the codepoint range
    U+2776..U+277E. Everything else falls through to
    unicodedata.category.

    Args:
        char: The character or glyph name (or a list of glyph names, of
            which helpers.pickFirst selects one).

    Returns:
        The Unicode category string, a custom category for the special
        cases above, or None when no category can be determined.
    """
    # May be a list of multiple glyphNames
    char = helpers.pickFirst(char)

    if isSuffixed(char):
        char, suffix = toParts(char)
    else:
        suffix = ""

    # Edge cases
    # Old-style/tabular figures, e.g. NdOsf
    if any(s in suffix for s in ("osf", "tf")):
        return "Nd" + suffix.strip(".").title()
    # Denominators, numerators: NoDnom, NoNumr
    if any(s in suffix for s in ("dnom", "numr")):
        return "No" + suffix.strip(".").title()

    # toName may return a str, a list of str, or None. Normalise to a list
    # of strings once, so the substring checks below neither raise on None
    # nor degrade to exact-element matching on a list.
    names = toName(char)
    if not isinstance(names, list):
        names = [names]
    names = [n for n in names if isinstance(n, str)]

    def _nameContains(fragment):
        return any(fragment in n for n in names)

    if _nameContains("superior"):
        return "NoSups"  # Number other
    # Edge case for inferiors
    if _nameContains("inferior"):
        return "NoInf"  # Number other
    # Circled numbers
    if _nameContains("circle"):
        return "NoCiOt"

    # TODO: Check if in given unicode range
    # NOTE(review): the range stops at U+277E; confirm whether U+277F
    # should be included as well.
    try:
        if 0x2776 <= int(toUni(char), 16) <= 0x277E:
            return "NoCiSo"
    except (TypeError, ValueError):
        # toUni returned None or a string that is not a hex number.
        pass

    try:
        return unicodedata.category(toChar(char))
    except (TypeError, ValueError):
        # toChar returned None or more than one character.
        return None
336
337
def sortByUnicode(char: str):
    """Returns a sort key for a character based on Unicode collation.

    Args:
        char: The character or glyph name, optionally suffixed.

    Returns:
        The collator sort key of the base character followed by the
        suffix, or None (after logging a warning) when the value cannot be
        converted to a character.
    """
    if isSuffixed(char):
        char, suffix = toParts(char)
    else:
        suffix = ""

    base = toCharBase(char)

    # toCharBase returns None when conversion fails; concatenating that with
    # the suffix would raise TypeError instead of reaching the warning.
    charBase = base + suffix if isinstance(base, str) else ""

    if charBase:
        return collator.sort_key(charBase)

    logger.warning("Unable to sort: {}", char)
    return None
351
352
def sortByCategory(glyphItem: str):
    """
    Returns a priority index for sorting glyphs by Unicode category.

    - `L` letter
    - `N` number
    - `P` punctuation
    - `S` symbol
    - `M` mark
    - `C` control
    - `Z` Space separator

    Args:
        glyphItem: The glyph item to categorize.

    Returns:
        An integer priority index for sorting: the index that
        helpers.findClosestIndex reports for the glyph's category, or 10
        when no integer index is found.
    """
    priorities = [
        "Lu",
        "Ll",
        "L",
        "Nd",
        "NdTnum",
        "NdOsf",
        "NoSups",
        "NoNumr",
        "NoDnom",
        "N",
        "P",
        "S",
        "M",
        "C",
        "Z",
    ]

    glyphCat = getCategory(glyphItem)
    priority = helpers.findClosestIndex(priorities, glyphCat)

    if isinstance(priority, int):
        return priority

    if glyphCat:
        # loguru fills `{}` placeholders; without them the arguments would
        # never show up in the message. The name is only resolved here
        # because it is needed for nothing but this log line.
        logger.info("[No priority] {} {}", glyphCat, toNameBase(glyphItem))

    # Fallback slot; note that 10 is also the index of "P" in the list.
    return 10

collator = <pyuca.collator.Collator_9_0_0 object>

def toChar(value: str, strict=False) -> str: View Source

15def toChar(value: str, strict=False) -> str:
16    """Converts a glyph name or value to its corresponding character.
17
18    Args:
19        value: The glyph name or value to convert.
20        strict: If True, only returns a character if conversion is successful.
21
22    Returns:
23        The corresponding character, or the original value with suffix if not strict.
24    """
25    if value is None:
26        return None
27
28    # List: Assume they’re names that all map to the same char => pick first
29    value = helpers.pickFirst(value)
30
31    # Remove snakeCase
32    value = toNormalCase(value)
33
34    if isCharSingle(value):
35        return value
36    else:
37        if isSuffixed(value):
38            value, suffix = toParts(value)
39        else:
40            suffix = ""
41
42        quad = toUni(value, "quad")
43
44        try:
45            if quad:
46                return chr(int(quad, 16)) + suffix
47            else:
48                if not strict:
49                    return value + suffix
50        except Exception as e:
51            logger.warning("[toChar {}] {}", value, e)

Converts a glyph name or value to its corresponding character.

Arguments:

value: The glyph name or value to convert.
strict: If True, only returns a character if conversion is successful.

Returns:

The corresponding character, or the original value with suffix if not strict.

def toCharBase(value: str) -> str: View Source

54def toCharBase(value: str) -> str:
55    """Returns the base character for a given value."""
56    value = toPartBase(value)
57    return toChar(value)

Returns the base character for a given value.

def toParts(value: str) -> tuple | None: View Source

60def toParts(value: str) -> tuple | None:
61    """Splits a value into its base and suffix parts if suffixed.
62
63    Args:
64        value: The value to split.
65
66    Returns:
67        A tuple of (base, suffix) if suffixed, otherwise None.
68    """
69    if not isinstance(value, str):
70        return None
71
72    # Matches foo.bar and also foo.bar.bar
73    match = re.compile(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$").search(value)
74    if match:
75        return match.group("base"), "." + match.group("suffix")

Splits a value into its base and suffix parts if suffixed.

Arguments:

value: The value to split.

Returns:

A tuple of (base, suffix) if suffixed, otherwise None.

def toPartBase(value: str) -> str: View Source

78def toPartBase(value: str) -> str:
79    """Returns the base part of a value, removing any suffix."""
80    # List: Assume they’re names that all map to the same char => pick first
81    value = helpers.pickFirst(value)
82
83    if not isinstance(value, str):
84        return None
85
86    if isSuffixed(value):
87        value, _ = toParts(value)
88
89    return value

Returns the base part of a value, removing any suffix.

def toName(value: str, strict=False) -> str: View Source

 92def toName(value: str, strict=False) -> str:
 93    """Converts a value to its Adobe Glyph List name.
 94
 95    Args:
 96        value: The value to convert.
 97        strict: If True, only returns a name if conversion is successful.
 98
 99    Returns:
100        The glyph name, or the original value with suffix if not strict.
101    """
102    if isSuffixed(value):
103        value, suffix = toParts(value)
104    else:
105        suffix = ""
106
107    try:
108        # Remove snakeCase
109        value = toNormalCase(value)
110
111        if not isUniQuad(value):
112            quad = toUni(value, "quad")
113        else:
114            quad = value
115
116        # Can be str or str[]
117        name = adobeGlyphList.UV2AGL[quad]
118        if isinstance(name, list):
119            return [n + suffix for n in name]
120        else:
121            return name + suffix
122    except:
123        try:
124            if not strict:
125                return value + suffix
126        except:
127            logger.warning("[Cannon convert toName] {}", value)

Converts a value to its Adobe Glyph List name.

Arguments:

value: The value to convert.
strict: If True, only returns a name if conversion is successful.

Returns:

The glyph name, or the original value with suffix if not strict.

def toNameBase(value: str) -> str: View Source

130def toNameBase(value: str) -> str:
131    """Returns the base glyph name for a given value."""
132    value = toPartBase(value)
133    return toName(value)

Returns the base glyph name for a given value.

def toUni(value: str, mode: Literal['quad', 'full'] = 'quad') -> str: View Source

136def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str:
137    """Converts a value to its Unicode codepoint string.
138
139    Args:
140        value: The value to convert.
141        mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.
142
143    Returns:
144        The Unicode codepoint string, or None if conversion fails.
145    """
146    # List: Assume they’re names that all map to the same char => pick first
147    value = helpers.pickFirst(value)
148
149    if not isinstance(value, str):
150        return None
151
152    if isSuffixed(value):
153        value, suffix = toParts(value)
154    else:
155        suffix = ""
156
157    if isName(value):
158        quad = adobeGlyphList.AGL2UV[value]
159    elif isChar(value):
160        value = toChar(value, strict=True)
161        if value:
162            quad = hex(ord(value))[2:].zfill(4).upper()
163        else:
164            return None
165    else:
166        if not value.startswith("uni") and not isUni(value):
167            logger.trace("[Cannot Convert to Uni] {}", value)
168            return None
169        quad = value.replace("uni", "")
170
171    valueUni = "uni" + quad if mode == "full" else quad
172    return valueUni + suffix

Converts a value to its Unicode codepoint string.

Arguments:

value: The value to convert.
mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.

Returns:

The Unicode codepoint string, or None if conversion fails.

def toSnakeCase(value: str) -> str: View Source

175def toSnakeCase(value: str) -> str:
176    """Converts a glyph name to snake_case format.
177
178    Example:
179        `ffj` => `f_f_j`
180    """
181
182    def _processSnake(value: str) -> str:
183        short = re.compile(r"^(?P<char>[A-Za-z0-9]{2,3})$").match(value)
184        suffixed = re.compile(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$").match(
185            value
186        )
187        match = short or suffixed
188
189        if match:
190            char = "_".join(list(match.group("char")))
191            if suffixed:
192                return ".".join([char, suffixed.group("rest")])
193            else:
194                return char
195
196    if not isinstance(value, str):
197        return None
198
199    if not isName(value):
200        value = toName(value)
201
202    # Name can be str or str[]
203    if isinstance(value, list):
204        values = helpers.removeNone([_processSnake(v) for v in value])
205        if values:
206            return values
207    else:
208        return _processSnake(value)

Converts a glyph name to snake_case format.

Example:

ffj => f_f_j

def toNormalCase(value: str) -> str: View Source

211def toNormalCase(value: str) -> str:
212    """Reverts snake_case in glyph names to normal case.
213
214    Example:
215        `f_f_j.liga` => `ffj.liga`
216    """
217    if isSnakeCase(value):
218        return value.replace("_", "")
219    else:
220        return value

Reverts snake_case in glyph names to normal case.

Example:

f_f_j.liga => ffj.liga

def isChar(value: str) -> bool: View Source

223def isChar(value: str) -> bool:
224    """Returns True if value is a single character (after removing suffix)."""
225    if isSuffixed(value):
226        value, _ = toParts(value)
227
228    return isCharSingle(value)

Returns True if value is a single character (after removing suffix).

def isCharSingle(value: str) -> bool: View Source

231def isCharSingle(value: str) -> bool:
232    """Returns True if value is a single character."""
233    return isinstance(value, str) and len(value) == 1

Returns True if value is a single character.

def isName(value: str) -> bool: View Source

236def isName(value: str) -> bool:
237    """Returns True if value is a glyph name in the Adobe Glyph List."""
238    return value in adobeGlyphList.AGL2UV.keys()

Returns True if value is a glyph name in the Adobe Glyph List.

def isUni(value: str) -> str | bool: View Source

241def isUni(value: str) -> str | bool:
242    """Checks if value is a Unicode codepoint string.
243
244    Args:
245        value: The value to check.
246
247    Returns:
248        'full' if value is in 'uniXXXX' format,
249        'quad' if value is a 4-digit hex,
250        False otherwise.
251    """
252    if not isinstance(value, str):
253        return None
254
255    match = re.compile("^(?P<prefix>uni)?(?P<quad>[A-Z0-9]{4})$").search(value)
256    if match:
257        isPrefix = match.group("prefix")
258        isQuad = match.group("quad")
259        isFull = isPrefix and isQuad
260        if isFull:
261            return "full"
262        elif isQuad:
263            return "quad"
264
265    return False

Checks if value is a Unicode codepoint string.

Arguments:

value: The value to check.

Returns:

'full' if value is in 'uniXXXX' format, 'quad' if value is a 4-digit hex, False otherwise.

def isUniFull(value: str) -> bool: View Source

268def isUniFull(value: str) -> bool:
269    """Returns True if value is a full Unicode string (e.g., 'uniXXXX')."""
270    return isUni(value) == "full"

Returns True if value is a full Unicode string (e.g., 'uniXXXX').

def isUniQuad(value: str) -> bool: View Source

273def isUniQuad(value: str) -> bool:
274    """Returns True if value is a quad Unicode string (e.g., 'XXXX')."""
275    return isUni(value) == "quad"

Returns True if value is a quad Unicode string (e.g., 'XXXX').

def isSuffixed(value: str) -> bool: View Source

278def isSuffixed(value: str) -> bool:
279    """Returns True if value has a suffix (e.g., 'name.suffix')."""
280    return bool(toParts(value))

Returns True if value has a suffix (e.g., 'name.suffix').

def isSnakeCase(value: str) -> bool: View Source

283def isSnakeCase(value: str) -> bool:
284    """Returns True if value is in snake_case format (contains underscores)."""
285    # "_" is underscore, not snakeCase
286    if isinstance(value, str):
287        return "_" in value and len(value) > 1

Returns True if value is in snake_case format (contains underscores).

def getCategory(char): View Source

290def getCategory(char):
291    """Returns the Unicode category or custom category for a character.
292
293    Args:
294        char: The character or glyph name.
295
296    Returns:
297        The Unicode category string, or a custom category for special cases.
298    """
299    # May be a list of multiple glyphNames
300    char = helpers.pickFirst(char)
301
302    if isSuffixed(char):
303        char, suffix = toParts(char)
304    else:
305        suffix = ""
306
307    # Edge cases
308    # Old-style/tabular figures
309    if any([s in suffix for s in ["osf", "tf"]]):
310        # NdOsf
311        return "Nd" + suffix.strip(".").title()
312    # Denominators, numerators
313    elif any([s in suffix for s in ["dnom", "numr"]]):
314        # NoDnom, NoNumr
315        return "No" + suffix.strip(".").title()
316    elif "superior" in toName(char):
317        return "NoSups"  # Number other
318    # Edge case for inferiors
319    elif "inferior" in toName(char):
320        return "NoInf"  # Number other
321    # Circled numbers
322    elif "circle" in toName(char):
323        return "NoCiOt"
324    else:
325        # TODO: Check if in given unicode range
326        try:
327            MIN, MAX = map(lambda num: int(num, 16), ["2776", "277E"])
328            if MIN <= int(toUni(char), 16) <= MAX:
329                return "NoCiSo"
330        except:
331            pass
332
333    try:
334        return unicodedata.category(toChar(char))
335    except:
336        pass

Returns the Unicode category or custom category for a character.

Arguments:

char: The character or glyph name.

Returns:

The Unicode category string, or a custom category for special cases.

def sortByUnicode(char: str): View Source

339def sortByUnicode(char: str):
340    """Returns a sort key for a character based on Unicode collation."""
341    if isSuffixed(char):
342        char, suffix = toParts(char)
343    else:
344        suffix = ""
345
346    charBase = toCharBase(char) + suffix
347
348    if charBase:
349        return collator.sort_key(charBase)
350    else:
351        logger.warning("Unable to sort: {}", char)

Returns a sort key for a character based on Unicode collation.

def sortByCategory(glyphItem: str): View Source

354def sortByCategory(glyphItem: str):
355    """
356    Returns a priority index for sorting glyphs by Unicode category.
357
358    - `L` lowercase
359    - `N` number
360    - `P` punctuation
361    - `S` symbol
362    - `M` mark
363    - `C` control
364    - `Z` Space separator
365
366    Args:
367        glyphItem: The glyph item to categorize.
368
369    Returns:
370        An integer priority index for sorting.
371    """
372    priorities = [
373        "Lu",
374        "Ll",
375        "L",
376        "Nd",
377        "NdTnum",
378        "NdOsf",
379        "NoSups",
380        "NoNumr",
381        "NoDnom",
382        "N",
383        "P",
384        "S",
385        "M",
386        "C",
387        "Z",
388    ]
389
390    glyphCat = getCategory(glyphItem)
391    glyphName = toNameBase(glyphItem)
392
393    priority = helpers.findClosestIndex(priorities, glyphCat)
394
395    if not isinstance(priority, int):
396        if glyphCat:
397            logger.info("[No priority]", glyphCat, glyphName)
398        return 10
399    else:
400        return priority

Returns a priority index for sorting glyphs by Unicode category.

L letter
N number
P punctuation
S symbol
M mark
C control
Z Space separator

Arguments:

glyphItem: The glyph item to categorize.

Returns:

An integer priority index for sorting.