lib.glyphs

  1import re
  2import unicodedata
  3from typing import Literal
  4from loguru import logger
  5from pyuca import Collator
  6import drawBot
  7from icecream import ic
  8
  9from lib import helpers
 10from .external import adobeGlyphList
 11
# Module-level collator instance; sortByUnicode uses it to build sort keys.
collator = Collator()
 13
 14
 15def listAvailableGlyphs(fontPath: str) -> list[str]:
 16    """Returns a list of available glyph names in the specified font."""
 17    with drawBot.savedState():
 18        drawBot.font(fontPath)
 19        return drawBot.listFontGlyphNames()
 20
 21
 22def toChar(value: str, strict=False) -> str:
 23    """Converts a glyph name or value to its corresponding character.
 24
 25    Args:
 26        value: The glyph name or value to convert.
 27        strict: If True, only returns a character if conversion is successful.
 28
 29    Returns:
 30        The corresponding character, or the original value with suffix if not strict.
 31
 32    Example:
 33        `toChar("004A")` => `J`
 34    """
 35    if value is None:
 36        return None
 37
 38    # List: Assume they’re names that all map to the same char => pick first
 39    value = helpers.pickFirst(value)
 40
 41    # Remove snakeCase
 42    value = toNormalCase(value)
 43
 44    if isCharSingle(value):
 45        return value
 46    else:
 47        if isSuffixed(value):
 48            value, suffix = toParts(value)
 49        else:
 50            suffix = ""
 51
 52        quad = toUni(value, "quad")
 53
 54        try:
 55            if quad:
 56                return chr(int(quad, 16)) + suffix
 57            else:
 58                if not strict:
 59                    return value + suffix
 60        except Exception as e:
 61            logger.warning("[toChar {}] {}", value, e)
 62
 63
 64def toCharBase(value: str) -> str:
 65    """Returns the base character for a given value.
 66
 67    Example:
 68        `004A.ss01` => `J`
 69        `Aacute.ss01` => `Á`
 70    """
 71    value = toPartBase(value)
 72    return toChar(value)
 73
 74
 75def toParts(value: str) -> tuple | None:
 76    """Splits a value into its base and suffix parts if suffixed.
 77
 78    Args:
 79        value: The value to split.
 80
 81    Returns:
 82        A tuple of (base, suffix) if suffixed, otherwise None.
 83    """
 84    if not isinstance(value, str):
 85        return None
 86
 87    # Matches foo.bar and also foo.bar.bar
 88    match = re.compile(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$").search(value)
 89    if match:
 90        return match.group("base"), "." + match.group("suffix")
 91
 92
 93def toPartBase(value: str) -> str:
 94    """Returns the base part of a value, removing any suffix.
 95
 96    Example:
 97        `004A.ss01` => `004A`
 98    """
 99    # List: Assume they’re names that all map to the same char => pick first
100    value = helpers.pickFirst(value)
101
102    if not isinstance(value, str):
103        return None
104
105    if isSuffixed(value):
106        value, _ = toParts(value)
107
108    return value
109
110
def toName(value: str, strict=False) -> str:
    """Look up the Adobe Glyph List name for a value.

    Args:
        value: Character, codepoint string or glyph name, optionally suffixed.
        strict: When True, return None (and log a warning) instead of echoing
            back a value that has no glyph-list entry.

    Returns:
        The glyph name with the suffix re-attached. This is a list of names
        when the glyph list maps the codepoint to several. When the lookup
        fails and strict is False, the input value plus suffix.
    """
    suffix = ""
    if isSuffixed(value):
        value, suffix = toParts(value)

    try:
        # Undo snake_case before looking anything up
        value = toNormalCase(value)
        quad = value if isUniQuad(value) else toUni(value, "quad")

        # The table entry can be str or str[]
        name = adobeGlyphList.UV2AGL[quad]
        if isinstance(name, list):
            return [entry + suffix for entry in name]
        return name + suffix
    except Exception as lookupError:
        try:
            if strict:
                logger.warning(
                    "[Cannot convert toName strict] {}: {}", value, lookupError
                )
                return None
            return value + suffix
        except Exception as fallbackError:
            logger.warning("[Cannot convert toName] {}: {}", value, fallbackError)
150
151
def toNameBase(value: str) -> str:
    """Return the glyph name of a value after dropping its suffix."""
    return toName(toPartBase(value))
156
157
def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str:
    """Converts a value to its Unicode codepoint string.

    Args:
        value: Glyph name, character or codepoint string (optionally
            suffixed), or a list of those.
        mode: 'quad' for the bare hex digits, 'full' for 'uniXXXX' format.

    Returns:
        The codepoint string with any suffix re-attached, or None if the
        value is not a string or cannot be converted.
    """
    # List: Assume they’re names that all map to the same char => pick first
    value = helpers.pickFirst(value)

    if not isinstance(value, str):
        return None

    suffix = ""
    if isSuffixed(value):
        value, suffix = toParts(value)

    if isAGLName(value):
        quad = adobeGlyphList.AGL2UV[value]
    elif isChar(value):
        value = toChar(value, strict=True)
        if not value:
            return None
        # Upper-case hex, zero-padded to at least four digits
        quad = f"{ord(value):04X}"
    elif value.startswith("uni") or isUni(value):
        # Drop only the leading "uni"; str.replace would also remove any
        # later occurrence of "uni" inside the value
        quad = value.removeprefix("uni")
    else:
        logger.trace("[Cannot Convert to Uni] {}", value)
        return None

    valueUni = "uni" + quad if mode == "full" else quad
    return valueUni + suffix
195
196
def toSnakeCase(value: str) -> str:
    """Spell a short glyph name with underscores between its characters.

    Only names whose base is 2–3 ASCII letters or digits are converted. A
    suffix after the first dot is kept untouched. Returns None for anything
    else, and a list when the name lookup yields several names.

    Example:
        `ffj` => `f_f_j`
    """

    def _processSnake(name: str) -> str:
        plain = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})$", name)
        dotted = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$", name)

        if plain:
            return "_".join(plain.group("char"))
        if dotted:
            return "_".join(dotted.group("char")) + "." + dotted.group("rest")
        return None

    if not isinstance(value, str):
        return None

    name = value if isAGLName(value) else toName(value)

    # The name can be str or str[]
    if isinstance(name, list):
        converted = helpers.removeNone([_processSnake(entry) for entry in name])
        if converted:
            return converted
        return None

    return _processSnake(name)
231
232
def toNormalCase(value: str) -> str:
    """Remove the underscores from a snake_case glyph name.

    Values that are not snake_case are returned unchanged.

    Example:
        `f_f_j.liga` => `ffj.liga`
    """
    return value.replace("_", "") if isSnakeCase(value) else value
243
244
def isChar(value: str) -> bool:
    """Tell whether value, with any suffix dropped, is one character long."""
    parts = toParts(value)
    base = parts[0] if parts else value
    return isCharSingle(base)
251
252
def isCharSingle(value: str) -> bool:
    """Tell whether value is a string of exactly one character.

    Example:
        - `J` => True
        - `č` => True
        - `004A` => False
    """
    if not isinstance(value, str):
        return False
    return len(value) == 1
262
263
def isAGLName(value: str) -> bool:
    """Tell whether value is a key of the Adobe Glyph List name table.

    Example:
        - `A` => True
        - `ccaron` => True
        - `004A` => False
    """
    knownNames = adobeGlyphList.AGL2UV
    return value in knownNames
273
274
275def isUni(value: str) -> str | bool:
276    """Checks if value is a Unicode codepoint string.
277
278    Args:
279        value: The value to check.
280
281    Returns:
282        - `full` if value is in 'uniXXXX' format,
283        - `quad` if value is a 4-digit hex,
284        - False otherwise.
285    """
286    if not isinstance(value, str):
287        return None
288
289    match = re.compile("^(?P<prefix>uni)?(?P<quad>[A-Z0-9]{4})$").search(value)
290    if match:
291        isPrefix = match.group("prefix")
292        isQuad = match.group("quad")
293        isFull = isPrefix and isQuad
294        if isFull:
295            return "full"
296        elif isQuad:
297            return "quad"
298
299    return False
300
301
def isUniFull(value: str) -> bool:
    """Tell whether value is a prefixed codepoint string (e.g., 'uniXXXX')."""
    kind = isUni(value)
    return kind == "full"
305
306
def isUniQuad(value: str) -> bool:
    """Tell whether value is a bare codepoint string (e.g., 'XXXX')."""
    kind = isUni(value)
    return kind == "quad"
310
311
def isSuffixed(value: str) -> bool:
    """Tell whether value splits into base and suffix (e.g., 'name.suffix')."""
    return toParts(value) is not None
315
316
def isSnakeCase(value: str) -> bool:
    """Returns True if value is in snake_case format (contains underscores).

    Non-string input yields False rather than an implicit None, so the
    result is always a bool.
    """
    if not isinstance(value, str):
        return False

    # A lone "_" is the underscore itself, not snake_case
    return len(value) > 1 and "_" in value
322
323
def canLigate(value: str) -> bool:
    """Tell whether value is a short sequence that could become a ligature.

    A candidate has a base of two or three characters. It is not already
    written as a snake_case ligature name, is not figure-like, and is not on
    the short lists of known non-ligature names.

    Args:
        value: A glyph name, character sequence, or suffixed variant.

    Returns:
        True if the base value is a candidate ligature sequence.

    Examples:
        `canLigate("fi")` => True
        `canLigate("ffl")` => True
        `canLigate("tt")` => True
        `canLigate("f_i")` => False  (already a ligature glyph name)
        `canLigate("A")` => False  (single character)
    """
    if not isinstance(value, str):
        return False

    base = toPartBase(value) if isSuffixed(value) else value

    if isSnakeCase(base) or isFigureLike(base):
        return False

    length = len(base)
    if length == 2:
        # Two-letter names that are not letter sequences (compared as-is)
        return base not in ("CR", "LF", "at")
    if length == 3:
        # Three-letter glyph names, compared case-insensitively
        return base.lower() not in ("eth", "eng", "bar", "yen")
    return False
358
359
def isLigature(value: str) -> bool:
    """Tell whether value names an actual ligature glyph.

    A value counts as a ligature when its base is a snake_case name
    (e.g. `f_i`), or when it converts to a single character whose Unicode
    name contains "LIGATURE" (e.g. `fi` → U+FB01 LATIN SMALL LIGATURE FI).

    Args:
        value: A glyph name, character sequence, or suffixed variant.

    Returns:
        True if the base value is an actual ligature glyph.

    Examples:
        `isLigature("f_i")` => True  (snake_case ligature name)
        `isLigature("fi")` => True  (U+FB01 LATIN SMALL LIGATURE FI)
        `isLigature("fl")` => True  (U+FB02 LATIN SMALL LIGATURE FL)
        `isLigature("tt")` => False  (no Unicode ligature codepoint, not snake_case)
        `isLigature("A")` => False
    """
    if not isinstance(value, str):
        return False

    base = toPartBase(value) if isSuffixed(value) else value

    if isSnakeCase(base):
        return True

    char = toChar(base, strict=True)
    if not (char and isCharSingle(char)):
        return False

    try:
        return "LIGATURE" in unicodedata.name(char)
    except ValueError:
        # unicodedata has no name for this character
        return False
395
396
def isFigureLike(value: str) -> bool:
    """Tell whether value is a figure-like glyph name or character.

    A value is figure-like when its suffix contains one of the figure
    markers (e.g., 'zero.osf', 'one.tf', 'two.numr'), or when it converts to
    a single character whose category starts with `N` or `S`.

    Args:
        value: A glyph name, character sequence, or suffixed variant.
    Returns:
        True if the value is figure-like.
    """
    if not isinstance(value, str):
        return False

    parts = toParts(value)
    if parts:
        value, suffix = parts
        for marker in ("osf", "tf", "numr", "dnom", "sups", "inf"):
            if marker in suffix:
                return True

    char = toChar(value, strict=True)
    if not char or not isCharSingle(char):
        return False

    category = getCategory(char)
    return bool(category) and category.startswith(("N", "S"))
423
424
# Known ligature letter sequences, longest first and alphabetical within each
# length. The alphabetical pass runs first; the stable length sort keeps it.
LIGATURE_SEQUENCES = sorted(
    sorted({"ffi", "ffl", "ffj", "ffk", "fi", "ff", "fl", "tt", "fj", "fh", "fb", "fk"}),
    key=len,
    reverse=True,
)
429
430
def findLigatureSequence(string: str) -> str | None:
    """Return the first entry of LIGATURE_SEQUENCES contained in string.

    Entries are tried in list order, not by position in the string. Returns
    None when no entry occurs.

    Example: `findLigatureSequence("office")` => `ffi`
    """
    return next((liga for liga in LIGATURE_SEQUENCES if liga in string), None)
440
441
def getCategory(char: str) -> str | None:
    """Returns the Unicode category or custom category for a character.

    Custom categories are checked first, in this order: figure suffixes
    (`Nd…`, `No…`), glyph names containing "superior" / "inferior" /
    "circle", then the codepoint range 2776–277E. Otherwise the category
    from `unicodedata` is returned.

    Args:
        char: The character or glyph name (or a list of glyph names).

    Returns:
        The category string, or None when no category can be determined.
    """
    # May be a list of multiple glyphNames
    char = helpers.pickFirst(char)

    suffix = ""
    if isSuffixed(char):
        char, suffix = toParts(char)

    # Old-style/tabular figures, e.g. ".osf" => "NdOsf"
    if any(s in suffix for s in ("osf", "tf")):
        return "Nd" + suffix.strip(".").title()
    # Denominators, numerators => "NoDnom", "NoNumr"
    if any(s in suffix for s in ("dnom", "numr")):
        return "No" + suffix.strip(".").title()

    # toName can return a str, a list of names, or None. Normalise to a list
    # of strings so the substring checks never raise and also work on lists.
    name = toName(char)
    names = name if isinstance(name, list) else [name]
    names = [n for n in names if isinstance(n, str)]

    def _nameContains(fragment: str) -> bool:
        return any(fragment in n for n in names)

    if _nameContains("superior"):
        return "NoSups"  # Number other
    if _nameContains("inferior"):
        return "NoInf"  # Number other
    # Circled numbers
    if _nameContains("circle"):
        return "NoCiOt"

    try:
        if 0x2776 <= int(toUni(char), 16) <= 0x277E:
            return "NoCiSo"
    except Exception as e:
        logger.trace("Error checking Unicode range for {}: {}", char, e)

    try:
        return unicodedata.category(toChar(char))
    except Exception as e:
        logger.trace("Error getting Unicode category for {}: {}", char, e)

    return None
489
def sortByUnicode(char: str):
    """Returns a sort key for a character based on Unicode collation.

    Args:
        char: Character, codepoint string or glyph name, optionally suffixed.

    Returns:
        The collator's sort key for the base character plus suffix, or None
        (after logging a warning) when no base character can be derived.
    """
    suffix = ""
    if isSuffixed(char):
        char, suffix = toParts(char)

    baseChar = toCharBase(char)

    # Check before concatenating: toCharBase can return None, and
    # None + suffix would raise instead of reaching the warning below
    if not baseChar:
        logger.warning("Unable to sort: {}", char)
        return None

    return collator.sort_key(baseChar + suffix)
503
504
def sortByCategory(glyphItem: str):
    """
    Returns a priority index for sorting glyphs by Unicode category.

    Category prefixes, in priority order:

    - `L` letter (upper case `Lu` before lower case `Ll`)
    - `N` number
    - `P` punctuation
    - `S` symbol
    - `M` mark
    - `C` control
    - `Z` separator

    Args:
        glyphItem: The glyph item to categorize.

    Returns:
        The index found for the glyph's category in the priority list, or 10
        when no index is found.
    """
    priorities = [
        "Lu",
        "Ll",
        "L",
        "Nd",
        "NdTnum",
        "NdOsf",
        "NoSups",
        "NoNumr",
        "NoDnom",
        "N",
        "P",
        "S",
        "M",
        "C",
        "Z",
    ]

    glyphCat = getCategory(glyphItem)

    # NOTE(review): findClosestIndex is assumed to return an int index on
    # success and a non-int otherwise — confirm against helpers.
    priority = helpers.findClosestIndex(priorities, glyphCat)
    if isinstance(priority, int):
        return priority

    if glyphCat:
        # loguru formats with "{}" placeholders; without them the extra
        # arguments are silently dropped from the message
        logger.info("[No priority] {} {}", glyphCat, toNameBase(glyphItem))

    # Fallback slot: index 10 is where "P" sits in the list above
    return 10
collator = <pyuca.collator.Collator_9_0_0 object>
def listAvailableGlyphs(fontPath: str) -> list[str]:
16def listAvailableGlyphs(fontPath: str) -> list[str]:
17    """Returns a list of available glyph names in the specified font."""
18    with drawBot.savedState():
19        drawBot.font(fontPath)
20        return drawBot.listFontGlyphNames()

Returns a list of available glyph names in the specified font.

def toChar(value: str, strict=False) -> str:
23def toChar(value: str, strict=False) -> str:
24    """Converts a glyph name or value to its corresponding character.
25
26    Args:
27        value: The glyph name or value to convert.
28        strict: If True, only returns a character if conversion is successful.
29
30    Returns:
31        The corresponding character, or the original value with suffix if not strict.
32
33    Example:
34        `toChar("004A")` => `J`
35    """
36    if value is None:
37        return None
38
39    # List: Assume they’re names that all map to the same char => pick first
40    value = helpers.pickFirst(value)
41
42    # Remove snakeCase
43    value = toNormalCase(value)
44
45    if isCharSingle(value):
46        return value
47    else:
48        if isSuffixed(value):
49            value, suffix = toParts(value)
50        else:
51            suffix = ""
52
53        quad = toUni(value, "quad")
54
55        try:
56            if quad:
57                return chr(int(quad, 16)) + suffix
58            else:
59                if not strict:
60                    return value + suffix
61        except Exception as e:
62            logger.warning("[toChar {}] {}", value, e)

Converts a glyph name or value to its corresponding character.

Arguments:
  • value: The glyph name or value to convert.
  • strict: If True, only returns a character if conversion is successful.
Returns:

The corresponding character, or the original value with suffix if not strict.

Example:

toChar("004A") => J

def toCharBase(value: str) -> str:
65def toCharBase(value: str) -> str:
66    """Returns the base character for a given value.
67
68    Example:
69        `004A.ss01` => `J`
70        `Aacute.ss01` => `Á`
71    """
72    value = toPartBase(value)
73    return toChar(value)

Returns the base character for a given value.

Example:

004A.ss01 => J
Aacute.ss01 => Á

def toParts(value: str) -> tuple | None:
76def toParts(value: str) -> tuple | None:
77    """Splits a value into its base and suffix parts if suffixed.
78
79    Args:
80        value: The value to split.
81
82    Returns:
83        A tuple of (base, suffix) if suffixed, otherwise None.
84    """
85    if not isinstance(value, str):
86        return None
87
88    # Matches foo.bar and also foo.bar.bar
89    match = re.compile(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$").search(value)
90    if match:
91        return match.group("base"), "." + match.group("suffix")

Splits a value into its base and suffix parts if suffixed.

Arguments:
  • value: The value to split.
Returns:

A tuple of (base, suffix) if suffixed, otherwise None.

def toPartBase(value: str) -> str:
 94def toPartBase(value: str) -> str:
 95    """Returns the base part of a value, removing any suffix.
 96
 97    Example:
 98        `004A.ss01` => `004A`
 99    """
100    # List: Assume they’re names that all map to the same char => pick first
101    value = helpers.pickFirst(value)
102
103    if not isinstance(value, str):
104        return None
105
106    if isSuffixed(value):
107        value, _ = toParts(value)
108
109    return value

Returns the base part of a value, removing any suffix.

Example:

004A.ss01 => 004A

def toName(value: str, strict=False) -> str:
112def toName(value: str, strict=False) -> str:
113    """Converts a value to its Adobe Glyph List name.
114
115    Args:
116        value: The value to convert.
117        strict: If True, only returns a name if conversion is successful.
118
119    Returns:
120        The glyph name, or the original value with suffix if not strict.
121    """
122    if isSuffixed(value):
123        value, suffix = toParts(value)
124    else:
125        suffix = ""
126
127    try:
128        # Remove snakeCase
129        value = toNormalCase(value)
130
131        if not isUniQuad(value):
132            quad = toUni(value, "quad")
133        else:
134            quad = value
135
136        # Can be str or str[]
137        name = adobeGlyphList.UV2AGL[quad]
138        if isinstance(name, list):
139            return [n + suffix for n in name]
140        else:
141            return name + suffix
142    except Exception as e:
143        try:
144            if not strict:
145                return value + suffix
146            else:
147                logger.warning("[Cannot convert toName strict] {}: {}", value, e)
148                return None
149        except Exception as e:
150            logger.warning("[Cannot convert toName] {}: {}", value, e)

Converts a value to its Adobe Glyph List name.

Arguments:
  • value: The value to convert.
  • strict: If True, only returns a name if conversion is successful.
Returns:

The glyph name, or the original value with suffix if not strict.

def toNameBase(value: str) -> str:
153def toNameBase(value: str) -> str:
154    """Returns the base glyph name for a given value."""
155    value = toPartBase(value)
156    return toName(value)

Returns the base glyph name for a given value.

def toUni(value: str, mode: Literal['quad', 'full'] = 'quad') -> str:
159def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str:
160    """Converts a value to its Unicode codepoint string.
161
162    Args:
163        value: The value to convert.
164        mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.
165
166    Returns:
167        The Unicode codepoint string, or None if conversion fails.
168    """
169    # List: Assume they’re names that all map to the same char => pick first
170    value = helpers.pickFirst(value)
171
172    if not isinstance(value, str):
173        return None
174
175    if isSuffixed(value):
176        value, suffix = toParts(value)
177    else:
178        suffix = ""
179
180    if isAGLName(value):
181        quad = adobeGlyphList.AGL2UV[value]
182    elif isChar(value):
183        value = toChar(value, strict=True)
184        if value:
185            quad = hex(ord(value))[2:].zfill(4).upper()
186        else:
187            return None
188    else:
189        if not value.startswith("uni") and not isUni(value):
190            logger.trace("[Cannot Convert to Uni] {}", value)
191            return None
192        quad = value.replace("uni", "")
193
194    valueUni = "uni" + quad if mode == "full" else quad
195    return valueUni + suffix

Converts a value to its Unicode codepoint string.

Arguments:
  • value: The value to convert.
  • mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.
Returns:

The Unicode codepoint string, or None if conversion fails.

def toSnakeCase(value: str) -> str:
198def toSnakeCase(value: str) -> str:
199    """Converts a glyph name to snake_case format.
200
201    Example:
202        `ffj` => `f_f_j`
203    """
204
205    def _processSnake(value: str) -> str:
206        short = re.compile(r"^(?P<char>[A-Za-z0-9]{2,3})$").match(value)
207        suffixed = re.compile(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$").match(
208            value
209        )
210        match = short or suffixed
211
212        if match:
213            char = "_".join(list(match.group("char")))
214            if suffixed:
215                return ".".join([char, suffixed.group("rest")])
216            else:
217                return char
218
219    if not isinstance(value, str):
220        return None
221
222    if not isAGLName(value):
223        value = toName(value)
224
225    # Name can be str or str[]
226    if isinstance(value, list):
227        values = helpers.removeNone([_processSnake(v) for v in value])
228        if values:
229            return values
230    else:
231        return _processSnake(value)

Converts a glyph name to snake_case format.

Example:

ffj => f_f_j

def toNormalCase(value: str) -> str:
234def toNormalCase(value: str) -> str:
235    """Reverts snake_case in glyph names to normal case.
236
237    Example:
238        `f_f_j.liga` => `ffj.liga`
239    """
240    if isSnakeCase(value):
241        return value.replace("_", "")
242    else:
243        return value

Reverts snake_case in glyph names to normal case.

Example:

f_f_j.liga => ffj.liga

def isChar(value: str) -> bool:
246def isChar(value: str) -> bool:
247    """Returns True if value is a single character (after removing suffix)."""
248    if isSuffixed(value):
249        value, _ = toParts(value)
250
251    return isCharSingle(value)

Returns True if value is a single character (after removing suffix).

def isCharSingle(value: str) -> bool:
254def isCharSingle(value: str) -> bool:
255    """Returns True if value is a single character.
256
257    Example:
258        - `J` => True
259        - `č` => True
260        - `004A` => False
261    """
262    return isinstance(value, str) and len(value) == 1

Returns True if value is a single character.

Example:
  • J => True
  • č => True
  • 004A => False
def isAGLName(value: str) -> bool:
265def isAGLName(value: str) -> bool:
266    """Returns True if value is a glyph name in the Adobe Glyph List.
267
268    Example:
269        - `A` => True
270        - `ccaron` => True
271        - `004A` => False
272    """
273    return value in adobeGlyphList.AGL2UV.keys()

Returns True if value is a glyph name in the Adobe Glyph List.

Example:
  • A => True
  • ccaron => True
  • 004A => False
def isUni(value: str) -> str | bool:
276def isUni(value: str) -> str | bool:
277    """Checks if value is a Unicode codepoint string.
278
279    Args:
280        value: The value to check.
281
282    Returns:
283        - `full` if value is in 'uniXXXX' format,
284        - `quad` if value is a 4-digit hex,
285        - False otherwise.
286    """
287    if not isinstance(value, str):
288        return None
289
290    match = re.compile("^(?P<prefix>uni)?(?P<quad>[A-Z0-9]{4})$").search(value)
291    if match:
292        isPrefix = match.group("prefix")
293        isQuad = match.group("quad")
294        isFull = isPrefix and isQuad
295        if isFull:
296            return "full"
297        elif isQuad:
298            return "quad"
299
300    return False

Checks if value is a Unicode codepoint string.

Arguments:
  • value: The value to check.
Returns:
  • full if value is in 'uniXXXX' format,
  • quad if value is a 4-digit hex,
  • False otherwise.
def isUniFull(value: str) -> bool:
303def isUniFull(value: str) -> bool:
304    """Returns True if value is a full Unicode string (e.g., 'uniXXXX')."""
305    return isUni(value) == "full"

Returns True if value is a full Unicode string (e.g., 'uniXXXX').

def isUniQuad(value: str) -> bool:
308def isUniQuad(value: str) -> bool:
309    """Returns True if value is a quad Unicode string (e.g., 'XXXX')."""
310    return isUni(value) == "quad"

Returns True if value is a quad Unicode string (e.g., 'XXXX').

def isSuffixed(value: str) -> bool:
313def isSuffixed(value: str) -> bool:
314    """Returns True if value has a suffix (e.g., 'name.suffix')."""
315    return bool(toParts(value))

Returns True if value has a suffix (e.g., 'name.suffix').

def isSnakeCase(value: str) -> bool:
318def isSnakeCase(value: str) -> bool:
319    """Returns True if value is in snake_case format (contains underscores)."""
320    # "_" is underscore, not snakeCase
321    if isinstance(value, str):
322        return "_" in value and len(value) > 1

Returns True if value is in snake_case format (contains underscores).

def canLigate(value: str) -> bool:
325def canLigate(value: str) -> bool:
326    """Returns True if value is a multi-character sequence that could form a ligature.
327
328    A ligaturable sequence is 2–3 alphanumeric characters that are not already
329    expressed as a snake_case ligature glyph name.
330
331    Args:
332        value: A glyph name, character sequence, or suffixed variant.
333
334    Returns:
335        True if the base value is a candidate ligature sequence.
336
337    Examples:
338        `canLigate("fi")` => True
339        `canLigate("ffl")` => True
340        `canLigate("tt")` => True
341        `canLigate("f_i")` => False  (already a ligature glyph name)
342        `canLigate("A")` => False  (single character)
343    """
344    if not isinstance(value, str):
345        return False
346
347    base = toPartBase(value) if isSuffixed(value) else value
348
349    if isSnakeCase(base) or isFigureLike(base):
350        return False
351
352    # Allow 2–3 character sequences, excluding common non-ligature sequences
353    if len(base) == 2 and base not in ["CR", "LF", "at"]:
354        return True
355    elif len(base) == 3 and base.lower() not in ["eth", "eng", "bar", "yen"]:
356        return True
357
358    return False

Returns True if value is a multi-character sequence that could form a ligature.

A ligaturable sequence is 2–3 alphanumeric characters that are not already expressed as a snake_case ligature glyph name.

Arguments:
  • value: A glyph name, character sequence, or suffixed variant.
Returns:

True if the base value is a candidate ligature sequence.

Examples:

canLigate("fi") => True
canLigate("ffl") => True
canLigate("tt") => True
canLigate("f_i") => False (already a ligature glyph name)
canLigate("A") => False (single character)

def isLigature(value: str) -> bool:
361def isLigature(value: str) -> bool:
362    """Returns True if value represents an actual ligature glyph.
363
364    Detects ligatures either by snake_case glyph naming convention (e.g. `f_i`)
365    or by Unicode classification (e.g. `fi` → U+FB01 LATIN SMALL LIGATURE FI).
366
367    Args:
368        value: A glyph name, character sequence, or suffixed variant.
369
370    Returns:
371        True if the base value is an actual ligature glyph.
372
373    Examples:
374        `isLigature("f_i")` => True  (snake_case ligature name)
375        `isLigature("fi")` => True  (U+FB01 LATIN SMALL LIGATURE FI)
376        `isLigature("fl")` => True  (U+FB02 LATIN SMALL LIGATURE FL)
377        `isLigature("tt")` => False  (no Unicode ligature codepoint, not snake_case)
378        `isLigature("A")` => False
379    """
380    if not isinstance(value, str):
381        return False
382
383    base = toPartBase(value) if isSuffixed(value) else value
384
385    if isSnakeCase(base):
386        return True
387
388    char = toChar(base, strict=True)
389    if char and isCharSingle(char):
390        try:
391            return "LIGATURE" in unicodedata.name(char)
392        except ValueError:
393            pass
394
395    return False

Returns True if value represents an actual ligature glyph.

Detects ligatures either by snake_case glyph naming convention (e.g. f_i) or by Unicode classification (e.g. fi → U+FB01 LATIN SMALL LIGATURE FI).

Arguments:
  • value: A glyph name, character sequence, or suffixed variant.
Returns:

True if the base value is an actual ligature glyph.

Examples:

isLigature("f_i") => True (snake_case ligature name)
isLigature("fi") => True (U+FB01 LATIN SMALL LIGATURE FI)
isLigature("fl") => True (U+FB02 LATIN SMALL LIGATURE FL)
isLigature("tt") => False (no Unicode ligature codepoint, not snake_case)
isLigature("A") => False

def isFigureLike(value: str) -> bool:
def isFigureLike(value: str) -> bool:
    """Tells whether a value is a figure-like glyph name or character.

    A value qualifies either through its suffix (e.g. 'zero.osf', 'one.tf',
    'two.numr') or because its base resolves to a single character whose
    category, as reported by `getCategory`, starts with `N` (Number) or
    `S` (Symbol).

    Args:
        value: A glyph name, character sequence, or suffixed variant.

    Returns:
        True if the value is figure-like; False otherwise, including for
        any non-string input.
    """
    if not isinstance(value, str):
        return False

    figureMarkers = ("osf", "tf", "numr", "dnom", "sups", "inf")

    if isSuffixed(value):
        # From here on `value` is the base part only.
        value, suffix = toParts(value)
        # Substring match: the marker may sit anywhere inside the suffix.
        for marker in figureMarkers:
            if marker in suffix:
                return True

    resolved = toChar(value, strict=True)
    if not resolved or not isCharSingle(resolved):
        return False

    category = getCategory(resolved)
    return bool(category) and category.startswith(("N", "S"))

Returns True if value is a figure-like glyph name or character.

Detects figure-like glyphs by Unicode category (Number, Symbol) or by common suffixes (e.g., 'zero.osf', 'one.tf', 'two.numr').

Arguments:
  • value: A glyph name, character sequence, or suffixed variant.
Returns:

True if the value is figure-like.

LIGATURE_SEQUENCES = ['ffi', 'ffj', 'ffk', 'ffl', 'fb', 'ff', 'fh', 'fi', 'fj', 'fk', 'fl', 'tt']
def findLigatureSequence(string: str) -> str | None:
def findLigatureSequence(string: str) -> str | None:
    """Returns the first entry of `LIGATURE_SEQUENCES` contained in a string.

    Candidates are tried in the order they are listed in
    `LIGATURE_SEQUENCES`, not in the order they occur inside `string`.

    Args:
        string: The text to search.

    Returns:
        The matching ligature sequence, or None when none is contained.

    Example: `findLigatureSequence("office")` => `ffi`
    """
    matches = (candidate for candidate in LIGATURE_SEQUENCES if candidate in string)
    return next(matches, None)

Finds the first ligature sequence in a string.

Example: findLigatureSequence("office") => ffi

def getCategory(char: str) -> str | None:
def getCategory(char: str) -> str | None:
    """Returns the Unicode category or a custom category for a character.

    Custom categories are derived from the glyph suffix (`osf`, `tf`,
    `dnom`, `numr`), from the glyph name (`superior`, `inferior`, `circle`)
    or from the code point range U+2776–U+277E. Everything else falls back
    to `unicodedata.category`.

    Args:
        char: The character or glyph name (a list of names is reduced to
            one by `helpers.pickFirst`).

    Returns:
        The Unicode category string, or a custom category for special cases.
        None when the category lookup fails.
    """
    # May be a list of multiple glyphNames
    char = helpers.pickFirst(char)

    suffix = ""
    if isSuffixed(char):
        char, suffix = toParts(char)

    # Edge cases
    # Old-style/tabular figures, e.g. NdOsf
    if any(tag in suffix for tag in ("osf", "tf")):
        return "Nd" + suffix.strip(".").title()

    # Denominators, numerators: NoDnom, NoNumr
    if any(tag in suffix for tag in ("dnom", "numr")):
        return "No" + suffix.strip(".").title()

    if "superior" in toName(char):
        return "NoSups"  # Number other

    # Edge case for inferiors
    if "inferior" in toName(char):
        return "NoInf"  # Number other

    # Circled numbers
    if "circle" in toName(char):
        return "NoCiOt"

    # TODO: Check if in given unicode range
    # U+2776..U+277E: DINGBAT NEGATIVE CIRCLED DIGIT ONE..NINE
    lowest, highest = 0x2776, 0x277E
    try:
        if lowest <= int(toUni(char), 16) <= highest:
            return "NoCiSo"
    except Exception as e:
        logger.trace("Error checking Unicode range for {}: {}", char, e)

    try:
        return unicodedata.category(toChar(char))
    except Exception as e:
        logger.trace("Error getting Unicode category for {}: {}", char, e)

Returns the Unicode category or custom category for a character.

Arguments:
  • char: The character or glyph name.
Returns:

The Unicode category string, or a custom category for special cases.

def sortByUnicode(char: str):
def sortByUnicode(char: str):
    """Returns a sort key for a character based on Unicode collation.

    Args:
        char: A character or glyph name, optionally suffixed.

    Returns:
        The key produced by `collator.sort_key` for the base character
        followed by the suffix, or None (after logging a warning) when no
        sortable string could be derived.
    """
    if isSuffixed(char):
        char, suffix = toParts(char)
    else:
        suffix = ""

    base = toCharBase(char)

    # toCharBase may return None (toChar does so when the conversion fails).
    # Concatenating None with the suffix would raise TypeError before the
    # warning below could ever be reached.
    sortable = base + suffix if base is not None else ""

    if sortable:
        return collator.sort_key(sortable)

    logger.warning("Unable to sort: {}", char)
    return None

Returns a sort key for a character based on Unicode collation.

def sortByCategory(glyphItem: str):
def sortByCategory(glyphItem: str):
    """
    Returns a priority index for sorting glyphs by Unicode category.

    - `L` letter
    - `N` number
    - `P` punctuation
    - `S` symbol
    - `M` mark
    - `C` control
    - `Z` Space separator

    Args:
        glyphItem: The glyph item to categorize.

    Returns:
        An integer priority index for sorting. Falls back to 10 when the
        category cannot be matched against the priority list.
    """
    priorities = [
        "Lu",
        "Ll",
        "L",
        "Nd",
        "NdTnum",
        "NdOsf",
        "NoSups",
        "NoNumr",
        "NoDnom",
        "N",
        "P",
        "S",
        "M",
        "C",
        "Z",
    ]

    glyphCat = getCategory(glyphItem)
    priority = helpers.findClosestIndex(priorities, glyphCat)

    if isinstance(priority, int):
        return priority

    if glyphCat:
        # loguru formats messages with str.format-style `{}` placeholders;
        # without them the extra arguments never appear in the log line.
        logger.info("[No priority] {} {}", glyphCat, toNameBase(glyphItem))

    # Fallback slot for unmatched categories (same index as "P" in the list).
    return 10

Returns a priority index for sorting glyphs by Unicode category.

  • L letter
  • N number
  • P punctuation
  • S symbol
  • M mark
  • C control
  • Z Space separator
Arguments:
  • glyphItem: The glyph item to categorize.
Returns:

An integer priority index for sorting.