lib.glyphs
1import re 2import unicodedata 3from typing import Literal 4from loguru import logger 5from pyuca import Collator 6import drawBot 7from icecream import ic 8 9from lib import helpers 10from .external import adobeGlyphList 11 12collator = Collator() 13 14 15def listAvailableGlyphs(fontPath: str) -> list[str]: 16 """Returns a list of available glyph names in the specified font.""" 17 with drawBot.savedState(): 18 drawBot.font(fontPath) 19 return drawBot.listFontGlyphNames() 20 21 22def toChar(value: str, strict=False) -> str: 23 """Converts a glyph name or value to its corresponding character. 24 25 Args: 26 value: The glyph name or value to convert. 27 strict: If True, only returns a character if conversion is successful. 28 29 Returns: 30 The corresponding character, or the original value with suffix if not strict. 31 32 Example: 33 `toChar("004A")` => `J` 34 """ 35 if value is None: 36 return None 37 38 # List: Assume they’re names that all map to the same char => pick first 39 value = helpers.pickFirst(value) 40 41 # Remove snakeCase 42 value = toNormalCase(value) 43 44 if isCharSingle(value): 45 return value 46 else: 47 if isSuffixed(value): 48 value, suffix = toParts(value) 49 else: 50 suffix = "" 51 52 quad = toUni(value, "quad") 53 54 try: 55 if quad: 56 return chr(int(quad, 16)) + suffix 57 else: 58 if not strict: 59 return value + suffix 60 except Exception as e: 61 logger.warning("[toChar {}] {}", value, e) 62 63 64def toCharBase(value: str) -> str: 65 """Returns the base character for a given value. 66 67 Example: 68 `004A.ss01` => `J` 69 `Aacute.ss01` => `Á` 70 """ 71 value = toPartBase(value) 72 return toChar(value) 73 74 75def toParts(value: str) -> tuple | None: 76 """Splits a value into its base and suffix parts if suffixed. 77 78 Args: 79 value: The value to split. 80 81 Returns: 82 A tuple of (base, suffix) if suffixed, otherwise None. 
83 """ 84 if not isinstance(value, str): 85 return None 86 87 # Matches foo.bar and also foo.bar.bar 88 match = re.compile(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$").search(value) 89 if match: 90 return match.group("base"), "." + match.group("suffix") 91 92 93def toPartBase(value: str) -> str: 94 """Returns the base part of a value, removing any suffix. 95 96 Example: 97 `004A.ss01` => `004A` 98 """ 99 # List: Assume they’re names that all map to the same char => pick first 100 value = helpers.pickFirst(value) 101 102 if not isinstance(value, str): 103 return None 104 105 if isSuffixed(value): 106 value, _ = toParts(value) 107 108 return value 109 110 111def toName(value: str, strict=False) -> str: 112 """Converts a value to its Adobe Glyph List name. 113 114 Args: 115 value: The value to convert. 116 strict: If True, only returns a name if conversion is successful. 117 118 Returns: 119 The glyph name, or the original value with suffix if not strict. 120 """ 121 if isSuffixed(value): 122 value, suffix = toParts(value) 123 else: 124 suffix = "" 125 126 try: 127 # Remove snakeCase 128 value = toNormalCase(value) 129 130 if not isUniQuad(value): 131 quad = toUni(value, "quad") 132 else: 133 quad = value 134 135 # Can be str or str[] 136 name = adobeGlyphList.UV2AGL[quad] 137 if isinstance(name, list): 138 return [n + suffix for n in name] 139 else: 140 return name + suffix 141 except Exception as e: 142 try: 143 if not strict: 144 return value + suffix 145 else: 146 logger.warning("[Cannot convert toName strict] {}: {}", value, e) 147 return None 148 except Exception as e: 149 logger.warning("[Cannot convert toName] {}: {}", value, e) 150 151 152def toNameBase(value: str) -> str: 153 """Returns the base glyph name for a given value.""" 154 value = toPartBase(value) 155 return toName(value) 156 157 158def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str: 159 """Converts a value to its Unicode codepoint string. 
160 161 Args: 162 value: The value to convert. 163 mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format. 164 165 Returns: 166 The Unicode codepoint string, or None if conversion fails. 167 """ 168 # List: Assume they’re names that all map to the same char => pick first 169 value = helpers.pickFirst(value) 170 171 if not isinstance(value, str): 172 return None 173 174 if isSuffixed(value): 175 value, suffix = toParts(value) 176 else: 177 suffix = "" 178 179 if isAGLName(value): 180 quad = adobeGlyphList.AGL2UV[value] 181 elif isChar(value): 182 value = toChar(value, strict=True) 183 if value: 184 quad = hex(ord(value))[2:].zfill(4).upper() 185 else: 186 return None 187 else: 188 if not value.startswith("uni") and not isUni(value): 189 logger.trace("[Cannot Convert to Uni] {}", value) 190 return None 191 quad = value.replace("uni", "") 192 193 valueUni = "uni" + quad if mode == "full" else quad 194 return valueUni + suffix 195 196 197def toSnakeCase(value: str) -> str: 198 """Converts a glyph name to snake_case format. 199 200 Example: 201 `ffj` => `f_f_j` 202 """ 203 204 def _processSnake(value: str) -> str: 205 short = re.compile(r"^(?P<char>[A-Za-z0-9]{2,3})$").match(value) 206 suffixed = re.compile(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$").match( 207 value 208 ) 209 match = short or suffixed 210 211 if match: 212 char = "_".join(list(match.group("char"))) 213 if suffixed: 214 return ".".join([char, suffixed.group("rest")]) 215 else: 216 return char 217 218 if not isinstance(value, str): 219 return None 220 221 if not isAGLName(value): 222 value = toName(value) 223 224 # Name can be str or str[] 225 if isinstance(value, list): 226 values = helpers.removeNone([_processSnake(v) for v in value]) 227 if values: 228 return values 229 else: 230 return _processSnake(value) 231 232 233def toNormalCase(value: str) -> str: 234 """Reverts snake_case in glyph names to normal case. 
235 236 Example: 237 `f_f_j.liga` => `ffj.liga` 238 """ 239 if isSnakeCase(value): 240 return value.replace("_", "") 241 else: 242 return value 243 244 245def isChar(value: str) -> bool: 246 """Returns True if value is a single character (after removing suffix).""" 247 if isSuffixed(value): 248 value, _ = toParts(value) 249 250 return isCharSingle(value) 251 252 253def isCharSingle(value: str) -> bool: 254 """Returns True if value is a single character. 255 256 Example: 257 - `J` => True 258 - `č` => True 259 - `004A` => False 260 """ 261 return isinstance(value, str) and len(value) == 1 262 263 264def isAGLName(value: str) -> bool: 265 """Returns True if value is a glyph name in the Adobe Glyph List. 266 267 Example: 268 - `A` => True 269 - `ccaron` => True 270 - `004A` => False 271 """ 272 return value in adobeGlyphList.AGL2UV.keys() 273 274 275def isUni(value: str) -> str | bool: 276 """Checks if value is a Unicode codepoint string. 277 278 Args: 279 value: The value to check. 280 281 Returns: 282 - `full` if value is in 'uniXXXX' format, 283 - `quad` if value is a 4-digit hex, 284 - False otherwise. 
285 """ 286 if not isinstance(value, str): 287 return None 288 289 match = re.compile("^(?P<prefix>uni)?(?P<quad>[A-Z0-9]{4})$").search(value) 290 if match: 291 isPrefix = match.group("prefix") 292 isQuad = match.group("quad") 293 isFull = isPrefix and isQuad 294 if isFull: 295 return "full" 296 elif isQuad: 297 return "quad" 298 299 return False 300 301 302def isUniFull(value: str) -> bool: 303 """Returns True if value is a full Unicode string (e.g., 'uniXXXX').""" 304 return isUni(value) == "full" 305 306 307def isUniQuad(value: str) -> bool: 308 """Returns True if value is a quad Unicode string (e.g., 'XXXX').""" 309 return isUni(value) == "quad" 310 311 312def isSuffixed(value: str) -> bool: 313 """Returns True if value has a suffix (e.g., 'name.suffix').""" 314 return bool(toParts(value)) 315 316 317def isSnakeCase(value: str) -> bool: 318 """Returns True if value is in snake_case format (contains underscores).""" 319 # "_" is underscore, not snakeCase 320 if isinstance(value, str): 321 return "_" in value and len(value) > 1 322 323 324def canLigate(value: str) -> bool: 325 """Returns True if value is a multi-character sequence that could form a ligature. 326 327 A ligaturable sequence is 2–3 alphanumeric characters that are not already 328 expressed as a snake_case ligature glyph name. 329 330 Args: 331 value: A glyph name, character sequence, or suffixed variant. 332 333 Returns: 334 True if the base value is a candidate ligature sequence. 
335 336 Examples: 337 `canLigate("fi")` => True 338 `canLigate("ffl")` => True 339 `canLigate("tt")` => True 340 `canLigate("f_i")` => False (already a ligature glyph name) 341 `canLigate("A")` => False (single character) 342 """ 343 if not isinstance(value, str): 344 return False 345 346 base = toPartBase(value) if isSuffixed(value) else value 347 348 if isSnakeCase(base) or isFigureLike(base): 349 return False 350 351 # Allow 2–3 character sequences, excluding common non-ligature sequences 352 if len(base) == 2 and base not in ["CR", "LF", "at"]: 353 return True 354 elif len(base) == 3 and base.lower() not in ["eth", "eng", "bar", "yen"]: 355 return True 356 357 return False 358 359 360def isLigature(value: str) -> bool: 361 """Returns True if value represents an actual ligature glyph. 362 363 Detects ligatures either by snake_case glyph naming convention (e.g. `f_i`) 364 or by Unicode classification (e.g. `fi` → U+FB01 LATIN SMALL LIGATURE FI). 365 366 Args: 367 value: A glyph name, character sequence, or suffixed variant. 368 369 Returns: 370 True if the base value is an actual ligature glyph. 371 372 Examples: 373 `isLigature("f_i")` => True (snake_case ligature name) 374 `isLigature("fi")` => True (U+FB01 LATIN SMALL LIGATURE FI) 375 `isLigature("fl")` => True (U+FB02 LATIN SMALL LIGATURE FL) 376 `isLigature("tt")` => False (no Unicode ligature codepoint, not snake_case) 377 `isLigature("A")` => False 378 """ 379 if not isinstance(value, str): 380 return False 381 382 base = toPartBase(value) if isSuffixed(value) else value 383 384 if isSnakeCase(base): 385 return True 386 387 char = toChar(base, strict=True) 388 if char and isCharSingle(char): 389 try: 390 return "LIGATURE" in unicodedata.name(char) 391 except ValueError: 392 pass 393 394 return False 395 396 397def isFigureLike(value: str) -> bool: 398 """Returns True if value is a figure-like glyph name or character. 
399 400 Detects figure-like glyphs by Unicode category (Number, Symbol) or by 401 common suffixes (e.g., 'zero.osf', 'one.tf', 'two.numr'). 402 403 Args: 404 value: A glyph name, character sequence, or suffixed variant. 405 Returns: 406 True if the value is figure-like. 407 """ 408 if not isinstance(value, str): 409 return False 410 411 if isSuffixed(value): 412 value, suffix = toParts(value) 413 if any(s in suffix for s in ["osf", "tf", "numr", "dnom", "sups", "inf"]): 414 return True 415 416 char = toChar(value, strict=True) 417 if char and isCharSingle(char): 418 category = getCategory(char) 419 if category and category.startswith(("N", "S")): 420 return True 421 422 return False 423 424 425LIGATURE_SEQUENCES = sorted( 426 set(["ffi", "ffl", "ffj", "ffk", "fi", "ff", "fl", "tt", "fj", "fh", "fb", "fk"]), 427 key=lambda s: (-len(s), s), # Sort by length (longer first) then alphabetically 428) 429 430 431def findLigatureSequence(string: str) -> str | None: 432 """Finds the first ligature sequence in a string. 433 434 Example: `findLigatureSequence("office")` => `ffi` 435 """ 436 for liga in LIGATURE_SEQUENCES: 437 if liga in string: 438 return liga 439 return None 440 441 442def getCategory(char: str) -> str | None: 443 """Returns the Unicode category or custom category for a character. 444 445 Args: 446 char: The character or glyph name. 447 448 Returns: 449 The Unicode category string, or a custom category for special cases. 
450 """ 451 # May be a list of multiple glyphNames 452 char = helpers.pickFirst(char) 453 454 if isSuffixed(char): 455 char, suffix = toParts(char) 456 else: 457 suffix = "" 458 459 # Edge cases 460 # Old-style/tabular figures 461 if any([s in suffix for s in ["osf", "tf"]]): 462 # NdOsf 463 return "Nd" + suffix.strip(".").title() 464 # Denominators, numerators 465 elif any([s in suffix for s in ["dnom", "numr"]]): 466 # NoDnom, NoNumr 467 return "No" + suffix.strip(".").title() 468 elif "superior" in toName(char): 469 return "NoSups" # Number other 470 # Edge case for inferiors 471 elif "inferior" in toName(char): 472 return "NoInf" # Number other 473 # Circled numbers 474 elif "circle" in toName(char): 475 return "NoCiOt" 476 else: 477 # TODO: Check if in given unicode range 478 try: 479 MIN, MAX = map(lambda num: int(num, 16), ["2776", "277E"]) 480 if MIN <= int(toUni(char), 16) <= MAX: 481 return "NoCiSo" 482 except Exception as e: 483 logger.trace("Error checking Unicode range for {}: {}", char, e) 484 485 try: 486 return unicodedata.category(toChar(char)) 487 except Exception as e: 488 logger.trace("Error getting Unicode category for {}: {}", char, e) 489 490def sortByUnicode(char: str): 491 """Returns a sort key for a character based on Unicode collation.""" 492 if isSuffixed(char): 493 char, suffix = toParts(char) 494 else: 495 suffix = "" 496 497 charBase = toCharBase(char) + suffix 498 499 if charBase: 500 return collator.sort_key(charBase) 501 else: 502 logger.warning("Unable to sort: {}", char) 503 504 505def sortByCategory(glyphItem: str): 506 """ 507 Returns a priority index for sorting glyphs by Unicode category. 508 509 - `L` lowercase 510 - `N` number 511 - `P` punctuation 512 - `S` symbol 513 - `M` mark 514 - `C` control 515 - `Z` Space separator 516 517 Args: 518 glyphItem: The glyph item to categorize. 519 520 Returns: 521 An integer priority index for sorting. 
522 """ 523 priorities = [ 524 "Lu", 525 "Ll", 526 "L", 527 "Nd", 528 "NdTnum", 529 "NdOsf", 530 "NoSups", 531 "NoNumr", 532 "NoDnom", 533 "N", 534 "P", 535 "S", 536 "M", 537 "C", 538 "Z", 539 ] 540 541 glyphCat = getCategory(glyphItem) 542 glyphName = toNameBase(glyphItem) 543 544 priority = helpers.findClosestIndex(priorities, glyphCat) 545 546 if not isinstance(priority, int): 547 if glyphCat: 548 logger.info("[No priority]", glyphCat, glyphName) 549 return 10 550 else: 551 return priority
def listAvailableGlyphs(fontPath: str) -> list[str]:
    """List the glyph names offered by a font.

    Args:
        fontPath: The font identifier passed to `drawBot.font`.

    Returns:
        Whatever `drawBot.listFontGlyphNames()` reports for that font.
    """
    # The font is selected inside `drawBot.savedState()`, so the selection is
    # scoped to this lookup (presumably restored on exit — see DrawBot docs).
    with drawBot.savedState():
        drawBot.font(fontPath)
        glyphNames = drawBot.listFontGlyphNames()
    return glyphNames
Returns a list of available glyph names in the specified font.
def toChar(value: str, strict=False) -> str | None:
    """Converts a glyph name or value to its corresponding character.

    Args:
        value: The glyph name, Unicode value or character to convert. A
            snake_case name (`f_i`) is normalised first; whatever
            `helpers.pickFirst` selects is used when several names are given.
        strict: If True, only returns a character if conversion is successful.

    Returns:
        The corresponding character (plus any `.suffix` the input carried).
        When no codepoint can be resolved: the original value with suffix if
        not strict, otherwise None. None is also returned for a None input
        and when the resolved codepoint cannot be turned into a character.

    Example:
        `toChar("004A")` => `J`
    """
    if value is None:
        return None

    # List: assume they're names that all map to the same char => pick first
    value = helpers.pickFirst(value)

    # Remove snakeCase
    value = toNormalCase(value)

    # Already a character: nothing to convert
    if isCharSingle(value):
        return value

    # Split `name.suffix` so only the base is resolved to a codepoint
    parts = toParts(value)
    base, suffix = parts if parts else (value, "")

    quad = toUni(base, "quad")

    try:
        if quad:
            return chr(int(quad, 16)) + suffix
        if not strict:
            return base + suffix
    except (TypeError, ValueError, OverflowError) as e:
        # Invalid hex, out-of-range codepoint, or a non-string base
        logger.warning("[toChar {}] {}", base, e)

    return None
Converts a glyph name or value to its corresponding character.
Arguments:
- value: The glyph name or value to convert.
- strict: If True, only returns a character if conversion is successful.
Returns:
The corresponding character, or the original value with suffix if not strict.
Example:
toChar("004A") => J
def toCharBase(value: str) -> str:
    """Resolves a value to its character, ignoring any suffix.

    The suffix is dropped with `toPartBase` and the remaining base is passed
    to `toChar` (non-strict).

    Example:
        `004A.ss01` => `J`
        `Aacute.ss01` => `Á`
    """
    return toChar(toPartBase(value))
Returns the base character for a given value.
Example:
004A.ss01 => J; Aacute.ss01 => Á
76def toParts(value: str) -> tuple | None: 77 """Splits a value into its base and suffix parts if suffixed. 78 79 Args: 80 value: The value to split. 81 82 Returns: 83 A tuple of (base, suffix) if suffixed, otherwise None. 84 """ 85 if not isinstance(value, str): 86 return None 87 88 # Matches foo.bar and also foo.bar.bar 89 match = re.compile(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$").search(value) 90 if match: 91 return match.group("base"), "." + match.group("suffix")
Splits a value into its base and suffix parts if suffixed.
Arguments:
- value: The value to split.
Returns:
A tuple of (base, suffix) if suffixed, otherwise None.
def toPartBase(value: str) -> str:
    """Strips the suffix from a value and returns what is left.

    Returns None when the value (after `helpers.pickFirst`) is not a string.

    Example:
        `004A.ss01` => `004A`
    """
    # List: assume the names all map to the same char => pick first
    candidate = helpers.pickFirst(value)

    if not isinstance(candidate, str):
        return None

    parts = toParts(candidate)
    return parts[0] if parts else candidate
Returns the base part of a value, removing any suffix.
Example:
004A.ss01 => 004A
def toName(value: str, strict=False) -> str:
    """Looks up the Adobe Glyph List name for a value.

    Args:
        value: The value to convert (name, Unicode value or character),
            optionally carrying a `.suffix` that is re-attached to the result.
        strict: If True, only returns a name if conversion is successful.

    Returns:
        The glyph name with suffix, or a list of such names when the glyph
        list maps the codepoint to several names. If the lookup fails: the
        (normalised) value with suffix when not strict, otherwise None.
    """
    parts = toParts(value)
    if parts:
        value, suffix = parts
    else:
        suffix = ""

    try:
        # Remove snakeCase
        value = toNormalCase(value)

        quad = value if isUniQuad(value) else toUni(value, "quad")

        # Can be str or str[]
        name = adobeGlyphList.UV2AGL[quad]
        if not isinstance(name, list):
            return name + suffix
        return [n + suffix for n in name]
    except Exception as lookupError:
        try:
            if strict:
                logger.warning(
                    "[Cannot convert toName strict] {}: {}", value, lookupError
                )
                return None
            return value + suffix
        except Exception as fallbackError:
            # The fallback itself failed, e.g. the value is not a string
            logger.warning("[Cannot convert toName] {}: {}", value, fallbackError)
    return None
Converts a value to its Adobe Glyph List name.
Arguments:
- value: The value to convert.
- strict: If True, only returns a name if conversion is successful.
Returns:
The glyph name, or the original value with suffix if not strict.
def toNameBase(value: str) -> str:
    """Resolves a value to its glyph name, ignoring any suffix.

    The suffix is dropped with `toPartBase` and the remaining base is passed
    to `toName` (non-strict).
    """
    return toName(toPartBase(value))
Returns the base glyph name for a given value.
def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str | None:
    """Converts a value to its Unicode codepoint string.

    Args:
        value: The value to convert: an Adobe Glyph List name, a single
            character, or a `uniXXXX` / `XXXX` codepoint string. A `.suffix`
            is carried over to the result.
        mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.

    Returns:
        The Unicode codepoint string (plus suffix), or None if conversion
        fails.
    """
    # List: assume they're names that all map to the same char => pick first
    value = helpers.pickFirst(value)

    if not isinstance(value, str):
        return None

    parts = toParts(value)
    base, suffix = parts if parts else (value, "")

    if isAGLName(base):
        quad = adobeGlyphList.AGL2UV[base]
    elif isChar(base):
        char = toChar(base, strict=True)
        if not char:
            return None
        # Uppercase hex, zero-padded to at least four digits
        quad = f"{ord(char):04X}"
    else:
        if not base.startswith("uni") and not isUni(base):
            logger.trace("[Cannot Convert to Uni] {}", base)
            return None
        # Drop only the leading `uni` marker; a plain replace() would also
        # remove any later "uni" occurrence inside the value.
        quad = base.removeprefix("uni")

    valueUni = "uni" + quad if mode == "full" else quad
    return valueUni + suffix
Converts a value to its Unicode codepoint string.
Arguments:
- value: The value to convert.
- mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.
Returns:
The Unicode codepoint string, or None if conversion fails.
def toSnakeCase(value: str) -> str:
    """Rewrites a short glyph name with underscores between its characters.

    Values that are not Adobe Glyph List names are first converted with
    `toName`. Only names whose base is 2–3 ASCII alphanumerics are rewritten;
    a `.suffix` is kept as-is.

    Returns:
        The snake_case name, a list of names when `toName` yields several, or
        None when the value is not a string or nothing could be rewritten.

    Example:
        `ffj` => `f_f_j`
    """

    def _processSnake(name: str) -> str:
        plain = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})$", name)
        withSuffix = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$", name)

        if withSuffix:
            return "_".join(withSuffix.group("char")) + "." + withSuffix.group("rest")
        if plain:
            return "_".join(plain.group("char"))
        return None

    if not isinstance(value, str):
        return None

    name = value if isAGLName(value) else toName(value)

    # Name can be str or str[]
    if not isinstance(name, list):
        return _processSnake(name)

    snaked = helpers.removeNone([_processSnake(n) for n in name])
    return snaked if snaked else None
Converts a glyph name to snake_case format.
Example:
ffj => f_f_j
def toNormalCase(value: str) -> str:
    """Removes the underscores from a snake_case glyph name.

    Values that `isSnakeCase` does not recognise are returned unchanged.

    Example:
        `f_f_j.liga` => `ffj.liga`
    """
    return value.replace("_", "") if isSnakeCase(value) else value
Reverts snake_case in glyph names to normal case.
Example:
f_f_j.liga => ffj.liga
def isChar(value: str) -> bool:
    """Tells whether the value, ignoring any suffix, is one character."""
    parts = toParts(value)
    base = parts[0] if parts else value
    return isCharSingle(base)
Returns True if value is a single character (after removing suffix).
def isCharSingle(value: str) -> bool:
    """Tells whether the value is a string of exactly one character.

    Example:
        - `J` => True
        - `č` => True
        - `004A` => False
    """
    if not isinstance(value, str):
        return False
    return len(value) == 1
Returns True if value is a single character.
Example:
J => True; č => True; 004A => False
def isAGLName(value: str) -> bool:
    """Tells whether the value is a key of `adobeGlyphList.AGL2UV`.

    Example:
        - `A` => True
        - `ccaron` => True
        - `004A` => False
    """
    # Membership on the mapping itself tests its keys.
    knownNames = adobeGlyphList.AGL2UV
    return value in knownNames
Returns True if value is a glyph name in the Adobe Glyph List.
Example:
A => True; ccaron => True; 004A => False
276def isUni(value: str) -> str | bool: 277 """Checks if value is a Unicode codepoint string. 278 279 Args: 280 value: The value to check. 281 282 Returns: 283 - `full` if value is in 'uniXXXX' format, 284 - `quad` if value is a 4-digit hex, 285 - False otherwise. 286 """ 287 if not isinstance(value, str): 288 return None 289 290 match = re.compile("^(?P<prefix>uni)?(?P<quad>[A-Z0-9]{4})$").search(value) 291 if match: 292 isPrefix = match.group("prefix") 293 isQuad = match.group("quad") 294 isFull = isPrefix and isQuad 295 if isFull: 296 return "full" 297 elif isQuad: 298 return "quad" 299 300 return False
Checks if value is a Unicode codepoint string.
Arguments:
- value: The value to check.
Returns:
"full" if value is in 'uniXXXX' format; "quad" if value is a 4-digit hex; False otherwise.
def isUniFull(value: str) -> bool:
    """Tells whether `isUni` classifies the value as 'full' ('uniXXXX')."""
    kind = isUni(value)
    return kind == "full"
Returns True if value is a full Unicode string (e.g., 'uniXXXX').
def isUniQuad(value: str) -> bool:
    """Tells whether `isUni` classifies the value as 'quad' ('XXXX')."""
    kind = isUni(value)
    return kind == "quad"
Returns True if value is a quad Unicode string (e.g., 'XXXX').
def isSuffixed(value: str) -> bool:
    """Tells whether `toParts` can split the value (e.g. 'name.suffix')."""
    # toParts yields a (base, suffix) pair or None
    return toParts(value) is not None
Returns True if value has a suffix (e.g., 'name.suffix').
def isSnakeCase(value: str) -> bool:
    """Returns True if value is in snake_case format (contains underscores).

    A lone `_` is the underscore character itself, not snake_case, so the
    value must be longer than one character.

    Args:
        value: The value to check.

    Returns:
        True for a multi-character string containing `_`, otherwise False
        (including non-string input).
    """
    if not isinstance(value, str):
        return False

    return len(value) > 1 and "_" in value
Returns True if value is in snake_case format (contains underscores).
def canLigate(value: str) -> bool:
    """Tells whether a value is a short sequence that could form a ligature.

    The base (value without suffix) qualifies when it is 2 or 3 characters
    long, is not snake_case, is not figure-like, and is not one of a few
    excluded names (`CR`, `LF`, `at`; `eth`, `eng`, `bar`, `yen` in any case).

    Args:
        value: A glyph name, character sequence, or suffixed variant.

    Returns:
        True if the base value is a candidate ligature sequence.

    Examples:
        `canLigate("fi")` => True
        `canLigate("ffl")` => True
        `canLigate("tt")` => True
        `canLigate("f_i")` => False (already a ligature glyph name)
        `canLigate("A")` => False (single character)
    """
    if not isinstance(value, str):
        return False

    base = toPartBase(value) if isSuffixed(value) else value

    if isSnakeCase(base) or isFigureLike(base):
        return False

    length = len(base)
    if length == 2:
        return base not in ["CR", "LF", "at"]
    if length == 3:
        return base.lower() not in ["eth", "eng", "bar", "yen"]
    return False
Returns True if value is a multi-character sequence that could form a ligature.
A ligaturable sequence is 2–3 alphanumeric characters that are not already expressed as a snake_case ligature glyph name.
Arguments:
- value: A glyph name, character sequence, or suffixed variant.
Returns:
True if the base value is a candidate ligature sequence.
Examples:
canLigate("fi") => True; canLigate("ffl") => True; canLigate("tt") => True; canLigate("f_i") => False (already a ligature glyph name); canLigate("A") => False (single character)
def isLigature(value: str) -> bool:
    """Tells whether a value names an actual ligature glyph.

    A base (value without suffix) counts as a ligature when it is written in
    snake_case (e.g. `f_i`), or when it resolves to a single character whose
    Unicode name contains `LIGATURE` (e.g. `fi` → U+FB01).

    Args:
        value: A glyph name, character sequence, or suffixed variant.

    Returns:
        True if the base value is an actual ligature glyph.

    Examples:
        `isLigature("f_i")` => True (snake_case ligature name)
        `isLigature("fi")` => True (U+FB01 LATIN SMALL LIGATURE FI)
        `isLigature("fl")` => True (U+FB02 LATIN SMALL LIGATURE FL)
        `isLigature("tt")` => False (no Unicode ligature codepoint, not snake_case)
        `isLigature("A")` => False
    """
    if not isinstance(value, str):
        return False

    base = toPartBase(value) if isSuffixed(value) else value

    if isSnakeCase(base):
        return True

    char = toChar(base, strict=True)
    if not (char and isCharSingle(char)):
        return False

    try:
        charName = unicodedata.name(char)
    except ValueError:
        # The character has no Unicode name
        return False
    return "LIGATURE" in charName
Returns True if value represents an actual ligature glyph.
Detects ligatures either by snake_case glyph naming convention (e.g. f_i)
or by Unicode classification (e.g. fi → U+FB01 LATIN SMALL LIGATURE FI).
Arguments:
- value: A glyph name, character sequence, or suffixed variant.
Returns:
True if the base value is an actual ligature glyph.
Examples:
isLigature("f_i") => True (snake_case ligature name); isLigature("fi") => True (U+FB01 LATIN SMALL LIGATURE FI); isLigature("fl") => True (U+FB02 LATIN SMALL LIGATURE FL); isLigature("tt") => False (no Unicode ligature codepoint, not snake_case); isLigature("A") => False
def isFigureLike(value: str) -> bool:
    """Tells whether a value is a figure-like glyph name or character.

    A value is figure-like when its suffix contains one of the figure
    markers (`osf`, `tf`, `numr`, `dnom`, `sups`, `inf`), or when its base
    resolves to a single character whose category (from `getCategory`)
    starts with `N` or `S`.

    Args:
        value: A glyph name, character sequence, or suffixed variant.
    Returns:
        True if the value is figure-like.
    """
    if not isinstance(value, str):
        return False

    parts = toParts(value)
    if parts:
        value, suffix = parts
        for marker in ["osf", "tf", "numr", "dnom", "sups", "inf"]:
            if marker in suffix:
                return True

    char = toChar(value, strict=True)
    if not char or not isCharSingle(char):
        return False

    category = getCategory(char)
    return bool(category) and category.startswith(("N", "S"))
Returns True if value is a figure-like glyph name or character.
Detects figure-like glyphs by Unicode category (Number, Symbol) or by common suffixes (e.g., 'zero.osf', 'one.tf', 'two.numr').
Arguments:
- value: A glyph name, character sequence, or suffixed variant.
Returns:
True if the value is figure-like.
def findLigatureSequence(string: str) -> str | None:
    """Returns the first entry of `LIGATURE_SEQUENCES` contained in a string.

    Entries are tried in the order of `LIGATURE_SEQUENCES`, not by their
    position in the string; None is returned when none of them occurs.

    Example: `findLigatureSequence("office")` => `ffi`
    """
    return next((liga for liga in LIGATURE_SEQUENCES if liga in string), None)
Finds the first ligature sequence in a string.
Example: findLigatureSequence("office") => ffi
def getCategory(char: str) -> str | None:
    """Returns the Unicode category or custom category for a character.

    Custom categories are derived from the suffix (`Nd…` for `osf`/`tf`,
    `No…` for `dnom`/`numr`), from the glyph name (`NoSups`, `NoInf`,
    `NoCiOt`) and from the codepoint range U+2776–U+277E (`NoCiSo`).
    Everything else falls back to `unicodedata.category`.

    Args:
        char: The character or glyph name.

    Returns:
        The Unicode category string, a custom category for special cases, or
        None when no category can be determined.
    """
    # May be a list of multiple glyphNames
    char = helpers.pickFirst(char)

    parts = toParts(char)
    base, suffix = parts if parts else (char, "")

    # Old-style/tabular figures, e.g. NdOsf
    if any(s in suffix for s in ("osf", "tf")):
        return "Nd" + suffix.strip(".").title()
    # Denominators, numerators: NoDnom, NoNumr
    if any(s in suffix for s in ("dnom", "numr")):
        return "No" + suffix.strip(".").title()

    # toName may yield a str, a list of str, or None. Flatten it to one
    # searchable string so the substring checks below neither raise on None
    # nor degrade to exact-element matching on a list.
    name = toName(base)
    names = name if isinstance(name, list) else [name]
    nameText = " ".join(n for n in names if isinstance(n, str))

    if "superior" in nameText:
        return "NoSups"  # Number other
    if "inferior" in nameText:
        return "NoInf"  # Number other
    if "circle" in nameText:
        return "NoCiOt"

    # TODO: Check if in given unicode range
    # Best effort: toUni may return None or a non-hex string.
    try:
        if 0x2776 <= int(toUni(base), 16) <= 0x277E:
            return "NoCiSo"
    except Exception as e:
        logger.trace("Error checking Unicode range for {}: {}", base, e)

    # Best effort: toChar may return None or more than one character.
    try:
        return unicodedata.category(toChar(base))
    except Exception as e:
        logger.trace("Error getting Unicode category for {}: {}", base, e)

    return None
Returns the Unicode category or custom category for a character.
Arguments:
- char: The character or glyph name.
Returns:
The Unicode category string, or a custom category for special cases.
def sortByUnicode(char: str):
    """Returns a sort key for a character based on Unicode collation.

    The base of the value is resolved to its character with `toCharBase`;
    the suffix is re-attached before the key is built with the module-level
    `collator`.

    Args:
        char: A character, glyph name or Unicode value, optionally suffixed.

    Returns:
        The key from `collator.sort_key`, or None (after a warning) when the
        base cannot be resolved to a character.
    """
    parts = toParts(char)
    base, suffix = parts if parts else (char, "")

    charBase = toCharBase(base)

    # Check before concatenating: toCharBase may return None, and
    # `None + suffix` would raise instead of reaching the warning.
    if not charBase:
        logger.warning("Unable to sort: {}", base)
        return None

    return collator.sort_key(charBase + suffix)
Returns a sort key for a character based on Unicode collation.
def sortByCategory(glyphItem: str):
    """
    Returns a priority index for sorting glyphs by Unicode category.

    - `L` letter
    - `N` number
    - `P` punctuation
    - `S` symbol
    - `M` mark
    - `C` control
    - `Z` Space separator

    The category from `getCategory` is looked up in the priority list with
    `helpers.findClosestIndex` (presumably matching on the closest category
    prefix — confirm against the helper).

    Args:
        glyphItem: The glyph item to categorize.

    Returns:
        An integer priority index for sorting; 10 when the lookup does not
        yield an integer.
    """
    priorities = [
        "Lu",
        "Ll",
        "L",
        "Nd",
        "NdTnum",
        "NdOsf",
        "NoSups",
        "NoNumr",
        "NoDnom",
        "N",
        "P",
        "S",
        "M",
        "C",
        "Z",
    ]

    glyphCat = getCategory(glyphItem)
    priority = helpers.findClosestIndex(priorities, glyphCat)

    if isinstance(priority, int):
        return priority

    if glyphCat:
        # loguru fills `{}` placeholders; without them the extra arguments
        # were silently dropped. The name is only resolved when it is logged.
        logger.info("[No priority] {} {}", glyphCat, toNameBase(glyphItem))
    return 10
Returns a priority index for sorting glyphs by Unicode category.
L: letter; N: number; P: punctuation; S: symbol; M: mark; C: control; Z: space separator
Arguments:
- glyphItem: The glyph item to categorize.
Returns:
An integer priority index for sorting.