lib.glyphs
"""Conversions between glyph names, Unicode codepoint strings and characters."""

import re
import unicodedata
from typing import Literal
from loguru import logger
from pyuca import Collator
from icecream import ic

from lib import helpers
from .external import adobeGlyphList

collator = Collator()


def toChar(value: str, strict=False) -> str:
    """Converts a glyph name or value to its corresponding character.

    Args:
        value: The glyph name or value to convert.
        strict: If True, only returns a character if conversion is successful.

    Returns:
        The corresponding character (plus suffix), or the normalised value
        with suffix if not strict. None for a None input, for a failed strict
        lookup, or when the conversion raises (a warning is logged then).

    Example:
        `toChar("004A")` => `J`
    """
    if value is None:
        return None

    # List: assume they're names that all map to the same char => pick first.
    # Underscores (snake case) are removed before any lookup.
    value = toNormalCase(helpers.pickFirst(value))

    if isCharSingle(value):
        return value

    parts = toParts(value)
    base, suffix = parts if parts else (value, "")
    quad = toUni(base, "quad")

    try:
        if quad:
            return chr(int(quad, 16)) + suffix
        if not strict:
            return base + suffix
    except Exception as e:
        logger.warning("[toChar {}] {}", base, e)
    return None


def toCharBase(value: str) -> str:
    """Returns the base character for a given value.

    Example:
        `004A.ss01` => `J`
    """
    return toChar(toPartBase(value))


def toParts(value: str) -> tuple | None:
    """Splits a value into its base and suffix parts if suffixed.

    Args:
        value: The value to split.

    Returns:
        A tuple of (base, suffix) if suffixed, otherwise None. The suffix
        keeps its leading dot.
    """
    if not isinstance(value, str):
        return None

    # Matches foo.bar and also foo.bar.bar
    match = re.search(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$", value)
    if match:
        return match.group("base"), "." + match.group("suffix")
    return None


def toPartBase(value: str) -> str:
    """Returns the base part of a value, removing any suffix.

    Example:
        `004A.ss01` => `004A`
    """
    # List: assume they're names that all map to the same char => pick first
    value = helpers.pickFirst(value)

    if not isinstance(value, str):
        return None

    parts = toParts(value)
    return parts[0] if parts else value


def toName(value: str, strict=False) -> str:
    """Converts a value to its Adobe Glyph List name.

    Args:
        value: The value to convert.
        strict: If True, only returns a name if conversion is successful.

    Returns:
        The glyph name (or a list of names) with the suffix re-attached, or
        the original value with suffix if not strict. None when strict and the
        lookup fails, or when the fallback cannot be built.
    """
    if isSuffixed(value):
        value, suffix = toParts(value)
    else:
        suffix = ""

    try:
        # Remove snakeCase
        value = toNormalCase(value)
        quad = value if isUniQuad(value) else toUni(value, "quad")

        # Can be str or str[]
        name = adobeGlyphList.UV2AGL[quad]
        if isinstance(name, list):
            return [n + suffix for n in name]
        return name + suffix
    except Exception:
        # Best effort: any failed lookup falls through to the fallback below.
        if strict:
            return None
        try:
            return value + suffix
        except TypeError:
            # value is not a string, so it cannot be joined with the suffix.
            logger.warning("[Cannot convert toName] {}", value)
            return None


def toNameBase(value: str) -> str:
    """Returns the base glyph name for a given value."""
    return toName(toPartBase(value))


def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str:
    """Converts a value to its Unicode codepoint string.

    Args:
        value: The value to convert.
        mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.

    Returns:
        The Unicode codepoint string (plus suffix), or None if conversion fails.
    """
    # List: assume they're names that all map to the same char => pick first
    value = helpers.pickFirst(value)

    if not isinstance(value, str):
        return None

    if isSuffixed(value):
        value, suffix = toParts(value)
    else:
        suffix = ""

    if isAGLName(value):
        quad = adobeGlyphList.AGL2UV[value]
    elif isCharSingle(value):
        # The suffix is already stripped, so a single char converts directly.
        quad = f"{ord(value):04X}"
    else:
        prefixed = value.startswith("uni")
        digits = value[3:] if prefixed else value
        if prefixed:
            # Only accept "uni" + hex digits; "unicorn" must not become "corn".
            valid = re.fullmatch(r"[0-9A-Fa-f]+", digits) is not None
        else:
            valid = bool(isUni(value))
        if not valid:
            logger.trace("[Cannot Convert to Uni] {}", value)
            return None
        quad = digits

    valueUni = "uni" + quad if mode == "full" else quad
    return valueUni + suffix


def toSnakeCase(value: str) -> str:
    """Converts a glyph name to snake_case format.

    Only names whose base is two or three ASCII letters/digits are converted;
    anything else yields None.

    Example:
        `ffj` => `f_f_j`
    """

    def _processSnake(name: str) -> str:
        short = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})$", name)
        if short:
            return "_".join(short.group("char"))
        suffixed = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$", name)
        if suffixed:
            return "_".join(suffixed.group("char")) + "." + suffixed.group("rest")
        return None

    if not isinstance(value, str):
        return None

    name = value if isAGLName(value) else toName(value)

    # Name can be str or str[]
    if not isinstance(name, list):
        return _processSnake(name)

    values = helpers.removeNone([_processSnake(n) for n in name])
    return values if values else None


def toNormalCase(value: str) -> str:
    """Reverts snake_case in glyph names to normal case.

    Example:
        `f_f_j.liga` => `ffj.liga`
    """
    return value.replace("_", "") if isSnakeCase(value) else value


def isChar(value: str) -> bool:
    """Returns True if value is a single character (after removing suffix)."""
    parts = toParts(value)
    return isCharSingle(parts[0] if parts else value)


def isCharSingle(value: str) -> bool:
    """Returns True if value is a single character.

    Example:
        - `J` => True
        - `č` => True
        - `004A` => False
    """
    return isinstance(value, str) and len(value) == 1


def isAGLName(value: str) -> bool:
    """Returns True if value is a glyph name in the Adobe Glyph List.

    Non-string values are never names, so they yield False.

    Example:
        - `A` => True
        - `ccaron` => True
        - `004A` => False
    """
    return isinstance(value, str) and value in adobeGlyphList.AGL2UV


def isUni(value: str) -> str | bool:
    """Checks if value is a Unicode codepoint string.

    Args:
        value: The value to check.

    Returns:
        - `full` if value is in 'uniXXXX' format,
        - `quad` if value is a 4-digit uppercase hex,
        - False otherwise (including non-string input).
    """
    if not isinstance(value, str):
        return False

    match = re.fullmatch(r"(?P<prefix>uni)?(?P<quad>[0-9A-F]{4})", value)
    if match is None:
        return False
    return "full" if match.group("prefix") else "quad"


def isUniFull(value: str) -> bool:
    """Returns True if value is a full Unicode string (e.g., 'uniXXXX')."""
    return isUni(value) == "full"


def isUniQuad(value: str) -> bool:
    """Returns True if value is a quad Unicode string (e.g., 'XXXX')."""
    return isUni(value) == "quad"


def isSuffixed(value: str) -> bool:
    """Returns True if value has a suffix (e.g., 'name.suffix')."""
    return toParts(value) is not None


def isSnakeCase(value: str) -> bool:
    """Returns True if value is in snake_case format (contains underscores)."""
    # "_" on its own is the underscore glyph, not snakeCase
    return isinstance(value, str) and len(value) > 1 and "_" in value


def getCategory(char):
    """Returns the Unicode category or custom category for a character.

    Args:
        char: The character or glyph name (or a list of glyph names).

    Returns:
        The Unicode category string, a custom category for special cases
        (figure suffixes, superiors, inferiors, circled numbers), or None if
        no category can be determined.
    """
    # May be a list of multiple glyphNames
    char = helpers.pickFirst(char)

    if isSuffixed(char):
        char, suffix = toParts(char)
    else:
        suffix = ""

    # Old-style/tabular figures, e.g. NdOsf
    if any(s in suffix for s in ("osf", "tf")):
        return "Nd" + suffix.strip(".").title()
    # Denominators, numerators: NoDnom, NoNumr
    if any(s in suffix for s in ("dnom", "numr")):
        return "No" + suffix.strip(".").title()

    # toName may return None, a str or a list of str; normalise to a list so
    # the substring checks below work for every shape.
    names = toName(char)
    if names is None:
        names = []
    elif not isinstance(names, list):
        names = [names]

    def _nameContains(fragment: str) -> bool:
        return any(isinstance(n, str) and fragment in n for n in names)

    if _nameContains("superior"):
        return "NoSups"  # Number other
    if _nameContains("inferior"):
        return "NoInf"  # Number other
    if _nameContains("circle"):
        return "NoCiOt"

    # TODO: Check if in given unicode range
    try:
        if 0x2776 <= int(toUni(char), 16) <= 0x277E:
            return "NoCiSo"
    except (TypeError, ValueError):
        # toUni gave None or a non-hex string: not in the range.
        pass

    try:
        return unicodedata.category(toChar(char))
    except TypeError:
        # toChar gave None or something that is not a single character.
        return None


def sortByUnicode(char: str):
    """Returns a sort key for a character based on Unicode collation.

    Returns None (after logging a warning) when no base character can be
    resolved.
    """
    if isSuffixed(char):
        char, suffix = toParts(char)
    else:
        suffix = ""

    charBase = toCharBase(char)

    # Check before concatenating: None + suffix would raise TypeError.
    if not charBase:
        logger.warning("Unable to sort: {}", char)
        return None

    return collator.sort_key(charBase + suffix)


def sortByCategory(glyphItem: str):
    """
    Returns a priority index for sorting glyphs by Unicode category.

    - `L` letter
    - `N` number
    - `P` punctuation
    - `S` symbol
    - `M` mark
    - `C` control
    - `Z` Space separator

    Args:
        glyphItem: The glyph item to categorize.

    Returns:
        An integer priority index for sorting.
    """
    priorities = [
        "Lu",
        "Ll",
        "L",
        "Nd",
        "NdTnum",
        "NdOsf",
        "NoSups",
        "NoNumr",
        "NoDnom",
        "N",
        "P",
        "S",
        "M",
        "C",
        "Z",
    ]

    glyphCat = getCategory(glyphItem)
    priority = helpers.findClosestIndex(priorities, glyphCat)

    if isinstance(priority, int):
        return priority

    if glyphCat:
        # loguru formats with "{}" placeholders; without them the args vanish.
        logger.info("[No priority] {} {}", glyphCat, toNameBase(glyphItem))
    # Fallback slot; 10 is the position of "P" in the list above.
    return 10
def toChar(value: str, strict=False) -> str:
    """Resolve a glyph name, codepoint string or character to a character.

    Args:
        value: The glyph name or value to convert.
        strict: When True, nothing is returned unless a codepoint is found.

    Returns:
        The character with any suffix re-attached. When not strict and no
        codepoint is found, the normalised value with its suffix. None for a
        None input, a failed strict lookup, or a conversion error (which is
        logged as a warning).

    Example:
        `toChar("004A")` => `J`
    """
    if value is None:
        return None

    # A list is assumed to hold names for the same char, so take the first;
    # snake-case underscores are dropped before any lookup.
    glyph = toNormalCase(helpers.pickFirst(value))

    if isCharSingle(glyph):
        return glyph

    split = toParts(glyph)
    base, tail = split if split else (glyph, "")
    codepoint = toUni(base, "quad")

    try:
        if codepoint:
            return chr(int(codepoint, 16)) + tail
        if not strict:
            return base + tail
    except Exception as err:
        logger.warning("[toChar {}] {}", base, err)
Converts a glyph name or value to its corresponding character.
Arguments:
- value: The glyph name or value to convert.
- strict: If True, only returns a character if conversion is successful.
Returns:
The corresponding character, or the original value with suffix if not strict.
Example:
`toChar("004A")` => `J`
def toCharBase(value: str) -> str:
    """Strip any suffix from the value and convert what is left to a character.

    Example:
        `004A.ss01` => `J`
    """
    return toChar(toPartBase(value))
Returns the base character for a given value.
Example:
`004A.ss01` => `J`
67def toParts(value: str) -> tuple | None: 68 """Splits a value into its base and suffix parts if suffixed. 69 70 Args: 71 value: The value to split. 72 73 Returns: 74 A tuple of (base, suffix) if suffixed, otherwise None. 75 """ 76 if not isinstance(value, str): 77 return None 78 79 # Matches foo.bar and also foo.bar.bar 80 match = re.compile(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$").search(value) 81 if match: 82 return match.group("base"), "." + match.group("suffix")
Splits a value into its base and suffix parts if suffixed.
Arguments:
- value: The value to split.
Returns:
A tuple of (base, suffix) if suffixed, otherwise None.
def toPartBase(value: str) -> str:
    """Return the value without its suffix.

    A list is reduced with `helpers.pickFirst` first; anything that is not a
    string afterwards yields None.

    Example:
        `004A.ss01` => `004A`
    """
    candidate = helpers.pickFirst(value)
    if not isinstance(candidate, str):
        return None

    split = toParts(candidate)
    return split[0] if split else candidate
Returns the base part of a value, removing any suffix.
Example:
`004A.ss01` => `004A`
def toName(value: str, strict=False) -> str:
    """Converts a value to its Adobe Glyph List name.

    Args:
        value: The value to convert.
        strict: If True, only returns a name if conversion is successful.

    Returns:
        The glyph name (or a list of names) with the suffix re-attached, or
        the original value with suffix if not strict. None when strict and the
        lookup fails, or when the fallback cannot be built.
    """
    if isSuffixed(value):
        value, suffix = toParts(value)
    else:
        suffix = ""

    try:
        # Remove snakeCase
        value = toNormalCase(value)
        quad = value if isUniQuad(value) else toUni(value, "quad")

        # Can be str or str[]
        name = adobeGlyphList.UV2AGL[quad]
        if isinstance(name, list):
            return [n + suffix for n in name]
        return name + suffix
    except Exception:
        # Best effort: any failed lookup falls through to the fallback below.
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        if strict:
            return None
        try:
            return value + suffix
        except TypeError:
            # value is not a string, so it cannot be joined with the suffix.
            logger.warning("[Cannot convert toName] {}", value)
            return None
Converts a value to its Adobe Glyph List name.
Arguments:
- value: The value to convert.
- strict: If True, only returns a name if conversion is successful.
Returns:
The glyph name, or the original value with suffix if not strict.
def toNameBase(value: str) -> str:
    """Strip any suffix from the value and look up the glyph name of the rest."""
    return toName(toPartBase(value))
Returns the base glyph name for a given value.
def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str:
    """Converts a value to its Unicode codepoint string.

    Args:
        value: The value to convert.
        mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.

    Returns:
        The Unicode codepoint string (with any suffix re-attached), or None if
        conversion fails.
    """
    # List: assume they're names that all map to the same char => pick first
    value = helpers.pickFirst(value)

    if not isinstance(value, str):
        return None

    if isSuffixed(value):
        value, suffix = toParts(value)
    else:
        suffix = ""

    if isAGLName(value):
        quad = adobeGlyphList.AGL2UV[value]
    elif isCharSingle(value):
        # The suffix is already stripped, so a single char converts directly.
        quad = f"{ord(value):04X}"
    else:
        prefixed = value.startswith("uni")
        digits = value[3:] if prefixed else value
        if prefixed:
            # Only accept "uni" followed by hex digits. Previously any string
            # starting with "uni" passed, so "unicorn" became "corn".
            valid = re.fullmatch(r"[0-9A-Fa-f]+", digits) is not None
        else:
            valid = bool(isUni(value))
        if not valid:
            logger.trace("[Cannot Convert to Uni] {}", value)
            return None
        quad = digits

    valueUni = "uni" + quad if mode == "full" else quad
    return valueUni + suffix
Converts a value to its Unicode codepoint string.
Arguments:
- value: The value to convert.
- mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.
Returns:
The Unicode codepoint string, or None if conversion fails.
def toSnakeCase(value: str) -> str:
    """Insert underscores between the characters of a short glyph name.

    Only names whose base is two or three ASCII letters/digits are converted;
    a suffix, if present, is carried over unchanged. Returns None for
    non-string input and for names that do not fit that shape. When the name
    lookup yields a list, a list of the convertible entries is returned (or
    None if there are none).

    Example:
        `ffj` => `f_f_j`
    """

    def _processSnake(name: str) -> str:
        plain = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})$", name)
        if plain:
            return "_".join(plain.group("char"))

        dotted = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$", name)
        if dotted:
            return "_".join(dotted.group("char")) + "." + dotted.group("rest")

        return None

    if not isinstance(value, str):
        return None

    name = value if isAGLName(value) else toName(value)

    # Name can be str or str[]
    if not isinstance(name, list):
        return _processSnake(name)

    converted = helpers.removeNone([_processSnake(entry) for entry in name])
    return converted if converted else None
Converts a glyph name to snake_case format.
Example:
`ffj` => `f_f_j`
def toNormalCase(value: str) -> str:
    """Drop the underscores from a snake_case glyph name.

    Values that are not snake case are returned untouched.

    Example:
        `f_f_j.liga` => `ffj.liga`
    """
    return value.replace("_", "") if isSnakeCase(value) else value
Reverts snake_case in glyph names to normal case.
Example:
`f_f_j.liga` => `ffj.liga`
def isChar(value: str) -> bool:
    """Tell whether the value, ignoring any suffix, is one single character."""
    split = toParts(value)
    base = split[0] if split else value
    return isCharSingle(base)
Returns True if value is a single character (after removing suffix).
def isCharSingle(value: str) -> bool:
    """Tell whether the value is a string of exactly one character.

    Example:
        - `J` => True
        - `č` => True
        - `004A` => False
    """
    if not isinstance(value, str):
        return False
    return len(value) == 1
Returns True if value is a single character.
Example:
- `J` => True
- `č` => True
- `004A` => False
def isAGLName(value: str) -> bool:
    """Returns True if value is a glyph name in the Adobe Glyph List.

    Non-string values (including unhashable ones such as lists) yield False
    instead of raising, matching the other `is*` predicates in this module.

    Example:
        - `A` => True
        - `ccaron` => True
        - `004A` => False
    """
    return isinstance(value, str) and value in adobeGlyphList.AGL2UV
Returns True if value is a glyph name in the Adobe Glyph List.
Example:
- `A` => True
- `ccaron` => True
- `004A` => False
264def isUni(value: str) -> str | bool: 265 """Checks if value is a Unicode codepoint string. 266 267 Args: 268 value: The value to check. 269 270 Returns: 271 - `full` if value is in 'uniXXXX' format, 272 - `quad` if value is a 4-digit hex, 273 - False otherwise. 274 """ 275 if not isinstance(value, str): 276 return None 277 278 match = re.compile("^(?P<prefix>uni)?(?P<quad>[A-Z0-9]{4})$").search(value) 279 if match: 280 isPrefix = match.group("prefix") 281 isQuad = match.group("quad") 282 isFull = isPrefix and isQuad 283 if isFull: 284 return "full" 285 elif isQuad: 286 return "quad" 287 288 return False
Checks if value is a Unicode codepoint string.
Arguments:
- value: The value to check.
Returns:
- `full` if value is in 'uniXXXX' format,
- `quad` if value is a 4-digit hex,
- False otherwise.
def isUniFull(value: str) -> bool:
    """Tell whether the value is a prefixed codepoint string such as 'uniXXXX'."""
    kind = isUni(value)
    return kind == "full"
Returns True if value is a full Unicode string (e.g., 'uniXXXX').
def isUniQuad(value: str) -> bool:
    """Tell whether the value is a bare four-character codepoint such as 'XXXX'."""
    kind = isUni(value)
    return kind == "quad"
Returns True if value is a quad Unicode string (e.g., 'XXXX').
def isSuffixed(value: str) -> bool:
    """Tell whether the value splits into a base and a suffix ('name.suffix')."""
    split = toParts(value)
    return True if split else False
Returns True if value has a suffix (e.g., 'name.suffix').
def isSnakeCase(value: str) -> bool:
    """Returns True if value is in snake_case format (contains underscores).

    Always returns a bool; non-string input yields False rather than None.
    """
    # "_" on its own is the underscore glyph, not snakeCase
    return isinstance(value, str) and len(value) > 1 and "_" in value
Returns True if value is in snake_case format (contains underscores).
def getCategory(char):
    """Returns the Unicode category or custom category for a character.

    Args:
        char: The character or glyph name (or a list of glyph names).

    Returns:
        The Unicode category string, a custom category for special cases
        (figure suffixes, superiors, inferiors, circled numbers), or None if
        no category can be determined.
    """
    # May be a list of multiple glyphNames
    char = helpers.pickFirst(char)

    if isSuffixed(char):
        char, suffix = toParts(char)
    else:
        suffix = ""

    # Old-style/tabular figures, e.g. NdOsf
    if any(s in suffix for s in ("osf", "tf")):
        return "Nd" + suffix.strip(".").title()
    # Denominators, numerators: NoDnom, NoNumr
    if any(s in suffix for s in ("dnom", "numr")):
        return "No" + suffix.strip(".").title()

    # toName may return None, a str or a list of str. Normalise to a list so
    # the substring checks work for every shape: `"x" in some_list` would only
    # test for an exact element, and `"x" in None` raises.
    names = toName(char)
    if names is None:
        names = []
    elif not isinstance(names, list):
        names = [names]

    def _nameContains(fragment: str) -> bool:
        return any(isinstance(n, str) and fragment in n for n in names)

    if _nameContains("superior"):
        return "NoSups"  # Number other
    if _nameContains("inferior"):
        return "NoInf"  # Number other
    if _nameContains("circle"):
        return "NoCiOt"

    # TODO: Check if in given unicode range
    try:
        if 0x2776 <= int(toUni(char), 16) <= 0x277E:
            return "NoCiSo"
    except (TypeError, ValueError):
        # toUni gave None or a non-hex string: not in the range.
        pass

    try:
        return unicodedata.category(toChar(char))
    except TypeError:
        # toChar gave None or something that is not a single character.
        return None
Returns the Unicode category or custom category for a character.
Arguments:
- char: The character or glyph name.
Returns:
The Unicode category string, or a custom category for special cases.
def sortByUnicode(char: str):
    """Returns a sort key for a character based on Unicode collation.

    Returns:
        The collator's sort key for the base character plus suffix, or None
        (after logging a warning) when no base character can be resolved.
    """
    if isSuffixed(char):
        char, suffix = toParts(char)
    else:
        suffix = ""

    charBase = toCharBase(char)

    # Check before concatenating: toCharBase can return None, and
    # None + suffix would raise TypeError instead of reaching the warning.
    if not charBase:
        logger.warning("Unable to sort: {}", char)
        return None

    return collator.sort_key(charBase + suffix)
Returns a sort key for a character based on Unicode collation.
def sortByCategory(glyphItem: str):
    """
    Returns a priority index for sorting glyphs by Unicode category.

    - `L` letter
    - `N` number
    - `P` punctuation
    - `S` symbol
    - `M` mark
    - `C` control
    - `Z` Space separator

    Args:
        glyphItem: The glyph item to categorize.

    Returns:
        An integer priority index for sorting.
    """
    priorities = [
        "Lu",
        "Ll",
        "L",
        "Nd",
        "NdTnum",
        "NdOsf",
        "NoSups",
        "NoNumr",
        "NoDnom",
        "N",
        "P",
        "S",
        "M",
        "C",
        "Z",
    ]

    glyphCat = getCategory(glyphItem)
    priority = helpers.findClosestIndex(priorities, glyphCat)

    if isinstance(priority, int):
        return priority

    if glyphCat:
        # loguru formats with "{}" placeholders; without them the extra
        # arguments were silently dropped from the log line.
        logger.info("[No priority] {} {}", glyphCat, toNameBase(glyphItem))
    # Fallback slot; 10 is the position of "P" in the list above.
    return 10
Returns a priority index for sorting glyphs by Unicode category.
- `L` letter
- `N` number
- `P` punctuation
- `S` symbol
- `M` mark
- `C` control
- `Z` space separator
Arguments:
- glyphItem: The glyph item to categorize.
Returns:
An integer priority index for sorting.