lib.glyphs
1import re 2import unicodedata 3from typing import Literal 4from loguru import logger 5from pyuca import Collator 6from icecream import ic 7 8from lib import helpers 9from .external import adobeGlyphList 10 11collator = Collator() 12 13 14def toChar(value: str, strict=False) -> str: 15 """Converts a glyph name or value to its corresponding character. 16 17 Args: 18 value: The glyph name or value to convert. 19 strict: If True, only returns a character if conversion is successful. 20 21 Returns: 22 The corresponding character, or the original value with suffix if not strict. 23 """ 24 if value is None: 25 return None 26 27 # List: Assume they’re names that all map to the same char => pick first 28 value = helpers.pickFirst(value) 29 30 # Remove snakeCase 31 value = toNormalCase(value) 32 33 if isCharSingle(value): 34 return value 35 else: 36 if isSuffixed(value): 37 value, suffix = toParts(value) 38 else: 39 suffix = "" 40 41 quad = toUni(value, "quad") 42 43 try: 44 if quad: 45 return chr(int(quad, 16)) + suffix 46 else: 47 if not strict: 48 return value + suffix 49 except Exception as e: 50 logger.warning("[toChar {}] {}", value, e) 51 52 53def toCharBase(value: str) -> str: 54 """Returns the base character for a given value.""" 55 value = toPartBase(value) 56 return toChar(value) 57 58 59def toParts(value: str) -> tuple | None: 60 """Splits a value into its base and suffix parts if suffixed. 61 62 Args: 63 value: The value to split. 64 65 Returns: 66 A tuple of (base, suffix) if suffixed, otherwise None. 67 """ 68 if not isinstance(value, str): 69 return None 70 71 # Matches foo.bar and also foo.bar.bar 72 match = re.compile(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$").search(value) 73 if match: 74 return match.group("base"), "." 
+ match.group("suffix") 75 76 77def toPartBase(value: str) -> str: 78 """Returns the base part of a value, removing any suffix.""" 79 # List: Assume they’re names that all map to the same char => pick first 80 value = helpers.pickFirst(value) 81 82 if not isinstance(value, str): 83 return None 84 85 if isSuffixed(value): 86 value, _ = toParts(value) 87 88 return value 89 90 91def toName(value: str, strict=False) -> str: 92 """Converts a value to its Adobe Glyph List name. 93 94 Args: 95 value: The value to convert. 96 strict: If True, only returns a name if conversion is successful. 97 98 Returns: 99 The glyph name, or the original value with suffix if not strict. 100 """ 101 if isSuffixed(value): 102 value, suffix = toParts(value) 103 else: 104 suffix = "" 105 106 try: 107 # Remove snakeCase 108 value = toNormalCase(value) 109 110 if not isUniQuad(value): 111 quad = toUni(value, "quad") 112 else: 113 quad = value 114 115 # Can be str or str[] 116 name = adobeGlyphList.UV2AGL[quad] 117 if isinstance(name, list): 118 return [n + suffix for n in name] 119 else: 120 return name + suffix 121 except: 122 try: 123 if not strict: 124 return value + suffix 125 except: 126 logger.warning("[Cannon convert toName] {}", value) 127 128 129def toNameBase(value: str) -> str: 130 """Returns the base glyph name for a given value.""" 131 value = toPartBase(value) 132 return toName(value) 133 134 135def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str: 136 """Converts a value to its Unicode codepoint string. 137 138 Args: 139 value: The value to convert. 140 mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format. 141 142 Returns: 143 The Unicode codepoint string, or None if conversion fails. 
144 """ 145 # List: Assume they’re names that all map to the same char => pick first 146 value = helpers.pickFirst(value) 147 148 if not isinstance(value, str): 149 return None 150 151 if isSuffixed(value): 152 value, suffix = toParts(value) 153 else: 154 suffix = "" 155 156 if isName(value): 157 quad = adobeGlyphList.AGL2UV[value] 158 elif isChar(value): 159 value = toChar(value, strict=True) 160 if value: 161 quad = hex(ord(value))[2:].zfill(4).upper() 162 else: 163 return None 164 else: 165 if not value.startswith("uni") and not isUni(value): 166 logger.trace("[Cannot Convert to Uni] {}", value) 167 return None 168 quad = value.replace("uni", "") 169 170 valueUni = "uni" + quad if mode == "full" else quad 171 return valueUni + suffix 172 173 174def toSnakeCase(value: str) -> str: 175 """Converts a glyph name to snake_case format. 176 177 Example: 178 `ffj` => `f_f_j` 179 """ 180 181 def _processSnake(value: str) -> str: 182 short = re.compile(r"^(?P<char>[A-Za-z0-9]{2,3})$").match(value) 183 suffixed = re.compile(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$").match( 184 value 185 ) 186 match = short or suffixed 187 188 if match: 189 char = "_".join(list(match.group("char"))) 190 if suffixed: 191 return ".".join([char, suffixed.group("rest")]) 192 else: 193 return char 194 195 if not isinstance(value, str): 196 return None 197 198 if not isName(value): 199 value = toName(value) 200 201 # Name can be str or str[] 202 if isinstance(value, list): 203 values = helpers.removeNone([_processSnake(v) for v in value]) 204 if values: 205 return values 206 else: 207 return _processSnake(value) 208 209 210def toNormalCase(value: str) -> str: 211 """Reverts snake_case in glyph names to normal case. 
212 213 Example: 214 `f_f_j.liga` => `ffj.liga` 215 """ 216 if isSnakeCase(value): 217 return value.replace("_", "") 218 else: 219 return value 220 221 222def isChar(value: str) -> bool: 223 """Returns True if value is a single character (after removing suffix).""" 224 if isSuffixed(value): 225 value, _ = toParts(value) 226 227 return isCharSingle(value) 228 229 230def isCharSingle(value: str) -> bool: 231 """Returns True if value is a single character.""" 232 return isinstance(value, str) and len(value) == 1 233 234 235def isName(value: str) -> bool: 236 """Returns True if value is a glyph name in the Adobe Glyph List.""" 237 return value in adobeGlyphList.AGL2UV.keys() 238 239 240def isUni(value: str) -> str | bool: 241 """Checks if value is a Unicode codepoint string. 242 243 Args: 244 value: The value to check. 245 246 Returns: 247 'full' if value is in 'uniXXXX' format, 248 'quad' if value is a 4-digit hex, 249 False otherwise. 250 """ 251 if not isinstance(value, str): 252 return None 253 254 match = re.compile("^(?P<prefix>uni)?(?P<quad>[A-Z0-9]{4})$").search(value) 255 if match: 256 isPrefix = match.group("prefix") 257 isQuad = match.group("quad") 258 isFull = isPrefix and isQuad 259 if isFull: 260 return "full" 261 elif isQuad: 262 return "quad" 263 264 return False 265 266 267def isUniFull(value: str) -> bool: 268 """Returns True if value is a full Unicode string (e.g., 'uniXXXX').""" 269 return isUni(value) == "full" 270 271 272def isUniQuad(value: str) -> bool: 273 """Returns True if value is a quad Unicode string (e.g., 'XXXX').""" 274 return isUni(value) == "quad" 275 276 277def isSuffixed(value: str) -> bool: 278 """Returns True if value has a suffix (e.g., 'name.suffix').""" 279 return bool(toParts(value)) 280 281 282def isSnakeCase(value: str) -> bool: 283 """Returns True if value is in snake_case format (contains underscores).""" 284 # "_" is underscore, not snakeCase 285 if isinstance(value, str): 286 return "_" in value and len(value) > 1 287 
288 289def getCategory(char): 290 """Returns the Unicode category or custom category for a character. 291 292 Args: 293 char: The character or glyph name. 294 295 Returns: 296 The Unicode category string, or a custom category for special cases. 297 """ 298 # May be a list of multiple glyphNames 299 char = helpers.pickFirst(char) 300 301 if isSuffixed(char): 302 char, suffix = toParts(char) 303 else: 304 suffix = "" 305 306 # Edge cases 307 # Old-style/tabular figures 308 if any([s in suffix for s in ["osf", "tf"]]): 309 # NdOsf 310 return "Nd" + suffix.strip(".").title() 311 # Denominators, numerators 312 elif any([s in suffix for s in ["dnom", "numr"]]): 313 # NoDnom, NoNumr 314 return "No" + suffix.strip(".").title() 315 elif "superior" in toName(char): 316 return "NoSups" # Number other 317 # Edge case for inferiors 318 elif "inferior" in toName(char): 319 return "NoInf" # Number other 320 # Circled numbers 321 elif "circle" in toName(char): 322 return "NoCiOt" 323 else: 324 # TODO: Check if in given unicode range 325 try: 326 MIN, MAX = map(lambda num: int(num, 16), ["2776", "277E"]) 327 if MIN <= int(toUni(char), 16) <= MAX: 328 return "NoCiSo" 329 except: 330 pass 331 332 try: 333 return unicodedata.category(toChar(char)) 334 except: 335 pass 336 337 338def sortByUnicode(char: str): 339 """Returns a sort key for a character based on Unicode collation.""" 340 if isSuffixed(char): 341 char, suffix = toParts(char) 342 else: 343 suffix = "" 344 345 charBase = toCharBase(char) + suffix 346 347 if charBase: 348 return collator.sort_key(charBase) 349 else: 350 logger.warning("Unable to sort: {}", char) 351 352 353def sortByCategory(glyphItem: str): 354 """ 355 Returns a priority index for sorting glyphs by Unicode category. 356 357 - `L` lowercase 358 - `N` number 359 - `P` punctuation 360 - `S` symbol 361 - `M` mark 362 - `C` control 363 - `Z` Space separator 364 365 Args: 366 glyphItem: The glyph item to categorize. 
367 368 Returns: 369 An integer priority index for sorting. 370 """ 371 priorities = [ 372 "Lu", 373 "Ll", 374 "L", 375 "Nd", 376 "NdTnum", 377 "NdOsf", 378 "NoSups", 379 "NoNumr", 380 "NoDnom", 381 "N", 382 "P", 383 "S", 384 "M", 385 "C", 386 "Z", 387 ] 388 389 glyphCat = getCategory(glyphItem) 390 glyphName = toNameBase(glyphItem) 391 392 priority = helpers.findClosestIndex(priorities, glyphCat) 393 394 if not isinstance(priority, int): 395 if glyphCat: 396 logger.info("[No priority]", glyphCat, glyphName) 397 return 10 398 else: 399 return priority
def toChar(value: str, strict=False) -> str:
    """Resolve a glyph name, codepoint string or character to a character.

    Args:
        value: Glyph name, codepoint string, character, or a list of such
            values (the first entry is used). Underscore-joined names such
            as `f_f_j` are collapsed before the lookup.
        strict: When True, nothing is returned for values that cannot be
            resolved to a codepoint.

    Returns:
        The resolved character followed by the original suffix (if any).
        When no codepoint is found and `strict` is False, the suffix-less
        value plus suffix is handed back unchanged. None for a None input,
        for an unresolved value in strict mode, or when the conversion
        raised (a warning is logged in that case).
    """
    if value is None:
        return None

    # A list is treated as aliases of one glyph; snake-case joins are undone.
    candidate = toNormalCase(helpers.pickFirst(value))

    # A lone character is already the answer.
    if isCharSingle(candidate):
        return candidate

    parts = toParts(candidate)
    base, suffix = parts if parts else (candidate, "")

    codepoint = toUni(base, "quad")

    try:
        if codepoint:
            return chr(int(codepoint, 16)) + suffix
        if not strict:
            return base + suffix
    except Exception as err:
        logger.warning("[toChar {}] {}", base, err)

    return None
Converts a glyph name or value to its corresponding character.
Arguments:
- value: The glyph name or value to convert.
- strict: If True, only returns a character if conversion is successful.
Returns:
The corresponding character, or the original value with suffix if not strict.
def toCharBase(value: str) -> str:
    """Resolve `value` to a character after discarding any `.suffix` part."""
    return toChar(toPartBase(value))
Returns the base character for a given value.
60def toParts(value: str) -> tuple | None: 61 """Splits a value into its base and suffix parts if suffixed. 62 63 Args: 64 value: The value to split. 65 66 Returns: 67 A tuple of (base, suffix) if suffixed, otherwise None. 68 """ 69 if not isinstance(value, str): 70 return None 71 72 # Matches foo.bar and also foo.bar.bar 73 match = re.compile(r"^(?P<base>[^\s\.]+)\.(?P<suffix>\S+)$").search(value) 74 if match: 75 return match.group("base"), "." + match.group("suffix")
Splits a value into its base and suffix parts if suffixed.
Arguments:
- value: The value to split.
Returns:
A tuple of (base, suffix) if suffixed, otherwise None.
def toPartBase(value: str) -> str:
    """Return `value` without its `.suffix` part.

    A list is reduced to its first entry via `helpers.pickFirst`. Anything
    that is not a string after that yields None.
    """
    first = helpers.pickFirst(value)

    if not isinstance(first, str):
        return None

    parts = toParts(first)
    return parts[0] if parts else first
Returns the base part of a value, removing any suffix.
def toName(value: str, strict=False) -> str | list[str] | None:
    """Convert a value to its Adobe Glyph List name.

    Args:
        value: Character, glyph name or codepoint string, optionally with a
            `.suffix` that is carried over to the result.
        strict: When True, return None instead of echoing the input back if
            no glyph name can be found.

    Returns:
        The glyph name plus suffix. A list of names (each with the suffix)
        when the lookup table maps the codepoint to several names. The
        suffix-less input plus suffix when the lookup fails and `strict` is
        False. None when the lookup fails in strict mode or the fallback
        cannot be built.
    """
    if isSuffixed(value):
        value, suffix = toParts(value)
    else:
        suffix = ""

    try:
        # Undo snake_case joins such as `f_f_j` before the lookup.
        value = toNormalCase(value)

        quad = value if isUniQuad(value) else toUni(value, "quad")

        # The table entry can be a single name or a list of names.
        name = adobeGlyphList.UV2AGL[quad]
        if isinstance(name, list):
            return [n + suffix for n in name]
        return name + suffix
    except Exception:
        # Best-effort lookup: any failure falls through to the fallback.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit still propagate.
        if strict:
            return None
        try:
            return value + suffix
        except Exception:
            logger.warning("[Cannot convert toName] {}", value)
            return None
Converts a value to its Adobe Glyph List name.
Arguments:
- value: The value to convert.
- strict: If True, only returns a name if conversion is successful.
Returns:
The glyph name, or the original value with suffix if not strict.
def toNameBase(value: str) -> str:
    """Look up the glyph name of `value` after discarding any `.suffix` part."""
    return toName(toPartBase(value))
Returns the base glyph name for a given value.
def toUni(value: str, mode: Literal["quad", "full"] = "quad") -> str | None:
    """Convert a value to its Unicode codepoint string.

    Resolution order for the suffix-less value: Adobe Glyph List name, then
    single character, then codepoint string (`XXXX` or `uni` + hex digits).

    Args:
        value: Glyph name, character or codepoint string, optionally
            suffixed. A list is reduced to its first entry.
        mode: 'quad' for the bare hex digits, 'full' for the `uniXXXX` form.

    Returns:
        The codepoint string in the requested form with the original suffix
        appended, or None if the value cannot be converted.
    """
    # A list is treated as aliases of one glyph; use the first entry.
    value = helpers.pickFirst(value)

    if not isinstance(value, str):
        return None

    if isSuffixed(value):
        value, suffix = toParts(value)
    else:
        suffix = ""

    if isName(value):
        quad = adobeGlyphList.AGL2UV[value]
    elif isChar(value):
        value = toChar(value, strict=True)
        if not value:
            return None
        quad = hex(ord(value))[2:].zfill(4).upper()
    else:
        # Strip only a leading "uni" and require the remainder to be hex.
        # Accepting anything that merely starts with "uni" and removing
        # every "uni" occurrence turned e.g. "uniform" into the bogus
        # codepoint "form".
        quad = value.removeprefix("uni")
        prefixedHex = quad != value and re.fullmatch(r"[0-9A-Fa-f]+", quad)
        if not (isUni(value) or prefixedHex):
            logger.trace("[Cannot Convert to Uni] {}", value)
            return None

    valueUni = "uni" + quad if mode == "full" else quad
    return valueUni + suffix
Converts a value to its Unicode codepoint string.
Arguments:
- value: The value to convert.
- mode: 'quad' for 4-digit hex, 'full' for 'uniXXXX' format.
Returns:
The Unicode codepoint string, or None if conversion fails.
def toSnakeCase(value: str) -> str:
    """Join the characters of a short glyph name with underscores.

    Only names of two or three ASCII letters/digits are converted; a
    `.suffix` is kept as is.

    Example:
        `ffj` => `f_f_j`

    Returns:
        The converted name, a list of converted names when the glyph has
        several names, or None when `value` is not a string or nothing
        could be converted.
    """

    def _snake(name: str) -> str:
        # Bare two/three-character name.
        plain = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})$", name)
        if plain:
            return "_".join(plain.group("char"))

        # Same, followed by a dotted suffix that is carried over untouched.
        dotted = re.match(r"^(?P<char>[A-Za-z0-9]{2,3})\.(?P<rest>.+)$", name)
        if dotted:
            return "_".join(dotted.group("char")) + "." + dotted.group("rest")

        return None

    if not isinstance(value, str):
        return None

    name = value if isName(value) else toName(value)

    # The name lookup yields either one name or a list of names.
    if not isinstance(name, list):
        return _snake(name)

    converted = helpers.removeNone([_snake(entry) for entry in name])
    return converted if converted else None
Converts a glyph name to snake_case format.
Example:
`ffj` => `f_f_j`
def toNormalCase(value: str) -> str:
    """Strip the underscores from a snake_case glyph name.

    Values that `isSnakeCase` does not recognise are returned untouched.

    Example:
        `f_f_j.liga` => `ffj.liga`
    """
    return value.replace("_", "") if isSnakeCase(value) else value
Reverts snake_case in glyph names to normal case.
Example:
`f_f_j.liga` => `ffj.liga`
def isChar(value: str) -> bool:
    """Tell whether `value`, ignoring any `.suffix`, is one character long."""
    parts = toParts(value)
    base = parts[0] if parts else value
    return isCharSingle(base)
Returns True if value is a single character (after removing suffix).
def isCharSingle(value: str) -> bool:
    """Tell whether `value` is a string of exactly one character."""
    if not isinstance(value, str):
        return False
    return len(value) == 1
Returns True if value is a single character.
def isName(value: str) -> bool:
    """Tell whether `value` is a key of the Adobe Glyph List name table."""
    knownNames = adobeGlyphList.AGL2UV.keys()
    return value in knownNames
Returns True if value is a glyph name in the Adobe Glyph List.
241def isUni(value: str) -> str | bool: 242 """Checks if value is a Unicode codepoint string. 243 244 Args: 245 value: The value to check. 246 247 Returns: 248 'full' if value is in 'uniXXXX' format, 249 'quad' if value is a 4-digit hex, 250 False otherwise. 251 """ 252 if not isinstance(value, str): 253 return None 254 255 match = re.compile("^(?P<prefix>uni)?(?P<quad>[A-Z0-9]{4})$").search(value) 256 if match: 257 isPrefix = match.group("prefix") 258 isQuad = match.group("quad") 259 isFull = isPrefix and isQuad 260 if isFull: 261 return "full" 262 elif isQuad: 263 return "quad" 264 265 return False
Checks if value is a Unicode codepoint string.
Arguments:
- value: The value to check.
Returns:
'full' if value is in 'uniXXXX' format, 'quad' if value is a 4-digit hex, False otherwise.
def isUniFull(value: str) -> bool:
    """Tell whether `isUni` classifies `value` as 'full' (e.g. 'uniXXXX')."""
    kind = isUni(value)
    return kind == "full"
Returns True if value is a full Unicode string (e.g., 'uniXXXX').
def isUniQuad(value: str) -> bool:
    """Tell whether `isUni` classifies `value` as 'quad' (e.g. 'XXXX')."""
    kind = isUni(value)
    return kind == "quad"
Returns True if value is a quad Unicode string (e.g., 'XXXX').
def isSuffixed(value: str) -> bool:
    """Tell whether `toParts` can split `value` (e.g. 'name.suffix')."""
    return True if toParts(value) else False
Returns True if value has a suffix (e.g., 'name.suffix').
def isSnakeCase(value: str) -> bool:
    """Tell whether a string contains an underscore join.

    A lone `_` is the underscore glyph itself and does not count. Non-string
    input yields None.
    """
    if not isinstance(value, str):
        return None

    hasUnderscore = "_" in value
    return hasUnderscore and len(value) > 1
Returns True if value is in snake_case format (contains underscores).
def getCategory(char):
    """Return the Unicode category or a custom category for a character.

    Custom categories take precedence over the Unicode one:
    `Nd…` / `No…` are built from figure-related suffixes, `NoSups`,
    `NoInf` and `NoCiOt` come from the glyph name, and `NoCiSo` from the
    codepoint range U+2776–U+277E.

    Args:
        char: The character or glyph name, optionally suffixed. A list is
            reduced to its first entry.

    Returns:
        The category string, or None when no category can be determined.
    """
    # May be a list of multiple glyph names.
    char = helpers.pickFirst(char)

    if isSuffixed(char):
        char, suffix = toParts(char)
    else:
        suffix = ""

    # Old-style/tabular figures, e.g. ".osf" => "NdOsf".
    if any(s in suffix for s in ("osf", "tf")):
        return "Nd" + suffix.strip(".").title()
    # Denominators/numerators, e.g. ".dnom" => "NoDnom".
    if any(s in suffix for s in ("dnom", "numr")):
        return "No" + suffix.strip(".").title()

    # The name lookup can yield one name, a list of names, or None.
    # Normalise to a list of strings so the substring checks below neither
    # raise on None nor degrade to exact list membership.
    names = toName(char)
    if not isinstance(names, list):
        names = [names]
    names = [name for name in names if isinstance(name, str)]

    def _named(fragment: str) -> bool:
        return any(fragment in name for name in names)

    if _named("superior"):
        return "NoSups"
    if _named("inferior"):
        return "NoInf"
    if _named("circle"):
        return "NoCiOt"

    # TODO: Check if in given unicode range
    try:
        if 0x2776 <= int(toUni(char), 16) <= 0x277E:
            return "NoCiSo"
    except Exception:
        # No usable codepoint (e.g. None or non-hex): not in the range.
        pass

    try:
        return unicodedata.category(toChar(char))
    except Exception:
        # toChar produced nothing that is exactly one character.
        return None
Returns the Unicode category or custom category for a character.
Arguments:
- char: The character or glyph name.
Returns:
The Unicode category string, or a custom category for special cases.
def sortByUnicode(char: str):
    """Return a sort key for a character based on Unicode collation.

    Args:
        char: Character or glyph name, optionally suffixed.

    Returns:
        The collator's sort key for the resolved base character plus the
        suffix, or None (after logging a warning) when the value cannot be
        resolved.
    """
    if isSuffixed(char):
        char, suffix = toParts(char)
    else:
        suffix = ""

    base = toCharBase(char)

    # toCharBase may yield None; appending the suffix first would raise
    # TypeError before the warning below could be reached.
    sortable = base + suffix if isinstance(base, str) else None

    if sortable:
        return collator.sort_key(sortable)

    logger.warning("Unable to sort: {}", char)
    return None
Returns a sort key for a character based on Unicode collation.
def sortByCategory(glyphItem: str):
    """
    Returns a priority index for sorting glyphs by Unicode category.

    - `L` letter
    - `N` number
    - `P` punctuation
    - `S` symbol
    - `M` mark
    - `C` control
    - `Z` Space separator

    Args:
        glyphItem: The glyph item to categorize.

    Returns:
        An integer priority index for sorting: the position that
        `helpers.findClosestIndex` reports for the glyph's category in the
        priority list, or 10 when it reports no integer.
    """
    priorities = [
        "Lu",
        "Ll",
        "L",
        "Nd",
        "NdTnum",
        "NdOsf",
        "NoSups",
        "NoNumr",
        "NoDnom",
        "N",
        "P",
        "S",
        "M",
        "C",
        "Z",
    ]

    glyphCat = getCategory(glyphItem)
    priority = helpers.findClosestIndex(priorities, glyphCat)

    if isinstance(priority, int):
        return priority

    if glyphCat:
        # The placeholders are required: without them the logger drops the
        # extra arguments and only "[No priority]" is emitted.
        logger.info("[No priority] {} {}", glyphCat, toNameBase(glyphItem))

    # Fallback slot for glyphs whose category has no priority entry.
    return 10
Returns a priority index for sorting glyphs by Unicode category.
- `L`: letter
- `N`: number
- `P`: punctuation
- `S`: symbol
- `M`: mark
- `C`: control
- `Z`: space separator
Arguments:
- glyphItem: The glyph item to categorize.
Returns:
An integer priority index for sorting.