lib.content
import re
import drawBot
import random
from typing import Literal, TypeAlias, Union
from itertools import cycle, islice, dropwhile
from string import capwords
import regex
from caseconverter import camelcase, kebabcase, snakecase, pascalcase
from loguru import logger
from icecream import ic

from lib import helpers, layout, fonts

WordShape: TypeAlias = Literal["descender", "ascender", "caps"]
"""Categories to filter words by their shape.

See `classes.c32_pool.KPool.getItemByWidth` for usage.
"""

TextCase: TypeAlias = Literal["UPPER", "lower", "Title", "Caps"]
"""Defines the supported text casing styles for conversion.

- `UPPER`: Converts all letters to uppercase: `hi ibm` → `HI IBM`
- `lower`: Converts all letters to lowercase: `HI IBM` → `hi ibm`
- `Title`: Capitalizes the first letter of each word, lowercases the rest: `hi ibm` → `Hi Ibm`
- `Caps`: Like Title, but preserves acronyms in uppercase: `hi ibm USA` → `Hi Ibm USA`
"""

CharacterToken: TypeAlias = Literal["word", "nonword"]
"""Type alias for character token types.

See `filterByTokens` for usage.
"""


# ? Common words that should be lowercased in title case (unless at the start/end)
commonWords: list[str] = [
    "a",
    "an",
    "the",
    "as",
    "is",
    "are",
    "and",
    "but",
    "at",
    "in",
    "on",
    "of",
    "for",
    "by",
    "to",
]
"""List of common words to be lowercased in title case (unless at start/end)."""

# Default location of the prohibited terms list read by `filterForbidden`
FORBIDDEN_TERMS_PATH = "/Users/christianjansky/Library/CloudStorage/Dropbox/KOMETA-Draw/01 Content/forbidden-any.txt"


def toPascalCase(input: str, space: bool = True) -> str:
    """Break a run-together identifier at case/digit boundaries and uppercase its first character.

    Args:
        input: The input string.
        space: If True (default), insert a space at every boundary;
            if False, leave the boundaries joined.

    Returns:
        The converted string.

    Example:
        `One123Four` => `One 123 Four`
    """
    # ? Not using caseconverter.pascalcase() because it alters allcaps words
    gap = " " if space else ""
    # Boundaries, applied in this order: A|Aa, a|A, A|0
    boundaries = (
        "([A-Z0-9])([A-Z][a-z])",
        "([a-z])([A-Z])",
        "([A-Za-z])([0-9])",
    )
    result = input
    for boundary in boundaries:
        result = re.sub(boundary, lambda m: f"{m.group(1)}{gap}{m.group(2)}", result)
    return result[:1].upper() + result[1:]


def toCamelCase(input: str) -> str:
    """Convert string to camelCase via `caseconverter.camelcase`.

    Example:
        `Hello World` => `helloWorld`
    """
    return camelcase(input)


def toKebabCase(input: str) -> str:
    """Convert string to kebab-case via `caseconverter.kebabcase`.

    Example:
        `Hello World` => `hello-world`
    """
    return kebabcase(input)


def toSnakeCase(input: str) -> str:
    """Convert string to snake_case via `caseconverter.snakecase`.

    Example:
        `Hello World` => `hello_world`
    """
    return snakecase(input)


def toTitleCase(input: str, retainUpper: bool = True) -> str:
    """
    Convert string to title case, handling common words, compound words and acronyms.

    Each space-separated word is handled by the first rule that applies:

    1. A word listed in `commonWords` is lowercased, unless it is the first
       or the last word of the input.
    2. A word containing `-` or `/` gets every part between those characters
       capitalised with `string.capwords`.
    3. A word starting with two or more uppercase letters / punctuation marks
       is kept as-is when `retainUpper` is True.
    4. Anything else is capitalised with `string.capwords`.

    Args:
        input: The input string.
        retainUpper: If True, retain uppercase acronyms: `True` USA, `False` Usa

    Returns:
        Title-cased string.

    Example:
        `sON Of The USA` => `Son of the USA`
    """
    specialChars: list[str] = ["-", "/"]
    # Capturing class: re.split keeps the separators so they are re-joined verbatim
    specialExp = "([" + "".join(re.escape(char) for char in specialChars) + "])"

    def _hasSpecialChars(word: str) -> bool:
        """Returns True if word contains special characters."""
        return any(char in word for char in specialChars)

    def _handleSpecialChars(word: str) -> str:
        """Apply title case to each part of a word split by any special character."""
        parts = re.split(specialExp, word)
        return "".join(
            part if part in specialChars else capwords(part) for part in parts
        )

    def _processWord(word: str, index: int) -> str:
        """Process a single word for title casing."""
        # ? Always lowercase common words in continuous text.
        # Position is checked by index, so a repeated word is never mistaken
        # for the first/last one.
        isOnEitherSide = index in (0, lastIndex)
        isCommon = word.casefold() in commonWords
        if isCommon and not isOnEitherSide:
            return word.lower()

        # ? Handle special characters
        if _hasSpecialChars(word):
            return _handleSpecialChars(word)

        # Uppercase and punctuation 2+ times at the start of the word
        # (the `|` inside the class is a literal pipe)
        isCaps = regex.match(r"[\p{Lu}|\p{P}]{2,}", word)

        # capwords() better .title() => retains lowercase ’s
        return word if isCaps and retainUpper else capwords(word)

    words = input.split(" ")
    lastIndex = len(words) - 1
    return " ".join(_processWord(word, index) for index, word in enumerate(words))


def changeCase(
    input: list[str] | str,
    case: TextCase = "Title",
) -> list[str] | str:
    """
    Change the case of a string or list of strings.

    The case name is matched case-insensitively (`UPPER` and `upper` are
    equivalent). A falsy `case` passes the input through. An unrecognised
    `case` logs one warning and also returns the input unchanged.

    Args:
        input: String or list of strings to change case.
        case: Desired case ("upper", "lower", "Title", "Caps").

    Returns:
        String or list of strings with changed case.

    Example:
        `the USA`
        - `upper` => `THE USA`
        - `lower` => `the usa`
        - `Title` => `The Usa`
        - `Caps` => `The USA`
    """
    if not case:
        return input  # Pass through unchanged

    converters = {
        "upper": lambda item: item.upper(),
        "lower": lambda item: item.lower(),
        "title": lambda item: toTitleCase(item, False),
        "caps": lambda item: toTitleCase(item),
    }
    convert = converters.get(case.casefold())
    if convert is None:
        logger.warning("Unable to change case: {}", case)
        return input

    if isinstance(input, list):
        return [convert(item) for item in input]
    return convert(input)


def isTitleCase(input: str) -> bool:
    """Returns True if every space-separated word is one uppercase letter followed by lowercase letters only."""
    titleWord = r"^[\p{Lu}][\p{Ll}]+$"
    for part in input.split(" "):
        if not regex.match(titleWord, part):
            return False
    return True


def prettifyText(text: str) -> str:
    """
    Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.

    Removals run first, then the replacements in the order listed, and the
    result is stripped of leading/trailing whitespace.

    Args:
        text: The input text.

    Returns:
        Prettified text.
    """
    removals = [
        # Hair space (U+200A), written as an escape so it stays visible
        "\u200a",
        # Citation references such as [1], [a] or [12]:34–56
        r"\[[A-Za-z\d]+\](?::?\d+(?:[-–]\d+)?)?",
        # Editorial markers
        r"\[citation needed\]",
        r"\[clarification needed\]",
        # Footnote markers with any number of digits: [note 12], [NB 3]
        r"\[note \d+\]",
        r"\[NB \d+\]",
    ]
    for removal in removals:
        text = re.sub(removal, "", text)

    replacements = [
        # Multiple spaces to single space
        (r"[ ]{2,}", " "),
        # Replace dumb single quotes
        (r"\'([A-Za-z]+)\'", r"‘\1’"),
        # Replace dumb single quotes used as contractions: it's => it’s
        (r"([A-Za-z])\'([A-Za-z])?", r"\1’\2"),
        # Replace dumb double quotes
        (r"(\s?|^)\"([^\"]+)\"", r"\1“\2”"),
        # Add missing space before ( { [ in text
        (r"(\w)(\(|\[|\{)", r"\1 \2"),
        # Add missing space after ) } ] in text
        (r"(\)|\]|\})(\w)", r"\1 \2"),
    ]

    for before, after in replacements:
        text = re.sub(before, after, text)

    return text.strip()


def omitMissing(
    input: str | list[str],
    font: str | None = None,
    mode: Literal["glyphs", "words", "lines"] = "words",
    debug=False,
):
    """
    Omit missing characters from text or list of text blocks.

    Args:
        input: A single string or a list of strings to check for missing glyphs.
        font: Font to use for checking glyphs (optional). When given it is set
            as the current DrawBot font.
        mode: Determines the omission granularity:
            - `glyphs`: Omit only the missing characters, preserving the rest of the text.
            - `words`: Omit entire words that contain missing glyphs.
            - `lines`: Omit entire lines that contain missing glyphs.
        debug: If True, log omitted units.

    Returns:
        Filtered text (string input) or list of filtered blocks (list input).
        Blocks that end up empty are left out of the list.

    Raises:
        ValueError: If `mode` is not one of the supported values.
    """
    glues = {"glyphs": "", "words": " ", "lines": "\n"}
    if mode not in glues:
        raise ValueError(f"Unsupported mode: {mode!r}")
    glue = glues[mode]

    if font:
        drawBot.font(font)

    isInputString = isinstance(input, str)
    blocks = [input] if isInputString else input

    output = []
    for block in blocks:
        units = list(block) if mode == "glyphs" else block.split(glue)

        # Single pass: keep supported units, optionally log the others
        filtered = []
        for unit in units:
            if drawBot.fontContainsCharacters(unit):
                filtered.append(unit)
            elif debug:
                # Log omitted fontName if available
                if font:
                    logger.trace(
                        "[Omitted] {} for {}", unit, fonts.getFontName(font)
                    )
                else:
                    logger.trace("[Omitted] {}", unit)

        # Do not add empty list
        if filtered:
            output.append(glue.join(filtered))

    return glue.join(output) if isInputString else output


def splitStringToSentences(input: str) -> list[str]:
    """
    Split running text into a list of sentences.

    Newlines are turned into spaces and whitespace runs are collapsed before
    splitting. A split happens at whitespace that follows `.` or `?` and is
    not followed by a lowercase letter.

    Args:
        input: The input text.

    Example:
        `I am a sentence. I am another one.` => `["I am a sentence.", "I am another one."]`
    """
    # Newlines with spaces, then multiple spaces to single space
    flattened = re.sub(r"\n", " ", input)
    flattened = re.sub(r"\s{2,}", " ", flattened)

    # Skip abbreviations: (F. Elastica), Ficus var. elastica
    boundary = re.compile(
        r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<![A-Z]\.)(?<=\.|\?)\s(?![a-z])"
    )
    return boundary.split(flattened)


def rotateList(input: list) -> list:
    """
    Rotate a list to produce all cyclic permutations.

    Rotations are taken by position, so duplicate items still yield one
    distinct rotation per index.

    Args:
        input: The input list.

    Returns:
        One rotated copy per item, starting with the unrotated order.

    Example:
        `[A, B, C]` => `[[A, B, C], [B, C, A], [C, A, B]]`
    """
    items = list(input)
    return [items[start:] + items[:start] for start in range(len(items))]


def chopSequence(input: str | list[str], limit: int = None, glue=" ", split=" "):
    """
    Build the growing prefixes of a sequence: `A B C` => `A, AB, ABC`.

    Args:
        input: String or list of strings to chop. A string is first split on
            `split` (when `split` is truthy).
        limit: Limit to `n` prefixes; falsy means no limit.
        glue: String to join parts.
        split: String to split input.

    Example:
        - input: single sentence
            - `"I was late."` => `["I", "I was", "I was late."]`
        - input: list of sentences
            - `["For me.", "Right?"]` => `["For me.", "For me. Right?"]`
        - limit: 2
            - `["I", "I was"]`
    """
    parts = input.split(split) if (split and isinstance(input, str)) else input

    total = len(parts)
    # Limit size if provided
    stop = total if not limit else min(limit, total)

    chopped = []
    for end in range(1, stop + 1):
        chopped.append(glue.join(parts[:end]))
    return chopped


def chopList(
    input: list[str],
    clamp: int = None,
    mode: Literal["separate", "connected"] = "separate",
    shuffle=False,
) -> list[str]:
    """
    Chop a list of sentences into smaller parts, optionally connecting or shuffling them.

    The caller's list is never modified: shuffling is done on a copy.

    Args:
        input: List of sentences.
        clamp: Limit to n words per iteration.
        mode: "separate" to chop individually, "connected" to connect chopped sentences.
        shuffle: If True, shuffle (a copy of) input before chopping.

    Example:
        `["Hello there.", "Hi you."]` =>
        - (separate) `["Hello", "Hello there.", "Hi", "Hi you."]`
        - (connected) `["Hello", "Hello there.", "Hello there. Hi", ...]`
    """
    if shuffle:
        # Shuffle a copy so the caller's list keeps its order
        input = list(input)
        random.shuffle(input)

    if mode == "connected":
        # Every rotation of the list becomes one long running text
        input = [" ".join(item) for item in rotateList(input)]

    return helpers.flatten([chopSequence(item, clamp) for item in input])


def permutate(input: list, clamp=20, shuffle=True) -> list:
    """
    Permutate and chop a list of sentences into connected sequences.

    Shorthand for `chopList` in "connected" mode.

    Args:
        input: List of sentences.
        clamp: Limit to `n` words per sequence.
        shuffle: If True, shuffle input before permutation.

    Example:
        - `["Hi Tim", "Foo bar"]` => list of
            - `["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]`
    """
    return chopList(input, clamp=clamp, mode="connected", shuffle=shuffle)


def fillTextOver(container: tuple, content: list, shuffle: bool = True) -> str:
    """
    Returns a string that fills the container up to overflow.

    Items are appended one by one (joined with spaces) until the measured
    text height reaches the container height or the content runs out.

    - Font properties need to be already set

    Args:
        container: Tuple specifying container dimensions.
        content: List of possible sentences/items.
        shuffle: If True, reorder content via `helpers.shuffleAtRandomSegment` before filling.

    Returns:
        The joined text; an empty string when `content` is empty.
    """
    containerW, containerH = layout.toDimensions(container)

    if shuffle:
        content = helpers.shuffleAtRandomSegment(content)

    strings = []
    # Defined up front so empty content returns "" instead of raising
    stream = ""

    for string in content:
        strings.append(string)
        stream = " ".join(strings)
        _, textH = drawBot.textSize(stream, width=containerW)
        if textH >= containerH:
            break

    return stream


def getStringForWidth(pool: list, width: int, threshold: float = 0.995) -> str:
    """
    Get a string from the pool that fits within the specified width.

    The first candidate whose measured width lies in
    `[width * threshold, width]` wins. If none does, the candidate whose
    width `helpers.findClosestValue(..., discardLarger=True)` reports as
    closest is returned, falling back to the first pool item.

    - Font properties need to be set already

    Args:
        pool: List of candidate strings.
        width: Target width.
        threshold: Minimum width threshold, as a fraction of `width`.
    """
    lowerBound = width * threshold

    measured = []
    chosen = None

    for text in pool:
        textWidth, _ = drawBot.textSize(text)
        measured.append(textWidth)

        if lowerBound <= textWidth <= width:
            chosen = text
            break

    if chosen:
        return chosen

    nearest = helpers.findClosestValue(measured, width, discardLarger=True)
    fallbackIndex = measured.index(nearest) if nearest in measured else 0
    return pool[fallbackIndex]


def filterByShape(items: list[str], shape: WordShape | list[WordShape]) -> list[str]:
    """
    Filter out words that contain descender, ascender, or caps characters.

    For each requested shape, the items in which none of that shape's
    characters occur are collected; the per-shape subsets are then combined
    with `helpers.intersect` (presumably keeping only items present in every
    subset — confirm against `helpers`).

    Args:
        items: List of words.
        shape: Shape(s) whose characters must be absent.

    Returns:
        Result of `helpers.intersect` over the per-shape subsets.

    Example:
        - `["hi", "hey"], "descender"` => `["hi"]`
    """
    shapePatterns = {
        "caps": "[A-Z0-9]",
        "ascender": "[bdfihklt]",
        "descender": "[Qgjqpy/,]",
    }

    def _lacksShape(shapeName):
        pattern = shapePatterns.get(shapeName)
        return filter(lambda item: re.search(pattern, item) is None, items)

    subsets = [_lacksShape(name) for name in helpers.coerceList(shape)]
    return helpers.intersect(subsets, retainOrder=False)


def filterByTokens(
    items: list[str], tokens: list[CharacterToken] | None = None
) -> list[str]:
    """
    Filter items by Unicode character token.

    For every token, the items containing at least one character of that
    class are collected; the per-token lists are then combined with
    `helpers.intersect`.

    Args:
        items: List of strings to filter.
        tokens: List of token types to filter by. Defaults to
            `["word", "nonword"]`.

    Returns:
        List of items matching the token criteria.

    Example:
        - `word`, `nonword` => `["A4", "R&B"]`
    """
    # None sentinel instead of a shared mutable default list
    if tokens is None:
        tokens = ["word", "nonword"]

    possiblePatterns = dict(
        word=r"\p{Letter}", nonword=r"\p{Symbol}|\p{Number}|\p{Punctuation}"
    )
    patterns = [possiblePatterns.get(m) for m in tokens]

    def _filterSingleToken(items: list[str], pattern: str):
        return [item for item in items if bool(regex.search(pattern, item))]

    individual = [_filterSingleToken(items, p) for p in patterns]

    return helpers.intersect(individual)


def isRagPretty(
    content: Union[str, drawBot.drawBotDrawingTools.FormattedString], coords: tuple
) -> tuple[bool, bool]:
    """
    Evaluate if a paragraph is nicely typeset.

    Args:
        content: Text content or `FormattedString`.
        coords: Tuple specifying text box coordinates; its third value is
            used as the box width.

    Returns:
        Tuple of booleans (isGreat, isOkay).
        - `isGreat`: All quite long, some very long
        - `isOkay`: All quite long
        `(False, False)` when the evaluation fails (a warning is logged).
    """

    def _calcLineWidths():
        """Returns widths for all lines except last and for last line."""
        textBounds = drawBot.textBoxCharacterBounds(content, coords)
        linesByY = dict()

        # Sum the segment widths that share the same y value
        for segment in textBounds:
            bounds, _, _ = segment
            _, y, w, _ = bounds
            linesByY[y] = linesByY.get(y, 0) + w

        last = linesByY.pop(list(linesByY)[-1])
        return list(linesByY.values()), last

    try:
        _, _, width, _ = coords
        bodyWidths, lastWidth = _calcLineWidths()
        # All lines are quite long
        areAllGood = all(w >= width * 0.9 for w in bodyWidths)
        # A portion of lines are very long
        greatCount = sum(1 for w in bodyWidths if w >= width * 0.95)
        areSomeGreat = greatCount >= len(bodyWidths) / 3
        # Last line is not longest and not a widow
        isLastGood = max(bodyWidths) >= lastWidth >= width * 2 / 3

        isOkay = areAllGood and isLastGood
        # isGreat, isOkay
        return (isOkay and areSomeGreat), isOkay
    except Exception as e:
        # Best effort: report "not pretty" in the documented tuple shape
        logger.warning("Failed isRagPretty: {}", e)
        return False, False


def filterForbidden(input: list[str], path: str = FORBIDDEN_TERMS_PATH) -> list[str]:
    """
    Filter out explicit content using a prohibited terms list (found in `forbidden-....txt`).

    An item is dropped when any non-blank term of the list occurs in it,
    compared case-insensitively as a substring.

    Args:
        input: List of strings to filter.
        path: Text file with one prohibited term per line (UTF-8).

    Returns:
        Filtered list with inappropriate content removed.
    """
    with open(path, encoding="utf-8") as f:
        # Lowercase once; blank lines are ignored
        prohibitedTerms = [
            term.lower() for term in f.read().splitlines() if term.strip()
        ]

    clean = []
    for item in input:
        itemLower = item.lower()
        if not any(term in itemLower for term in prohibitedTerms):
            clean.append(item)

    return clean
Categories to filter words by their shape.
See classes.c32_pool.KPool.getItemByWidth for usage.
Defines the supported text casing styles for conversion.
- UPPER: Converts all letters to uppercase: hi ibm → HI IBM
- lower: Converts all letters to lowercase: HI IBM → hi ibm
- Title: Capitalizes the first letter of each word, lowercases the rest: hi ibm → Hi Ibm
- Caps: Like Title, but preserves acronyms in uppercase: hi ibm USA → Hi Ibm USA
Type alias for character token types.
See filterByTokens for usage.
List of common words to be lowercased in title case (unless at start/end).
def toPascalCase(input: str, space: bool = True) -> str:
    """Break a run-together identifier at case/digit boundaries and uppercase its first character.

    Args:
        input: The input string.
        space: If True (default), insert a space at every boundary;
            if False, leave the boundaries joined.

    Returns:
        The converted string.

    Example:
        `One123Four` => `One 123 Four`
    """
    # ? Not using caseconverter.pascalcase() because it alters allcaps words
    gap = " " if space else ""
    # Boundaries, applied in this order: A|Aa, a|A, A|0
    boundaries = (
        "([A-Z0-9])([A-Z][a-z])",
        "([a-z])([A-Z])",
        "([A-Za-z])([0-9])",
    )
    result = input
    for boundary in boundaries:
        result = re.sub(boundary, lambda m: f"{m.group(1)}{gap}{m.group(2)}", result)
    return result[:1].upper() + result[1:]
Convert string to PascalCase, optionally inserting spaces between words and numbers.
Arguments:
- input: The input string.
- space: If True, insert spaces between (default: True).
Example:
One123Four=>One 123 Four
def toCamelCase(input: str) -> str:
    """Convert string to camelCase via `caseconverter.camelcase`.

    Args:
        input: The input string.

    Returns:
        The camelCase string.

    Example:
        `Hello World` => `helloWorld`
    """
    return camelcase(input)
Convert string to camelCase.
Example:
Hello World=>helloWorld
def toKebabCase(input: str) -> str:
    """Return `input` rewritten in kebab-case via `caseconverter.kebabcase`.

    Args:
        input: The input string.

    Example:
        `Hello World` => `hello-world`
    """
    kebab = kebabcase(input)
    return kebab
Convert string to kebab-case.
Example:
Hello World=>hello-world
def toSnakeCase(input: str) -> str:
    """Return `input` rewritten in snake_case via `caseconverter.snakecase`.

    Args:
        input: The input string.

    Example:
        `Hello World` => `hello_world`
    """
    snake = snakecase(input)
    return snake
Convert string to snake_case.
Example:
Hello World=>hello_world
def toTitleCase(input: str, retainUpper: bool = True) -> str:
    """
    Convert string to title case, handling common words, compound words and acronyms.

    Each space-separated word is handled by the first rule that applies:

    1. A word listed in `commonWords` is lowercased, unless it is the first
       or the last word of the input.
    2. A word containing `-` or `/` gets every part between those characters
       capitalised with `string.capwords`.
    3. A word starting with two or more uppercase letters / punctuation marks
       is kept as-is when `retainUpper` is True.
    4. Anything else is capitalised with `string.capwords`.

    Args:
        input: The input string.
        retainUpper: If True, retain uppercase acronyms: `True` USA, `False` Usa

    Returns:
        Title-cased string.

    Example:
        `sON Of The USA` => `Son of the USA`
    """
    specialChars: list[str] = ["-", "/"]
    # Capturing class: re.split keeps the separators so they are re-joined verbatim
    specialExp = "([" + "".join(re.escape(char) for char in specialChars) + "])"

    def _hasSpecialChars(word: str) -> bool:
        """Returns True if word contains special characters."""
        return any(char in word for char in specialChars)

    def _handleSpecialChars(word: str) -> str:
        """Apply title case to each part of a word split by any special character."""
        parts = re.split(specialExp, word)
        return "".join(
            part if part in specialChars else capwords(part) for part in parts
        )

    def _processWord(word: str, index: int) -> str:
        """Process a single word for title casing."""
        # ? Always lowercase common words in continuous text.
        # Position is checked by index, so a repeated word is never mistaken
        # for the first/last one.
        isOnEitherSide = index in (0, lastIndex)
        isCommon = word.casefold() in commonWords
        if isCommon and not isOnEitherSide:
            return word.lower()

        # ? Handle special characters
        if _hasSpecialChars(word):
            return _handleSpecialChars(word)

        # Uppercase and punctuation 2+ times at the start of the word
        # (the `|` inside the class is a literal pipe)
        isCaps = regex.match(r"[\p{Lu}|\p{P}]{2,}", word)

        # capwords() better .title() => retains lowercase ’s
        return word if isCaps and retainUpper else capwords(word)

    words = input.split(" ")
    lastIndex = len(words) - 1
    return " ".join(_processWord(word, index) for index, word in enumerate(words))
Convert string to title case, handling special cases and acronyms.
Arguments:
- input: The input string.
- retainUpper: If True, retain uppercase acronyms: True → USA, False → Usa
Returns:
Title-cased string.
Example:
sON Of The USA=>Son of the USA
def changeCase(
    input: list[str] | str,
    case: TextCase = "Title",
) -> list[str] | str:
    """
    Change the case of a string or list of strings.

    The case name is matched case-insensitively (`UPPER` and `upper` are
    equivalent). A falsy `case` passes the input through. An unrecognised
    `case` logs one warning and also returns the input unchanged.

    Args:
        input: String or list of strings to change case.
        case: Desired case ("upper", "lower", "Title", "Caps").

    Returns:
        String or list of strings with changed case.

    Example:
        `the USA`
        - `upper` => `THE USA`
        - `lower` => `the usa`
        - `Title` => `The Usa`
        - `Caps` => `The USA`
    """
    if not case:
        return input  # Pass through unchanged

    converters = {
        "upper": lambda item: item.upper(),
        "lower": lambda item: item.lower(),
        "title": lambda item: toTitleCase(item, False),
        "caps": lambda item: toTitleCase(item),
    }
    convert = converters.get(case.casefold())
    if convert is None:
        logger.warning("Unable to change case: {}", case)
        return input

    if isinstance(input, list):
        return [convert(item) for item in input]
    return convert(input)
Change the case of a string or list of strings.
Arguments:
- input: String or list of strings to change case.
- case: Desired case ("upper", "lower", "Title", "Caps").
Returns:
String or list of strings with changed case.
Example:
the USA
- upper => THE USA
- lower => the usa
- Title => The Usa
- Caps => The USA
def isTitleCase(input: str) -> bool:
    """Returns True if every space-separated word is one uppercase letter followed by lowercase letters only."""
    titleWord = r"^[\p{Lu}][\p{Ll}]+$"
    for part in input.split(" "):
        if not regex.match(titleWord, part):
            return False
    return True
Returns True if all words in the string are title case.
def prettifyText(text: str) -> str:
    """
    Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.

    Removals run first, then the replacements in the order listed, and the
    result is stripped of leading/trailing whitespace.

    Args:
        text: The input text.

    Returns:
        Prettified text.
    """
    removals = [
        # Hair space (U+200A), written as an escape so it stays visible
        "\u200a",
        # Citation references such as [1], [a] or [12]:34–56
        r"\[[A-Za-z\d]+\](?::?\d+(?:[-–]\d+)?)?",
        # Editorial markers
        r"\[citation needed\]",
        r"\[clarification needed\]",
        # Footnote markers with any number of digits: [note 12], [NB 3]
        r"\[note \d+\]",
        r"\[NB \d+\]",
    ]
    for removal in removals:
        text = re.sub(removal, "", text)

    replacements = [
        # Multiple spaces to single space
        (r"[ ]{2,}", " "),
        # Replace dumb single quotes
        (r"\'([A-Za-z]+)\'", r"‘\1’"),
        # Replace dumb single quotes used as contractions: it's => it’s
        (r"([A-Za-z])\'([A-Za-z])?", r"\1’\2"),
        # Replace dumb double quotes
        (r"(\s?|^)\"([^\"]+)\"", r"\1“\2”"),
        # Add missing space before ( { [ in text
        (r"(\w)(\(|\[|\{)", r"\1 \2"),
        # Add missing space after ) } ] in text
        (r"(\)|\]|\})(\w)", r"\1 \2"),
    ]

    for before, after in replacements:
        text = re.sub(before, after, text)

    return text.strip()
Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.
Arguments:
- text: The input text.
Returns:
Prettified text.
def omitMissing(
    input: str | list[str],
    font: str | None = None,
    mode: Literal["glyphs", "words", "lines"] = "words",
    debug=False,
):
    """
    Omit missing characters from text or list of text blocks.

    Args:
        input: A single string or a list of strings to check for missing glyphs.
        font: Font to use for checking glyphs (optional). When given it is set
            as the current DrawBot font.
        mode: Determines the omission granularity:
            - `glyphs`: Omit only the missing characters, preserving the rest of the text.
            - `words`: Omit entire words that contain missing glyphs.
            - `lines`: Omit entire lines that contain missing glyphs.
        debug: If True, log omitted units.

    Returns:
        Filtered text (string input) or list of filtered blocks (list input).
        Blocks that end up empty are left out of the list.

    Raises:
        ValueError: If `mode` is not one of the supported values.
    """
    glues = {"glyphs": "", "words": " ", "lines": "\n"}
    if mode not in glues:
        raise ValueError(f"Unsupported mode: {mode!r}")
    glue = glues[mode]

    if font:
        drawBot.font(font)

    isInputString = isinstance(input, str)
    blocks = [input] if isInputString else input

    output = []
    for block in blocks:
        units = list(block) if mode == "glyphs" else block.split(glue)

        # Single pass: keep supported units, optionally log the others
        filtered = []
        for unit in units:
            if drawBot.fontContainsCharacters(unit):
                filtered.append(unit)
            elif debug:
                # Log omitted fontName if available
                if font:
                    logger.trace(
                        "[Omitted] {} for {}", unit, fonts.getFontName(font)
                    )
                else:
                    logger.trace("[Omitted] {}", unit)

        # Do not add empty list
        if filtered:
            output.append(glue.join(filtered))

    return glue.join(output) if isInputString else output
Omit missing characters from text or list of text blocks.
Arguments:
- input: A single string or a list of strings to check for missing glyphs.
- font: Font to use for checking glyphs (optional).
- mode: Determines the omission granularity:
  - glyphs: Omit only the missing characters, preserving the rest of the text.
  - words: Omit entire words that contain missing glyphs.
  - lines: Omit entire lines that contain missing glyphs.
- debug: If True, log omitted units.
Returns:
Filtered text or list of text blocks with missing data omitted, depending on mode.
def splitStringToSentences(input: str) -> list[str]:
    """
    Split running text into a list of sentences.

    Newlines are turned into spaces and whitespace runs are collapsed before
    splitting. A split happens at whitespace that follows `.` or `?` and is
    not followed by a lowercase letter.

    Args:
        input: The input text.

    Returns:
        The sentences, in order; text without a boundary comes back as a
        single-item list.

    Example:
        `I am a sentence. I am another one.` => `["I am a sentence.", "I am another one."]`
    """
    # Newlines with spaces, then multiple spaces to single space
    flattened = re.sub(r"\n", " ", input)
    flattened = re.sub(r"\s{2,}", " ", flattened)

    # Skip abbreviations: (F. Elastica), Ficus var. elastica
    boundary = re.compile(
        r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<![A-Z]\.)(?<=\.|\?)\s(?![a-z])"
    )
    return boundary.split(flattened)
Split running text into a list of sentences.
Arguments:
- input: The input text.
Example:
I am a sentence. I am another one.=>["I am a sentence.", "I am another one."]
def rotateList(input: list) -> list:
    """
    Rotate a list to produce all cyclic permutations.

    Rotations are taken by position, so duplicate items still yield one
    distinct rotation per index.

    Args:
        input: The input list.

    Returns:
        One rotated copy per item, starting with the unrotated order;
        an empty list for empty input.

    Example:
        `[A, B, C]` => `[[A, B, C], [B, C, A], [C, A, B]]`
    """
    items = list(input)
    return [items[start:] + items[:start] for start in range(len(items))]
Rotate a list to produce all cyclic permutations.
Arguments:
- input: The input list.
Example:
[A, B, C]=>[[A, B, C], [B, C, A], [C, A, B]]
353def chopSequence(input: str | list[str], limit: int = None, glue=" ", split=" "): 354 """ 355 Split input into meaningful parts, optionally limiting the number of words: `A B C` => `A, AB, ABC`. 356 357 Args: 358 input: String or list of strings to chop. 359 limit: Limit to `n` words. 360 glue: String to join parts. 361 split: String to split input. 362 363 Example: 364 - input: single sentence 365 - `"I was late."` => `["I", "I was", "I was late."]` 366 - input: list of sentences 367 - `["For me.", "Right?"]` => `["For me.", "For me. Right?"]` 368 - limit: 2 369 - `["I", "I was"]` 370 """ 371 if split and isinstance(input, str): 372 input = input.split(split) 373 374 inputLen = len(input) 375 # Limit size if provided 376 stop = min(limit, inputLen) if limit else inputLen 377 378 return [glue.join(input[:i]) for i in range(1, stop + 1)]
Split input into meaningful parts, optionally limiting the number of words: A B C => A, AB, ABC.
Arguments:
- input: String or list of strings to chop.
- limit: Limit to n words.
- glue: String to join parts.
- split: String to split input.
Example:
- input: single sentence
"I was late."=>["I", "I was", "I was late."]
- input: list of sentences
["For me.", "Right?"]=>["For me.", "For me. Right?"]
- limit: 2
["I", "I was"]
def chopList(
    input: list[str],
    clamp: int = None,
    mode: Literal["separate", "connected"] = "separate",
    shuffle=False,
) -> list[str]:
    """
    Chop a list of sentences into smaller parts, optionally connecting or shuffling them.

    The caller's list is never modified: shuffling is done on a copy.

    Args:
        input: List of sentences.
        clamp: Limit to n words per iteration.
        mode: "separate" to chop individually, "connected" to connect chopped sentences.
        shuffle: If True, shuffle (a copy of) input before chopping.

    Returns:
        Flat list of chopped parts.

    Example:
        `["Hello there.", "Hi you."]` =>
        - (separate) `["Hello", "Hello there.", "Hi", "Hi you."]`
        - (connected) `["Hello", "Hello there.", "Hello there. Hi", ...]`
    """
    if shuffle:
        # Shuffle a copy so the caller's list keeps its order
        input = list(input)
        random.shuffle(input)

    if mode == "connected":
        # Every rotation of the list becomes one long running text
        input = [" ".join(item) for item in rotateList(input)]

    return helpers.flatten([chopSequence(item, clamp) for item in input])
Chop a list of sentences into smaller parts, optionally connecting or shuffling them.
Arguments:
- input: List of sentences.
- clamp: Limit to n words per iteration.
- mode: "separate" to chop individually, "connected" to connect chopped sentences.
- shuffle: If True, shuffle input before chopping.
Example:
["Hello there.", "Hi you."] =>
- (separate) ["Hello", "Hello there.", "Hi", "Hi you."]
- (connected) ["Hello", "Hello there.", "Hello there. Hi", ...]
def permutate(input: list, clamp=20, shuffle=True) -> list:
    """
    Permutate and chop a list of sentences into connected sequences.

    Shorthand for `chopList` in "connected" mode.

    Args:
        input: List of sentences.
        clamp: Limit to `n` words per sequence.
        shuffle: If True, shuffle input before permutation.

    Example:
        - `["Hi Tim", "Foo bar"]` => list of
            - `["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]`
    """
    return chopList(input, clamp=clamp, mode="connected", shuffle=shuffle)
Permutate and chop a list of sentences into connected sequences.
Arguments:
- input: List of sentences.
- clamp: Limit to n words per sequence.
- shuffle: If True, shuffle input before permutation.
Example:
["Hi Tim", "Foo bar"] => list of ["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]
def fillTextOver(container: tuple, content: list, shuffle: bool = True) -> str:
    """
    Returns a string that fills the container up to overflow.

    Items are appended one by one (joined with spaces) until the measured
    text height reaches the container height or the content runs out.

    - Font properties need to be already set

    Args:
        container: Tuple specifying container dimensions.
        content: List of possible sentences/items.
        shuffle: If True, reorder content via `helpers.shuffleAtRandomSegment` before filling.

    Returns:
        The joined text; an empty string when `content` is empty.
    """
    containerW, containerH = layout.toDimensions(container)

    if shuffle:
        content = helpers.shuffleAtRandomSegment(content)

    strings = []
    # Defined up front so empty content returns "" instead of raising
    stream = ""

    for string in content:
        strings.append(string)
        stream = " ".join(strings)
        _, textH = drawBot.textSize(stream, width=containerW)
        if textH >= containerH:
            break

    return stream
Returns a string that fills the container up to overflow.
- Font properties need to be already set
Arguments:
- container: Tuple specifying container dimensions.
- content: List of possible sentences/items.
- shuffle: If True, shuffle content before filling.
def getStringForWidth(pool: list, width: int, threshold: float = 0.995) -> str:
    """
    Pick a string from the pool whose measured width fits the target width.

    The first candidate measuring between `width * threshold` and `width`
    (inclusive) wins. Without such a candidate, the choice falls back to the
    value returned by `helpers.findClosestValue`, and to the first pool item
    when that value is not among the measured widths.

    - Font properties need to be set already

    Args:
        pool: List of candidate strings.
        width: Target width.
        threshold: Minimum width threshold.
    """
    lowerBound, upperBound = width * threshold, width

    measured = []
    found = None

    for text in pool:
        textWidth, _ = drawBot.textSize(text)
        measured.append(textWidth)

        if lowerBound <= textWidth <= upperBound:
            found = text
            break

    # Guard clause: a (truthy) direct hit short-circuits the fallback
    if found:
        return found

    nearest = helpers.findClosestValue(measured, width, discardLarger=True)
    index = measured.index(nearest) if nearest in measured else 0
    return pool[index]
Get a string from the pool that fits within the specified width.
- Font properties need to be set already
Arguments:
- pool: List of candidate strings.
- width: Target width.
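- threshold: Fraction of `width` used as the minimum acceptable width; a candidate fits when it measures between `width * threshold` and `width`.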
def filterByShape(items: list[str], shape: WordShape | list[WordShape]) -> list[str]:
    """
    Filter words by descender, ascender, or caps shape.

    A word is kept for a given shape when it contains none of that shape's
    characters; the per-shape subsets are then combined via
    `helpers.intersect`.

    Args:
        items: List of words.
        shape: Shape(s) to filter by.

    Returns:
        Result of intersecting the per-shape subsets (order not retained).

    Example:
        - `["hi", "hey"], "descender"` => `["hi"]`
    """
    shapePatterns = {
        "caps": "[A-Z0-9]",
        "ascender": "[bdfihklt]",
        "descender": "[Qgjqpy/,]",
    }

    def _withoutShape(shapeName):
        """Lazily yield the items that have no character of `shapeName`."""
        pattern = shapePatterns.get(shapeName)

        def _lacksShape(word: str) -> bool:
            return re.search(pattern, word) is None

        return filter(_lacksShape, items)

    subsets = []
    for requested in helpers.coerceList(shape):
        subsets.append(_withoutShape(requested))

    return helpers.intersect(subsets, retainOrder=False)
Filter words by descender, ascender, or caps shape.
Arguments:
- items: List of words.
- shape: Shape(s) to filter by.
Returns:
List of words that do not contain characters of the given shape(s).
Example:
["hi", "hey"], "descender" => ["hi"]
def filterByTokens(
    items: list[str], tokens: list[CharacterToken] = ["word", "nonword"]
) -> list[str]:
    """
    Filter items by Unicode character token.

    For every requested token the items containing at least one matching
    character are collected; the per-token lists are then combined via
    `helpers.intersect`.

    Args:
        items: List of strings to filter.
        tokens: List of token types to filter by.

    Returns:
        List of items matching the token criteria.

    Example:
        - `word`, `nonword` => `["A4", "R&B"]`
    """
    tokenPatterns = {
        "word": r"\p{Letter}",
        "nonword": r"\p{Symbol}|\p{Number}|\p{Punctuation}",
    }

    matchesPerToken = []
    for token in tokens:
        pattern = tokenPatterns.get(token)
        matching = [item for item in items if regex.search(pattern, item)]
        matchesPerToken.append(matching)

    return helpers.intersect(matchesPerToken)
Filter items by Unicode character token.
Arguments:
- items: List of strings to filter.
- tokens: List of token types to filter by.
Returns:
List of items matching the token criteria.
Example:
word, nonword => ["A4", "R&B"]
def isRagPretty(
    content: Union[str, drawBot.drawBotDrawingTools.FormattedString], coords: tuple
) -> tuple[bool, bool]:
    """
    Evaluate if a paragraph is nicely typeset.

    Line widths are summed from `drawBot.textBoxCharacterBounds`, grouped by
    the `y` of each segment. The last line is judged separately from the
    body (all other lines).

    Args:
        content: Text content or `FormattedString`.
        coords: Tuple specifying text box coordinates; its third item is
            used as the box width.

    Returns:
        Tuple of booleans (isGreat, isOkay).
        - `isGreat`: `isOkay` and at least a third of the body lines reach
          95 % of the box width
        - `isOkay`: every body line reaches 90 % of the box width, and the
          last line is no wider than the widest body line but at least 2/3
          of the box width

        `(False, False)` is returned (and a warning logged) when the
        evaluation fails, e.g. when there are no body lines to compare with.
    """

    def _calcLineWidths():
        """Returns widths for all lines except last and for last line."""
        lineWidths = dict()

        for bounds, _, _ in drawBot.textBoxCharacterBounds(content, coords):
            _, y, w, _ = bounds
            # Segments sharing the same y belong to the same line
            lineWidths[y] = lineWidths.get(y, 0) + w

        # Dicts keep insertion order, so the last key is the last line
        last = lineWidths.pop(list(lineWidths)[-1])
        return list(lineWidths.values()), last

    try:
        _, _, width, _ = coords
        bodyWidths, lastWidth = _calcLineWidths()
        # All lines are quite long
        areAllGood = all(w >= width * 0.9 for w in bodyWidths)
        # A portion of lines are very long
        greatCount = sum(1 for w in bodyWidths if w >= width * 0.95)
        areSomeGreat = greatCount >= len(bodyWidths) / 3
        # Last line is not longest and not a widow
        isLastGood = max(bodyWidths) >= lastWidth >= width * 2 / 3

        isOkay = areAllGood and isLastGood
        # isGreat, isOkay
        return (isOkay and areSomeGreat), isOkay
    except Exception as e:
        # Best-effort check: report "not pretty" rather than crash. Keep the
        # tuple shape so callers can always unpack the result.
        logger.warning("Failed isRagPretty: {}", e)
        return False, False
Evaluate if a paragraph is nicely typeset.
Arguments:
- content: Text content or `FormattedString`.
- coords: Tuple specifying text box coordinates.
Returns:
Tuple of booleans (isGreat, isOkay).
- isGreat: All quite long, some very long
- isOkay: All quite long
def filterForbidden(
    input: list[str],
    path: str = "/Users/christianjansky/Library/CloudStorage/Dropbox/KOMETA-Draw/01 Content/forbidden-any.txt",
) -> list[str]:
    """
    Filter out explicit content using a prohibited terms list (found in `forbidden-....txt`).

    Matching is a case-insensitive substring test: an item is dropped as soon
    as any prohibited term occurs anywhere inside it. Blank lines in the terms
    file are ignored; other lines are used as written (not stripped).

    Args:
        input: List of strings to filter.
        path: Location of the UTF-8 terms file, one term per line. Defaults
            to the project's `forbidden-any.txt`.

    Returns:
        Filtered list with inappropriate content removed.
    """
    with open(path, encoding="utf-8") as f:
        # Lowercase every term once instead of once per item/term pair, and
        # skip blank lines: an empty term would match every item.
        prohibited_terms = [
            line.lower() for line in f.read().splitlines() if line.strip()
        ]

    def _is_clean(item: str) -> bool:
        """Return True if no prohibited term occurs in `item`."""
        item_lower = item.lower()
        return not any(term in item_lower for term in prohibited_terms)

    return [item for item in input if _is_clean(item)]
Filter out explicit content using a prohibited terms list (found in forbidden-....txt).
Arguments:
- input: List of strings to filter.
Returns:
Filtered list with inappropriate content removed.