lib.content
import re
import drawBot
import random
from typing import Literal, TypeAlias, Union
from itertools import cycle, islice, dropwhile
from string import capwords
import regex
from caseconverter import camelcase, kebabcase, snakecase
from loguru import logger
from icecream import ic

from lib import helpers, layout

WordShape: TypeAlias = Literal["descender", "ascender", "caps"]
"""Categories to filter words by their shape.

See `classes.c32_pool.KPool.getItemByWidth` for usage.
"""

TextCase: TypeAlias = Literal["UPPER", "lower", "Title", "Caps"]
"""Defines the supported text casing styles for conversion.

- `UPPER`: Converts all letters to uppercase: `hi ibm` → `HI IBM`
- `lower`: Converts all letters to lowercase: `HI IBM` → `hi ibm`
- `Title`: Capitalizes the first letter of each word, lowercases the rest: `hi ibm` → `Hi Ibm`
- `Caps`: Like Title, but preserves acronyms in uppercase: `hi ibm USA` → `Hi Ibm USA`
"""

CharacterToken: TypeAlias = Literal["word", "nonword"]
"""Type alias for character token types.

See `filterByTokens` for usage.
"""


# ? Common words that should be lowercased in title case (unless at the start/end)
commonWords: list[str] = [
    "a",
    "an",
    "the",
    "as",
    "is",
    "are",
    "and",
    "but",
    "at",
    "in",
    "on",
    "of",
    "for",
    "by",
    "to",
]
"""List of common words to be lowercased in title case (unless at start/end)."""
WordShape
Categories to filter words by their shape.
See classes.c32_pool.KPool.getItemByWidth for usage.

TextCase
Defines the supported text casing styles for conversion.
- UPPER: Converts all letters to uppercase: hi ibm → HI IBM
- lower: Converts all letters to lowercase: HI IBM → hi ibm
- Title: Capitalizes the first letter of each word, lowercases the rest: hi ibm → Hi Ibm
- Caps: Like Title, but preserves acronyms in uppercase: hi ibm USA → Hi Ibm USA

CharacterToken
Type alias for character token types.
See filterByTokens for usage.

commonWords
List of common words to be lowercased in title case (unless at start/end).
def parseCamelCase(string):
    """Split camel case and numbers into separate words.

    Example:
        `One123Four` => `One 123 Four`
    """
    # A Aa, a A, A 0
    expressions = ["([A-Z0-9])([A-Z][a-z])", "([a-z])([A-Z])", "([A-Za-z])([0-9])"]
    for exp in expressions:
        string = re.sub(rf"{exp}", r"\1 \2", string)
    return string
Split camel case and numbers into separate words.
Example:
One123Four => One 123 Four
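Usage sketch (assumes lib.content is importable; outputs follow the docstring example above):

from lib.content import parseCamelCase

# Splits at lowercase→uppercase and letter→digit transitions
print(parseCamelCase("One123Four"))  # "One 123 Four"
print(parseCamelCase("helloWorld"))  # "hello World"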
def toCamelCase(input: str):
    """Convert string to camelCase.

    Example:
        `Hello World` => `helloWorld`
    """
    return camelcase(input)
Convert string to camelCase.
Example:
Hello World => helloWorld
def toKebabCase(input: str):
    """Convert string to kebab-case.

    Example:
        `Hello World` => `hello-world`
    """
    return kebabcase(input)
Convert string to kebab-case.
Example:
Hello World => hello-world
def toSnakeCase(input: str):
    """Convert string to snake_case.

    Example:
        `Hello World` => `hello_world`
    """
    return snakecase(input)
Convert string to snake_case.
Example:
Hello World => hello_world
def toTitleCase(input: str, retainUpper: bool = True) -> str:
    """
    Convert string to title case, handling special cases and acronyms.

    Args:
        input: The input string.
        retainUpper: If True, retain uppercase acronyms: `True` USA, `False` Usa

    Returns:
        Title-cased string.

    Example:
        `sON Of The USA` => `Son of the USA`
    """
    specialChars: list[str] = ["-", "/"]

    def _hasSpecialChars(word: str) -> bool:
        """Returns True if word contains special characters."""
        return any(char in word for char in specialChars)

    def _handleSpecialChars(word: str) -> str:
        """Apply title case to each part of a word split by special characters."""
        for char in specialChars:
            if char in word:
                # ? Split by special char and apply title case to each part
                parts = word.split(char)
                return char.join([capwords(part) for part in parts])

    def _processWord(word: str) -> str:
        """Process a single word for title casing."""
        # ? Always lowercase common words in continuous text
        isOnEitherSide = helpers.isFirst(words, word) or helpers.isLast(words, word)
        isCommon = word.casefold() in commonWords
        if isCommon and not isOnEitherSide:
            return word.lower()

        # ? Handle special characters
        if _hasSpecialChars(word):
            return _handleSpecialChars(word)

        # Uppercase and punctuation 2+ times
        isCaps = regex.match(r"[\p{Lu}|\p{P}]{2,}", word)

        # capwords() is preferred over .title(): it retains a lowercase ’s
        return word if isCaps and retainUpper else capwords(word)

    words = input.split(" ")
    words = [_processWord(word) for word in words]
    return " ".join(words)
Convert string to title case, handling special cases and acronyms.
Arguments:
- input: The input string.
- retainUpper: If True, retain uppercase acronyms: True => USA, False => Usa
Returns:
Title-cased string.
Example:
sON Of The USA => Son of the USA
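A usage sketch with hypothetical inputs; the expected results follow the docstring example above and assume helpers.isFirst/isLast flag the first and last word of the sentence:

from lib.content import toTitleCase

print(toTitleCase("sON Of The USA"))                     # "Son of the USA"
print(toTitleCase("sON Of The USA", retainUpper=False))  # "Son of the Usa"
print(toTitleCase("state-of-the-art"))                   # "State-Of-The-Art"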
def changeCase(
    input: list[str] | str,
    case: TextCase = "Title",
) -> list[str] | str:
    """
    Change the case of a string or list of strings.

    Args:
        input: String or list of strings to change case.
        case: Desired case ("upper", "lower", "Title", "Caps").

    Returns:
        String or list of strings with changed case.

    Example:
        `the USA`
        - `upper` => `THE USA`
        - `lower` => `the usa`
        - `Title` => `The Usa`
        - `Caps` => `The USA`
    """

    def _change(item):
        if case.casefold() == "upper":
            return item.upper()
        elif case.casefold() == "lower":
            return item.lower()
        elif case.casefold() == "title":
            return toTitleCase(item, False)
        elif case.casefold() == "caps":
            return toTitleCase(item)
        else:
            logger.warning("Unable to change case: {}", case)
            # Fall back to the unchanged item for unknown case values
            return item

    if not case:
        return input  # Pass through unchanged

    if isinstance(input, list):
        return [_change(item) for item in input]
    else:
        return _change(input)
Change the case of a string or list of strings.
Arguments:
- input: String or list of strings to change case.
- case: Desired case ("upper", "lower", "Title", "Caps").
Returns:
String or list of strings with changed case.
Example:
the USA
- upper => THE USA
- lower => the usa
- Title => The Usa
- Caps => The USA
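A short usage sketch (hypothetical inputs; results per the docstring above). The case argument is matched case-insensitively, and lists are processed item by item:

from lib.content import changeCase

print(changeCase("the USA", "Caps"))               # "The USA"
print(changeCase(["the USA", "hi ibm"], "UPPER"))  # ["THE USA", "HI IBM"]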
def isTitleCase(input: str) -> bool:
    """Returns True if all words in the string are title case."""
    return all([regex.match(r"^[\p{Lu}][\p{Ll}]+$", part) for part in input.split(" ")])
Returns True if all words in the string are title case.
def prettifyText(text: str) -> str:
    """
    Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.

    Args:
        text: The input text.

    Returns:
        Prettified text.
    """
    removals = [
        # Hair space
        r" ",
        # Remove citation references
        r"\[[A-Za-z\d]+\](?::?\d+(?:[-–]\d+)?)?",
        # Remove [citation needed]
        r"\[citation needed\]",
        r"\[clarification needed\]",
        r"\[note [\d+]]",
        r"\[NB [\d+]]",
    ]
    for removal in removals:
        text = re.sub(rf"{removal}", "", text)

    replacements = [
        # Multiple spaces to single space
        (r"[ ]{2,}", " "),
        # Replace dumb single quotes
        (r"\'([A-Za-z]+)\'", r"‘\1’"),
        # Replace dumb single quotes used as contractions: it's => it’s
        (r"([A-Za-z])\'([A-Za-z])?", r"\1’\2"),
        # Replace dumb double quotes
        (r"(\s?|^)\"([^\"]+)\"", r"\1“\2”"),
        # Add missing space before ( { [ in text
        (r"(\w)(\(|\[|\{)", r"\1 \2"),
        # Add missing space after ) } ] in text
        (r"(\)|\]|\})(\w)", r"\1 \2"),
    ]

    for before, after in replacements:
        text = re.sub(before, after, text)

    return text.strip()
Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.
Arguments:
- text: The input text.
Returns:
Prettified text.
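A hedged example with a made-up input; the exact result depends on the patterns above, but a citation marker, doubled spaces, and straight quotes should come out roughly like this:

from lib.content import prettifyText

raw = "The fig[12] is  'special'."
print(prettifyText(raw))  # Expected: "The fig is ‘special’."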
def omitMissing(
    input: str | list[str],
    font: str = None,
    mode: Literal["words", "glyphs"] = "words",
    debug=False,
):
    """
    Omit missing characters from text or list of text blocks.

    Args:
        input: A single string or a list of strings to check for missing glyphs.
        font: Font to use for checking glyphs (optional).
        mode: Determines the omission granularity:
            - `words`: Omit entire words that contain missing glyphs.
            - `glyphs`: Omit only the missing characters, preserving the rest of the text.
        debug: If True, log omitted units.

    Returns:
        Filtered text or list of text blocks with missing characters or words omitted, depending on mode.
    """
    if font:
        drawBot.font(font)

    isInputString = isinstance(input, str)
    isModeWords = mode == "words"
    glue = " " if isModeWords else ""
    blocks = [input] if isInputString else input

    output = []
    for block in blocks:
        units = block.split(glue) if isModeWords else list(block)
        filtered = [unit for unit in units if drawBot.fontContainsCharacters(unit)]

        if debug:
            [
                logger.trace("[Omitted] {}", unit)
                for unit in units
                if not drawBot.fontContainsCharacters(unit)
            ]

        # Do not add empty list
        if filtered:
            output.append(glue.join(filtered))

    return glue.join(output) if isInputString else output
Omit missing characters from text or list of text blocks.
Arguments:
- input: A single string or a list of strings to check for missing glyphs.
- font: Font to use for checking glyphs (optional).
- mode: Determines the omission granularity:
  - words: Omit entire words that contain missing glyphs.
  - glyphs: Omit only the missing characters, preserving the rest of the text.
- debug: If True, log omitted units.
Returns:
Filtered text or list of text blocks with missing characters or words omitted, depending on mode.
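A sketch of how this might be called from a DrawBot script. The font name is a placeholder; what actually gets omitted depends on the glyph coverage of the installed font:

from lib.content import omitMissing

# Drop whole words that contain unsupported characters
clean_words = omitMissing("Grüße → Hello", font="Helvetica", mode="words")
# Drop only the unsupported characters themselves, logging each omission
clean_glyphs = omitMissing("Grüße → Hello", mode="glyphs", debug=True)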
def splitStringToSentences(input: str) -> list[str]:
    """
    Split running text into a list of sentences.

    Args:
        input: The input text.

    Example:
        `I am a sentence. I am another one.` => `["I am a sentence.", "I am another one."]`
    """
    replacements = [
        # Newlines with spaces
        (r"\n", " "),
        # Multiple spaces to single space
        (r"\s{2,}", " "),
    ]
    for [before, after] in replacements:
        input = re.sub(rf"{before}", after, input)

    # Skip abbreviations: (F. Elastica), Ficus var. elastica
    sentenceExp = r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<![A-Z]\.)(?<=\.|\?)\s(?![a-z])"
    return re.split(sentenceExp, input)
Split running text into a list of sentences.
Arguments:
- input: The input text.
Example:
I am a sentence. I am another one. => ["I am a sentence.", "I am another one."]
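Usage sketch; the abbreviation guard in the split pattern keeps "F. Elastica" together:

from lib.content import splitStringToSentences

text = "I am a sentence. I am another one.\nF. Elastica stays intact."
print(splitStringToSentences(text))
# ["I am a sentence.", "I am another one.", "F. Elastica stays intact."]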
def rotateList(input: list) -> list:
    """
    Rotate a list to produce all cyclic permutations.

    Args:
        input: The input list.

    Example:
        `[A, B, C]` => `[[A, B, C], [B, C, A], [C, A, B]]`
    """
    output = []

    for item in input:
        cycled = cycle(input)
        skipped = dropwhile(lambda x: x != item, cycled)
        sliced = islice(skipped, None, len(input))

        output.append(list(sliced))

    return output
Rotate a list to produce all cyclic permutations.
Arguments:
- input: The input list.
Example:
[A, B, C] => [[A, B, C], [B, C, A], [C, A, B]]
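Usage sketch; each input item becomes the head of one cyclic permutation:

from lib.content import rotateList

print(rotateList(["A", "B", "C"]))
# [["A", "B", "C"], ["B", "C", "A"], ["C", "A", "B"]]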
def chopSequence(input: str | list[str], limit: int = None, glue=" ", split=" "):
    """
    Split input into meaningful parts, optionally limiting the number of words: `A B C` => `A, AB, ABC`.

    Args:
        input: String or list of strings to chop.
        limit: Limit to `n` words.
        glue: String to join parts.
        split: String to split input.

    Example:
        - input: single sentence
            - `"I was late."` => `["I", "I was", "I was late."]`
        - input: list of sentences
            - `["For me.", "Right?"]` => `["For me.", "For me. Right?"]`
        - limit: 2
            - `["I", "I was"]`
    """
    if split and isinstance(input, str):
        input = input.split(split)

    inputLen = len(input)
    # Limit size if provided
    stop = min(limit, inputLen) if limit else inputLen

    return [glue.join(input[:i]) for i in range(1, stop + 1)]
Split input into meaningful parts, optionally limiting the number of words: A B C => A, AB, ABC.
Arguments:
- input: String or list of strings to chop.
- limit: Limit to n words.
- glue: String to join parts.
- split: String to split input.
Example:
- input: single sentence
  "I was late." => ["I", "I was", "I was late."]
- input: list of sentences
  ["For me.", "Right?"] => ["For me.", "For me. Right?"]
- limit: 2
  ["I", "I was"]
def chopList(
    input: list[str],
    clamp: int = None,
    mode: Literal["separate", "connected"] = "separate",
    shuffle=False,
) -> list[str]:
    """
    Chop a list of sentences into smaller parts, optionally connecting or shuffling them.

    Args:
        input: List of sentences.
        clamp: Limit to n words per iteration.
        mode: "separate" to chop individually, "connected" to connect chopped sentences.
        shuffle: If True, shuffle input before chopping.

    Example:
        `["Hello there.", "Hi you."]` =>
        - (separate) `["Hello", "Hello there.", "Hi", "Hi you."]`
        - (connected) `["Hello", "Hello there.", "Hello there. Hi", ...]`
    """
    if shuffle:
        random.shuffle(input)

    if mode == "connected":
        input = [" ".join(item) for item in rotateList(input)]

    return helpers.flatten([chopSequence(item, clamp) for item in input])
Chop a list of sentences into smaller parts, optionally connecting or shuffling them.
Arguments:
- input: List of sentences.
- clamp: Limit to n words per iteration.
- mode: "separate" to chop individually, "connected" to connect chopped sentences.
- shuffle: If True, shuffle input before chopping.
Example:
["Hello there.", "Hi you."]=>
- (separate)
["Hello", "Hello there.", "Hi", "Hi you."]- (connected)
["Hello", "Hello there.", "Hello there. Hi", ...]
def permutate(input: list, clamp=20, shuffle=True) -> list:
    """
    Permutate and chop a list of sentences into connected sequences.

    Args:
        input: List of sentences.
        clamp: Limit to `n` words per sequence.
        shuffle: If True, shuffle input before permutation.

    Example:
        - `["Hi Tim", "Foo bar"]` => list of
        - `["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]`
    """
    return chopList(input, clamp, "connected", shuffle)
Permutate and chop a list of sentences into connected sequences.
Arguments:
- input: List of sentences.
- clamp: Limit to n words per sequence.
- shuffle: If True, shuffle input before permutation.
Example:
["Hi Tim", "Foo bar"]=> list of["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]
def fillTextOver(container: tuple, content: list, shuffle: bool = True) -> str:
    """
    Returns a string that fills the container up to overflow.

    - Font properties need to be already set

    Args:
        container: Tuple specifying container dimensions.
        content: List of possible sentences/items.
        shuffle: If True, shuffle content before filling.
    """
    containerW, containerH = layout.toDimensions(container)

    if shuffle:
        content = helpers.shuffleAtRandomSegment(content)

    strings = []

    for string in content:
        strings.append(string)
        stream = " ".join(strings)
        _, textH = drawBot.textSize(stream, width=containerW)
        if textH >= containerH:
            break

    return stream
Returns a string that fills the container up to overflow.
- Font properties need to be already set
Arguments:
- container: Tuple specifying container dimensions.
- content: List of possible sentences/items.
- shuffle: If True, shuffle content before filling.
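A hedged sketch of typical use inside a DrawBot script. It assumes the current font and size are already set and that layout.toDimensions accepts a plain (width, height) tuple; both are assumptions about the surrounding helpers, not guarantees:

import drawBot
from lib.content import fillTextOver

drawBot.newPage(600, 400)
drawBot.font("Helvetica", 14)
pool = ["One short line.", "Another line of copy.", "And a third one."]
# Append sentences until the set text overflows the container (or the pool runs out)
stream = fillTextOver((300, 200), pool, shuffle=False)
drawBot.textBox(stream, (40, 40, 300, 200))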
def getStringForWidth(pool: list, width: int, threshold: float = 0.995) -> str:
    """
    Get a string from the pool that fits within the specified width.

    - Font properties need to be set already

    Args:
        pool: List of candidate strings.
        width: Target width.
        threshold: Minimum width threshold.
    """

    def _isWidthAppropriate(candidateWidth: int):
        return minWidth <= candidateWidth <= maxWidth

    minWidth, maxWidth = width * threshold, width

    candidateWidths = []
    match = None

    for candidate in pool:
        candidateWidth, _ = drawBot.textSize(candidate)
        candidateWidths.append(candidateWidth)

        if _isWidthAppropriate(candidateWidth):
            match = candidate
            break

    if match:
        return match
    else:
        closestWidth = helpers.findClosestValue(
            candidateWidths, width, discardLarger=True
        )
        i = (
            candidateWidths.index(closestWidth)
            if closestWidth in candidateWidths
            else 0
        )
        return pool[i]
Get a string from the pool that fits within the specified width.
- Font properties need to be set already
Arguments:
- pool: List of candidate strings.
- width: Target width.
- threshold: Minimum width threshold.
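A hedged sketch: font properties are set first, then the pool is scanned for a candidate whose set width lands between threshold × width and width; if none does, helpers.findClosestValue presumably returns the nearest narrower candidate:

import drawBot
from lib.content import getStringForWidth

drawBot.font("Helvetica", 24)
pool = ["Hi", "Hello there", "Hello wide world", "A considerably longer headline"]
line = getStringForWidth(pool, 200)  # first candidate measuring 199–200 pt, else the closest fit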
def filterByShape(items: list[str], shape: WordShape | list[WordShape]) -> list[str]:
    """
    Filter words by descender, ascender, or caps shape.

    Args:
        items: List of words.
        shape: Shape(s) to filter by.

    Returns:
        List of words matching the shape criteria.

    Example:
        - `["hi", "hey"], "descender"` => `["hi"]`
    """

    def _isNotShaped(pattern: str, item: str):
        return not bool(re.search(pattern, item))

    def _checkShape(shape):
        wordShape = wordShapes.get(shape)
        return filter(lambda item: _isNotShaped(wordShape, item), items)

    wordShapes = dict(caps="[A-Z0-9]", ascender="[bdfihklt]", descender="[Qgjqpy/,]")
    shapeSubsets = [_checkShape(shape) for shape in helpers.coerceList(shape)]
    return helpers.intersect(shapeSubsets, retainOrder=False)
Filter words by descender, ascender, or caps shape.
Arguments:
- items: List of words.
- shape: Shape(s) to filter by.
Returns:
List of words matching the shape criteria.
Example:
["hi", "hey"], "descender"=>["hi"]
def filterByTokens(
    items: list[str], tokens: list[CharacterToken] = ["word", "nonword"]
) -> list[str]:
    """
    Filter items by Unicode character token.

    Args:
        items: List of strings to filter.
        tokens: List of token types to filter by.

    Returns:
        List of items matching the token criteria.

    Example:
        - `word`, `nonword` => `["A4", "R&B"]`
    """

    possiblePatterns = dict(
        word=r"\p{Letter}", nonword=r"\p{Symbol}|\p{Number}|\p{Punctuation}"
    )
    patterns = [possiblePatterns.get(m) for m in tokens]

    def _filterSingleToken(items: list[str], pattern: str):
        return [item for item in items if bool(regex.search(pattern, item))]

    individual = [_filterSingleToken(items, p) for p in patterns]

    return helpers.intersect(individual)
Filter items by Unicode character token.
Arguments:
- items: List of strings to filter.
- tokens: List of token types to filter by.
Returns:
List of items matching the token criteria.
Example:
word, nonword => ["A4", "R&B"]
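Usage sketch; the default token list keeps items that mix letter and non-letter characters, while a single token relaxes that:

from lib.content import filterByTokens

items = ["A4", "R&B", "hello", "42"]
print(filterByTokens(items))                   # ["A4", "R&B"]
print(filterByTokens(items, tokens=["word"]))  # items containing at least one letter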
def isRagPretty(
    content: Union[str, drawBot.drawBotDrawingTools.FormattedString], coords: tuple
) -> tuple[bool, bool]:
    """
    Evaluate if a paragraph is nicely typeset.

    Args:
        content: Text content or `FormattedString`.
        coords: Tuple specifying text box coordinates.

    Returns:
        Tuple of booleans (isGreat, isOkay).
        - `isGreat`: All quite long, some very long
        - `isOkay`: All quite long
    """

    def _calcLineWidths():
        """Returns widths for all lines except last and for last line."""
        textBounds = drawBot.textBoxCharacterBounds(content, coords)
        linesByY = dict()

        for segment in textBounds:
            bounds, _, _ = segment
            _, y, w, _ = bounds

            if not linesByY.get(y):
                linesByY[y] = 0

            linesByY[y] += w

        last = linesByY.pop(list(linesByY)[-1])
        return linesByY.values(), last

    try:
        _, _, width, _ = coords
        bodyWidths, lastWidth = _calcLineWidths()
        # All lines are quite long
        areAllGood = all([w >= width * 0.9 for w in bodyWidths])
        # A portion of lines are very long
        areSomeGreat = (
            len([True for w in bodyWidths if w >= width * 0.95]) >= len(bodyWidths) / 3
        )
        # Last line is not the longest and not a widow
        isLastGood = max(bodyWidths) >= lastWidth >= width * 2 / 3

        isOkay = areAllGood and isLastGood
        # isGreat, isOkay
        return (isOkay and areSomeGreat), isOkay
    except Exception as e:
        logger.warning("Failed isRagPretty: {}", e)
        # Keep the advertised (isGreat, isOkay) shape on failure
        return False, False
Evaluate if a paragraph is nicely typeset.
Arguments:
- content: Text content or FormattedString.
- coords: Tuple specifying text box coordinates.
Returns:
Tuple of booleans (isGreat, isOkay).
- isGreat: All quite long, some very long
- isOkay: All quite long
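A hedged sketch of how the check might gate a layout inside a DrawBot script; the paragraph and box are hypothetical:

import drawBot
from lib.content import isRagPretty

drawBot.newPage(500, 700)
drawBot.font("Helvetica", 12)
paragraph = "A reasonably long run of body copy to typeset " * 8
box = (50, 50, 300, 400)
isGreat, isOkay = isRagPretty(paragraph, box)
if isOkay:
    drawBot.textBox(paragraph, box)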
def sanitize(input: list[str]) -> list[str]:
    """
    Filter out explicit content using a prohibited terms list (found in `sanitize-....txt`).

    Args:
        input: List of strings to filter.

    Returns:
        Filtered list with inappropriate content removed.
    """
    prohibited_terms = (
        open(
            "/Users/christianjansky/Library/CloudStorage/Dropbox/KOMETA-Work/40 Scripts/03 DrawBot/01 Content/sanitize-any.txt",
            encoding="utf-8",
        )
        .read()
        .splitlines()
    )
    sanitized = []

    for item in input:
        is_clean = True
        item_lower = item.lower()

        for term in prohibited_terms:
            if term.lower().strip() and term.lower() in item_lower:
                is_clean = False
                break

        if is_clean:
            sanitized.append(item)

    return sanitized
Filter out explicit content using a prohibited terms list (found in sanitize-....txt).
Arguments:
- input: List of strings to filter.
Returns:
Filtered list with inappropriate content removed.
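A minimal sketch; whether an item is dropped depends entirely on the terms listed in the sanitize-any.txt file referenced above:

from lib.content import sanitize

candidates = ["A calm afternoon", "Some questionable headline"]
clean = sanitize(candidates)  # items containing a prohibited term are removed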