lib.content

View Source

  1import re
  2import drawBot
  3import random
  4from typing import Literal, TypeAlias, Union
  5from itertools import cycle, islice, dropwhile
  6from string import capwords
  7import regex
  8from caseconverter import camelcase, kebabcase, snakecase
  9from loguru import logger
 10from icecream import ic
 11
 12from lib import helpers, layout
 13
# Shape names used as keys of the pattern table inside `filterByShape`.
WordShape: TypeAlias = Literal["descender", "ascender", "caps"]
"""Categories to filter words by their shape.

See `classes.c32_pool.KPool.getItemByWidth` for usage.
"""

# `changeCase` compares these names with `str.casefold()`, so the spelling
# "upper" selects the same conversion as "UPPER".
TextCase: TypeAlias = Literal["UPPER", "lower", "Title", "Caps"]
"""Defines the supported text casing styles for conversion.

- `UPPER`: Converts all letters to uppercase: `hi ibm` → `HI IBM`
- `lower`: Converts all letters to lowercase: `HI IBM` → `hi ibm`
- `Title`: Capitalizes the first letter of each word, lowercases the rest: `hi ibm` → `Hi Ibm`
- `Caps`: Like Title, but preserves acronyms in uppercase: `hi ibm USA` → `Hi Ibm USA`
"""

# Token names used as keys of the pattern table inside `filterByTokens`.
CharacterToken: TypeAlias = Literal["word", "nonword"]
"""Type alias for character token types.

See `filterByTokens` for usage.
"""


# ? Common words that should be lowercased in title case (unless at the start/end)
# `toTitleCase` looks words up with `word.casefold() in commonWords`, so every
# entry has to stay lowercase to be found.
commonWords: list[str] = [
    "a",
    "an",
    "the",
    "as",
    "is",
    "are",
    "and",
    "but",
    "at",
    "in",
    "on",
    "of",
    "for",
    "by",
    "to",
]
"""List of common words to be lowercased in title case (unless at start/end)."""
 55
 56
 57def parseCamelCase(string):
 58    """Split camel case and numbers into separate words.
 59
 60    Example:
 61        `One123Four` => `One 123 Four`
 62    """
 63    # A Aa, a A, A 0
 64    expressions = ["([A-Z0-9])([A-Z][a-z])", "([a-z])([A-Z])", "([A-Za-z])([0-9])"]
 65    for exp in expressions:
 66        string = re.sub(rf"{exp}", r"\1 \2", string)
 67    return string
 68
 69
def toCamelCase(input: str) -> str:
    """Convert string to camelCase.

    Thin wrapper: the conversion itself is done by `caseconverter.camelcase`.

    Args:
        input: The string to convert.

    Returns:
        Whatever `camelcase(input)` returns.

    Example:
        `Hello World` => `helloWorld`
    """
    return camelcase(input)
 77
 78
def toKebabCase(input: str) -> str:
    """Convert string to kebab-case.

    Thin wrapper: the conversion itself is done by `caseconverter.kebabcase`.

    Args:
        input: The string to convert.

    Returns:
        Whatever `kebabcase(input)` returns.

    Example:
        `Hello World` => `hello-world`
    """
    return kebabcase(input)
 86
 87
def toSnakeCase(input: str) -> str:
    """Convert string to snake_case.

    Thin wrapper: the conversion itself is done by `caseconverter.snakecase`.

    Args:
        input: The string to convert.

    Returns:
        Whatever `snakecase(input)` returns.

    Example:
        `Hello World` => `hello_world`
    """
    return snakecase(input)
 95
 96
 97def toTitleCase(input: str, retainUpper: bool = True) -> str:
 98    """
 99    Convert string to title case, handling special cases and acronyms.
100
101    Args:
102        input: The input string.
103        retainUpper: If True, retain uppercase acronyms: `True` USA, `False` Usa
104
105    Returns:
106        Title-cased string.
107
108    Example:
109        `sON Of The USA` => `Son of the USA`
110    """
111    specialChars: list[str] = ["-", "/"]
112
113    def _hasSpecialChars(word: str) -> bool:
114        """Returns True if word contains special characters."""
115        return any(char in word for char in specialChars)
116
117    def _handleSpecialChars(word: str) -> str:
118        """Apply title case to each part of a word split by special characters."""
119        for char in specialChars:
120            if char in word:
121                # ? Split by special char and apply title case to each part
122                parts = word.split(char)
123                return char.join([capwords(part) for part in parts])
124
125    def _processWord(word: str) -> str:
126        """Process a single word for title casing."""
127        # ? Always lowercase common words in continuous text
128        isOnEitherSide = helpers.isFirst(words, word) or helpers.isLast(words, word)
129        isCommon = word.casefold() in commonWords
130        if isCommon and not isOnEitherSide:
131            return word.lower()
132
133        # ? Handle special characters
134        if _hasSpecialChars(word):
135            return _handleSpecialChars(word)
136
137        # Uppercase and punctuation 2+ times
138        isCaps = regex.match(r"[\p{Lu}|\p{P}]{2,}", word)
139
140        # capwords() better .title() => retains lowercase ’s
141        return word if isCaps and retainUpper else capwords(word)
142
143    words = input.split(" ")
144    words = [_processWord(word) for word in words]
145    return " ".join(words)
146
147
def changeCase(
    input: list[str] | str,
    case: TextCase = "Title",
) -> list[str] | str:
    """
    Change the case of a string or list of strings.

    The case name is compared case-insensitively. A falsy or unrecognised
    `case` returns `input` unchanged; the unrecognised one also logs a warning.

    Args:
        input: String or list of strings to change case.
        case: Desired case ("UPPER", "lower", "Title", "Caps").

    Returns:
        String or list of strings with changed case.

    Example:
        `the USA`
        - `UPPER` => `THE USA`
        - `lower` => `the usa`
        - `Title` => `The Usa`
        - `Caps`  => `The USA`
    """
    if not case:
        return input  # Pass through unchanged

    folded = case.casefold()

    if folded not in ("upper", "lower", "title", "caps"):
        # Warn once and hand the input back instead of returning None
        logger.warning("Unable to change case: {}", case)
        return input

    def _change(item):
        if folded == "upper":
            return item.upper()
        if folded == "lower":
            return item.lower()
        if folded == "title":
            # Title: acronyms are not retained
            return toTitleCase(item, False)
        # Caps: acronyms are retained
        return toTitleCase(item)

    if isinstance(input, list):
        return [_change(item) for item in input]
    return _change(input)
189
190
def isTitleCase(input: str) -> bool:
    """Returns True if all words in the string are title case.

    A word qualifies when it is exactly one uppercase letter followed by one
    or more lowercase letters; words are separated by single spaces.
    """
    titleWord = r"^[\p{Lu}][\p{Ll}]+$"
    for part in input.split(" "):
        if not regex.match(titleWord, part):
            return False
    return True
194
195
def prettifyText(text: str) -> str:
    """
    Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.

    Removals run first, then the replacements in the listed order, and the
    result is stripped of leading/trailing whitespace.

    Args:
        text: The input text.

    Returns:
        Prettified text.
    """
    removals = (
        # Hair space (U+200A), written as an escape so the invisible
        # character cannot be lost or swapped when the file is edited
        "\u200a",
        # Remove citation references, with optional page suffix: [1], [a]:12–14
        r"\[[A-Za-z\d]+\](?::?\d+(?:[-–]\d+)?)?",
        # Remove [citation needed]
        r"\[citation needed\]",
        r"\[clarification needed\]",
        # Footnote markers with any number of digits: [note 12], [NB 3]
        r"\[note \d+\]",
        r"\[NB \d+\]",
    )
    for removal in removals:
        text = re.sub(removal, "", text)

    replacements = (
        # Multiple spaces to single space
        (r"[ ]{2,}", " "),
        # Replace dumb single quotes
        (r"\'([A-Za-z]+)\'", r"‘\1’"),
        # Replace dumb single quotes used as contractions: it's => it’s
        (r"([A-Za-z])\'([A-Za-z])?", r"\1’\2"),
        # Replace dumb double quotes
        (r"(\s?|^)\"([^\"]+)\"", r"\1“\2”"),
        # Add missing space before ( { [ in text
        (r"(\w)(\(|\[|\{)", r"\1 \2"),
        # Add missing space after ) } ] in text
        (r"(\)|\]|\})(\w)", r"\1 \2"),
    )
    for before, after in replacements:
        text = re.sub(before, after, text)

    return text.strip()
239
240
def omitMissing(
    input: str | list[str],
    font: str | None = None,
    mode: Literal["words", "glyphs"] = "words",
    debug=False,
) -> str | list[str]:
    """
    Omit missing characters from text or list of text blocks.

    Each block is cut into units (space-separated words, or single
    characters) and a unit is kept only when
    `drawBot.fontContainsCharacters` accepts it.

    Args:
        input: A single string or a list of strings to check for missing glyphs.
        font: Font to use for checking glyphs (optional).
        mode: Determines the omission granularity:
            - `words`: Omit entire words that contain missing glyphs.
            - `glyphs`: Omit only the missing characters, preserving the rest of the text.
        debug: If True, log omitted units.

    Returns:
        Filtered text or list of text blocks with missing characters or words omitted, depending on mode.
        Blocks that end up with no units are left out of a list result;
        a string input in that situation yields an empty string.
    """
    if font:
        drawBot.font(font)

    isInputString = isinstance(input, str)
    isModeWords = mode == "words"
    glue = " " if isModeWords else ""
    blocks = [input] if isInputString else input

    output = []
    for block in blocks:
        units = block.split(glue) if isModeWords else list(block)

        # Single pass: every unit is checked once and logged when dropped
        filtered = []
        for unit in units:
            if drawBot.fontContainsCharacters(unit):
                filtered.append(unit)
            elif debug:
                logger.trace("[Omitted] {}", unit)

        # Do not add empty list
        if filtered:
            output.append(glue.join(filtered))

    return glue.join(output) if isInputString else output
286
287
def splitStringToSentences(input: str) -> list[str]:
    """
    Split running text into a list of sentences.

    Newlines are turned into spaces and whitespace runs are collapsed before
    splitting. A split happens at a whitespace character that follows `.` or
    `?` and is not followed by a lowercase letter, unless the text just before
    it looks like an abbreviation.

    Args:
        input: The input text.

    Example:
        `I am a sentence. I am another one.` => `["I am a sentence.", "I am another one."]`
    """
    # Newlines with spaces, then multiple whitespace to single space
    flattened = re.sub(r"\n", " ", input)
    flattened = re.sub(r"\s{2,}", " ", flattened)

    # Skip abbreviations: (F. Elastica), Ficus var. elastica
    boundary = re.compile(
        r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<![A-Z]\.)(?<=\.|\?)\s(?![a-z])"
    )
    return boundary.split(flattened)
310
311
def rotateList(input: list) -> list:
    """
    Rotate a list to produce all cyclic permutations.

    Rotations are built by position, so a list with repeated items still
    yields one distinct rotation per starting index.

    Args:
        input: The input list.

    Returns:
        One list per starting index, in order; empty for an empty input.

    Example:
        `[A, B, C]` => `[[A, B, C], [B, C, A], [C, A, B]]`
    """
    items = list(input)
    return [items[offset:] + items[:offset] for offset in range(len(items))]
332
333
334def chopSequence(input: str | list[str], limit: int = None, glue=" ", split=" "):
335    """
336    Split input into meaningful parts, optionally limiting the number of words: `A B C` => `A, AB, ABC`.
337
338    Args:
339        input: String or list of strings to chop.
340        limit: Limit to `n` words.
341        glue: String to join parts.
342        split: String to split input.
343
344    Example:
345    - input: single sentence
346        - `"I was late."` => `["I", "I was", "I was late."]`
347    - input: list of sentences
348        - `["For me.", "Right?"]` => `["For me.", "For me. Right?"]`
349    - limit: 2
350        - `["I", "I was"]`
351    """
352    if split and isinstance(input, str):
353        input = input.split(split)
354
355    inputLen = len(input)
356    # Limit size if provided
357    stop = min(limit, inputLen) if limit else inputLen
358
359    return [glue.join(input[:i]) for i in range(1, stop + 1)]
360
361
def chopList(
    input: list[str],
    clamp: int = None,
    mode: Literal["separate", "connected"] = "separate",
    shuffle=False,
) -> list[str]:
    """
    Chop a list of sentences into smaller parts, optionally connecting or shuffling them.

    The caller's list is never modified: shuffling is done on a copy.

    Args:
        input: List of sentences.
        clamp: Limit to n words per iteration.
        mode: "separate" to chop individually, "connected" to connect chopped sentences.
        shuffle: If True, shuffle input before chopping.

    Returns:
        The chopped parts of all sentences, passed through `helpers.flatten`.

    Example:
        `["Hello there.", "Hi you."]` =>
        - (separate) `["Hello", "Hello there.", "Hi", "Hi you."]`
        - (connected) `["Hello", "Hello there.", "Hello there. Hi", ...]`
    """
    # Work on a copy: random.shuffle reorders its argument in place
    sentences = list(input)

    if shuffle:
        random.shuffle(sentences)

    if mode == "connected":
        # Each rotation becomes one long sentence starting at a different item
        sentences = [" ".join(item) for item in rotateList(sentences)]

    return helpers.flatten([chopSequence(item, clamp) for item in sentences])
389
390
def permutate(input: list, clamp: int = 20, shuffle: bool = True) -> list:
    """
    Permutate and chop a list of sentences into connected sequences.

    Thin wrapper: calls `chopList` in `"connected"` mode with the given
    `clamp` and `shuffle`, and returns its result as is.

    Args:
        input: List of sentences.
        clamp: Limit to `n` words per sequence.
        shuffle: If True, shuffle input before permutation.

    Example:
        - `["Hi Tim", "Foo bar"]` => list of
        - `["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]`
    """
    # NOTE(review): chopList passes its result through helpers.flatten, so the
    # groups shown in the example are presumably returned as one flat list —
    # confirm against helpers.flatten.
    return chopList(input, clamp, "connected", shuffle)
405
406
def fillTextOver(container: tuple, content: list, shuffle: bool = True) -> str:
    """
    Returns a string that fills the container up to overflow.

    Items are appended one by one, joined with spaces, until the measured
    text height reaches the container height or the content runs out.

    - Font properties need to be already set

    Args:
        container: Tuple specifying container dimensions.
        content: List of possible sentences/items.
        shuffle: If True, shuffle content before filling.

    Returns:
        The joined text; an empty string when `content` is empty.
    """
    containerW, containerH = layout.toDimensions(container)

    if shuffle:
        content = helpers.shuffleAtRandomSegment(content)

    strings = []
    # Defined up front so an empty `content` returns "" instead of raising
    stream = ""

    for string in content:
        strings.append(string)
        stream = " ".join(strings)
        _, textH = drawBot.textSize(stream, width=containerW)
        if textH >= containerH:
            break

    return stream
433
434
def getStringForWidth(pool: list, width: int, threshold: float = 0.995) -> str:
    """
    Pick a string from the pool whose measured width suits the target width.

    Candidates are measured in order with `drawBot.textSize`. The first one
    whose width lies between `width * threshold` and `width` (inclusive) is
    returned. If none qualifies, the candidate whose width
    `helpers.findClosestValue` reports as closest is returned, falling back
    to the first pool item.

    - Font properties need to be set already

    Args:
        pool: List of candidate strings.
        width: Target width.
        threshold: Fraction of `width` used as the minimum accepted width.
    """
    lowerBound = width * threshold
    upperBound = width

    measured = []
    chosen = None

    for candidate in pool:
        measuredWidth, _ = drawBot.textSize(candidate)
        measured.append(measuredWidth)

        if lowerBound <= measuredWidth <= upperBound:
            chosen = candidate
            break

    if chosen:
        return chosen

    # No candidate inside the window: take the nearest measured width
    nearest = helpers.findClosestValue(measured, width, discardLarger=True)
    index = measured.index(nearest) if nearest in measured else 0
    return pool[index]
475
476
def filterByShape(items: list[str], shape: WordShape | list[WordShape]) -> list[str]:
    """
    Filter words by descender, ascender, or caps shape.

    For each requested shape the words that contain NONE of that shape's
    characters are collected; the per-shape results are then combined with
    `helpers.intersect`.

    Args:
        items: List of words.
        shape: Shape(s) to filter by.

    Returns:
        The result of intersecting the per-shape selections.

    Example:
        - `["hi", "hey"], "descender"` => `["hi"]`
    """
    patternsByShape = {
        "caps": "[A-Z0-9]",
        "ascender": "[bdfihklt]",
        "descender": "[Qgjqpy/,]",
    }

    def _without(pattern):
        # Lazy selection of the items in which `pattern` finds no match
        return filter(lambda item: not re.search(pattern, item), items)

    subsets = [
        _without(patternsByShape.get(name)) for name in helpers.coerceList(shape)
    ]
    return helpers.intersect(subsets, retainOrder=False)
502
503
def filterByTokens(
    items: list[str], tokens: list[CharacterToken] = ["word", "nonword"]
) -> list[str]:
    """
    Filter items by Unicode character token.

    For every token the items containing at least one matching character are
    collected; the per-token selections are combined with `helpers.intersect`.
    The default `tokens` list is only read, never modified.

    Args:
        items: List of strings to filter.
        tokens: List of token types to filter by.

    Returns:
        List of items matching the token criteria.

    Example:
        - `word`, `nonword` => `["A4", "R&B"]`
    """
    patternsByToken = {
        "word": r"\p{Letter}",
        "nonword": r"\p{Symbol}|\p{Number}|\p{Punctuation}",
    }
    wanted = [patternsByToken.get(token) for token in tokens]

    selections = []
    for pattern in wanted:
        selections.append([item for item in items if regex.search(pattern, item)])

    return helpers.intersect(selections)
532
533
def isRagPretty(
    content: Union[str, drawBot.drawBotDrawingTools.FormattedString], coords: tuple
) -> tuple[bool, bool]:
    """
    Evaluate if a paragraph is nicely typeset.

    Line widths are obtained by summing the widths reported by
    `drawBot.textBoxCharacterBounds` per distinct `y` value.

    Args:
        content: Text content or `FormattedString`.
        coords: Tuple specifying text box coordinates.

    Returns:
        Tuple of booleans (isGreat, isOkay).
        - `isGreat`: All quite long, some very long
        - `isOkay`: All quite long

        `(False, False)` when the evaluation fails for any reason; the
        failure is logged as a warning.
    """

    def _calcLineWidths():
        """Returns widths for all lines except last and for last line."""
        textBounds = drawBot.textBoxCharacterBounds(content, coords)
        linesByY = dict()

        for segment in textBounds:
            bounds, _, _ = segment
            _, y, w, _ = bounds
            linesByY[y] = linesByY.get(y, 0) + w

        # The entry inserted last is taken as the last line
        last = linesByY.pop(list(linesByY)[-1])
        return list(linesByY.values()), last

    try:
        _, _, width, _ = coords
        bodyWidths, lastWidth = _calcLineWidths()
        # All lines are quite long
        areAllGood = all(w >= width * 0.9 for w in bodyWidths)
        # A portion of lines are very long
        greatCount = sum(1 for w in bodyWidths if w >= width * 0.95)
        areSomeGreat = greatCount >= len(bodyWidths) / 3
        # Last line is not longest and not a widow
        isLastGood = max(bodyWidths) >= lastWidth >= width * 2 / 3

        isOkay = areAllGood and isLastGood
        # isGreat, isOkay
        return (isOkay and areSomeGreat), isOkay
    except Exception as e:
        # Best effort: keep the declared 2-tuple shape so callers can unpack
        logger.warning("Failed isRagPretty: {}", e)
        return False, False
585
586
587def sanitize(input: list[str]) -> list[str]:
588    """
589    Filter out explicit content using a prohibited terms list (found in `sanitize-....txt`).
590
591    Args:
592        input: List of strings to filter.
593
594    Returns:
595        Filtered list with inappropriate content removed.
596    """
597    prohibited_terms = (
598        open(
599            "/Users/christianjansky/Library/CloudStorage/Dropbox/KOMETA-Work/40 Scripts/03 DrawBot/01 Content/sanitize-any.txt",
600            encoding="utf-8",
601        )
602        .read()
603        .splitlines()
604    )
605    sanitized = []
606
607    for item in input:
608        is_clean = True
609        item_lower = item.lower()
610
611        for term in prohibited_terms:
612            if term.lower().strip() and term.lower() in item_lower:
613                is_clean = False
614                break
615
616        if is_clean:
617            sanitized.append(item)
618
619    return sanitized

WordShape: TypeAlias = Literal['descender', 'ascender', 'caps']

Categories to filter words by their shape.

See classes.c32_pool.KPool.getItemByWidth for usage.

TextCase: TypeAlias = Literal['UPPER', 'lower', 'Title', 'Caps']

Defines the supported text casing styles for conversion.

UPPER: Converts all letters to uppercase: hi ibm → HI IBM
lower: Converts all letters to lowercase: HI IBM → hi ibm
Title: Capitalizes the first letter of each word, lowercases the rest: hi ibm → Hi Ibm
Caps: Like Title, but preserves acronyms in uppercase: hi ibm USA → Hi Ibm USA

CharacterToken: TypeAlias = Literal['word', 'nonword']

Type alias for character token types.

See filterByTokens for usage.

commonWords: list[str] = ['a', 'an', 'the', 'as', 'is', 'are', 'and', 'but', 'at', 'in', 'on', 'of', 'for', 'by', 'to']

List of common words to be lowercased in title case (unless at start/end).

def parseCamelCase(string): View Source

def parseCamelCase(string):
    """Insert spaces at camelCase and letter-to-digit boundaries.

    Three substitutions run in order; each one puts a single space between
    the two groups it captured.

    Example:
        `One123Four` => `One 123 Four`
    """
    boundaries = (
        # Uppercase/digit followed by the start of a capitalized word: `3|Four`
        "([A-Z0-9])([A-Z][a-z])",
        # Lowercase followed by uppercase: `camel|Case`
        "([a-z])([A-Z])",
        # Letter followed by digit: `One|123`
        "([A-Za-z])([0-9])",
    )
    spaced = string
    for boundary in boundaries:
        spaced = re.sub(boundary, r"\1 \2", spaced)
    return spaced

Split camel case and numbers into separate words.

Example:

One123Four => One 123 Four

def toCamelCase(input: str): View Source

def toCamelCase(input: str) -> str:
    """Convert string to camelCase.

    Thin wrapper: the conversion itself is done by `caseconverter.camelcase`.

    Args:
        input: The string to convert.

    Returns:
        Whatever `camelcase(input)` returns.

    Example:
        `Hello World` => `helloWorld`
    """
    return camelcase(input)

Convert string to camelCase.

Example:

Hello World => helloWorld

def toKebabCase(input: str): View Source

def toKebabCase(input: str) -> str:
    """Convert string to kebab-case.

    Thin wrapper: the conversion itself is done by `caseconverter.kebabcase`.

    Args:
        input: The string to convert.

    Returns:
        Whatever `kebabcase(input)` returns.

    Example:
        `Hello World` => `hello-world`
    """
    return kebabcase(input)

Convert string to kebab-case.

Example:

Hello World => hello-world

def toSnakeCase(input: str): View Source

def toSnakeCase(input: str) -> str:
    """Convert string to snake_case.

    Thin wrapper: the conversion itself is done by `caseconverter.snakecase`.

    Args:
        input: The string to convert.

    Returns:
        Whatever `snakecase(input)` returns.

    Example:
        `Hello World` => `hello_world`
    """
    return snakecase(input)

Convert string to snake_case.

Example:

Hello World => hello_world

def toTitleCase(input: str, retainUpper: bool = True) -> str: View Source

 98def toTitleCase(input: str, retainUpper: bool = True) -> str:
 99    """
100    Convert string to title case, handling special cases and acronyms.
101
102    Args:
103        input: The input string.
104        retainUpper: If True, retain uppercase acronyms: `True` USA, `False` Usa
105
106    Returns:
107        Title-cased string.
108
109    Example:
110        `sON Of The USA` => `Son of the USA`
111    """
112    specialChars: list[str] = ["-", "/"]
113
114    def _hasSpecialChars(word: str) -> bool:
115        """Returns True if word contains special characters."""
116        return any(char in word for char in specialChars)
117
118    def _handleSpecialChars(word: str) -> str:
119        """Apply title case to each part of a word split by special characters."""
120        for char in specialChars:
121            if char in word:
122                # ? Split by special char and apply title case to each part
123                parts = word.split(char)
124                return char.join([capwords(part) for part in parts])
125
126    def _processWord(word: str) -> str:
127        """Process a single word for title casing."""
128        # ? Always lowercase common words in continuous text
129        isOnEitherSide = helpers.isFirst(words, word) or helpers.isLast(words, word)
130        isCommon = word.casefold() in commonWords
131        if isCommon and not isOnEitherSide:
132            return word.lower()
133
134        # ? Handle special characters
135        if _hasSpecialChars(word):
136            return _handleSpecialChars(word)
137
138        # Uppercase and punctuation 2+ times
139        isCaps = regex.match(r"[\p{Lu}|\p{P}]{2,}", word)
140
141        # capwords() better .title() => retains lowercase ’s
142        return word if isCaps and retainUpper else capwords(word)
143
144    words = input.split(" ")
145    words = [_processWord(word) for word in words]
146    return " ".join(words)

Convert string to title case, handling special cases and acronyms.

Arguments:

input: The input string.
retainUpper: If True, retain uppercase acronyms: True USA, False Usa

Returns:

Title-cased string.

Example:

sON Of The USA => Son of the USA

def changeCase( input: list[str] | str, case: Literal['UPPER', 'lower', 'Title', 'Caps'] = 'Title') -> list[str] | str: View Source

def changeCase(
    input: list[str] | str,
    case: TextCase = "Title",
) -> list[str] | str:
    """
    Change the case of a string or list of strings.

    The case name is compared case-insensitively. A falsy or unrecognised
    `case` returns `input` unchanged; the unrecognised one also logs a warning.

    Args:
        input: String or list of strings to change case.
        case: Desired case ("UPPER", "lower", "Title", "Caps").

    Returns:
        String or list of strings with changed case.

    Example:
        `the USA`
        - `UPPER` => `THE USA`
        - `lower` => `the usa`
        - `Title` => `The Usa`
        - `Caps`  => `The USA`
    """
    if not case:
        return input  # Pass through unchanged

    folded = case.casefold()

    if folded not in ("upper", "lower", "title", "caps"):
        # Warn once and hand the input back instead of returning None
        logger.warning("Unable to change case: {}", case)
        return input

    def _change(item):
        if folded == "upper":
            return item.upper()
        if folded == "lower":
            return item.lower()
        if folded == "title":
            # Title: acronyms are not retained
            return toTitleCase(item, False)
        # Caps: acronyms are retained
        return toTitleCase(item)

    if isinstance(input, list):
        return [_change(item) for item in input]
    return _change(input)

Change the case of a string or list of strings.

Arguments:

input: String or list of strings to change case.
case: Desired case ("UPPER", "lower", "Title", "Caps"); the name is compared case-insensitively, so "upper" works as well.

Returns:

String or list of strings with changed case.

Example:

the USA

upper => THE USA

lower => the usa

Title => The Usa

Caps => The USA

def isTitleCase(input: str) -> bool: View Source

def isTitleCase(input: str) -> bool:
    """Returns True if all words in the string are title case.

    A word qualifies when it is exactly one uppercase letter followed by one
    or more lowercase letters; words are separated by single spaces.
    """
    titleWord = r"^[\p{Lu}][\p{Ll}]+$"
    for part in input.split(" "):
        if not regex.match(titleWord, part):
            return False
    return True

Returns True if all words in the string are title case.

def prettifyText(text: str) -> str: View Source

def prettifyText(text: str) -> str:
    """
    Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.

    Removals run first, then the replacements in the listed order, and the
    result is stripped of leading/trailing whitespace.

    Args:
        text: The input text.

    Returns:
        Prettified text.
    """
    removals = (
        # Hair space (U+200A), written as an escape so the invisible
        # character cannot be lost or swapped when the file is edited
        "\u200a",
        # Remove citation references, with optional page suffix: [1], [a]:12–14
        r"\[[A-Za-z\d]+\](?::?\d+(?:[-–]\d+)?)?",
        # Remove [citation needed]
        r"\[citation needed\]",
        r"\[clarification needed\]",
        # Footnote markers with any number of digits: [note 12], [NB 3]
        r"\[note \d+\]",
        r"\[NB \d+\]",
    )
    for removal in removals:
        text = re.sub(removal, "", text)

    replacements = (
        # Multiple spaces to single space
        (r"[ ]{2,}", " "),
        # Replace dumb single quotes
        (r"\'([A-Za-z]+)\'", r"‘\1’"),
        # Replace dumb single quotes used as contractions: it's => it’s
        (r"([A-Za-z])\'([A-Za-z])?", r"\1’\2"),
        # Replace dumb double quotes
        (r"(\s?|^)\"([^\"]+)\"", r"\1“\2”"),
        # Add missing space before ( { [ in text
        (r"(\w)(\(|\[|\{)", r"\1 \2"),
        # Add missing space after ) } ] in text
        (r"(\)|\]|\})(\w)", r"\1 \2"),
    )
    for before, after in replacements:
        text = re.sub(before, after, text)

    return text.strip()

Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.

Arguments:

text: The input text.

Returns:

Prettified text.

def omitMissing( input: str | list[str], font: str = None, mode: Literal['words', 'glyphs'] = 'words', debug=False): View Source

def omitMissing(
    input: str | list[str],
    font: str | None = None,
    mode: Literal["words", "glyphs"] = "words",
    debug=False,
) -> str | list[str]:
    """
    Omit missing characters from text or list of text blocks.

    Each block is cut into units (space-separated words, or single
    characters) and a unit is kept only when
    `drawBot.fontContainsCharacters` accepts it.

    Args:
        input: A single string or a list of strings to check for missing glyphs.
        font: Font to use for checking glyphs (optional).
        mode: Determines the omission granularity:
            - `words`: Omit entire words that contain missing glyphs.
            - `glyphs`: Omit only the missing characters, preserving the rest of the text.
        debug: If True, log omitted units.

    Returns:
        Filtered text or list of text blocks with missing characters or words omitted, depending on mode.
        Blocks that end up with no units are left out of a list result;
        a string input in that situation yields an empty string.
    """
    if font:
        drawBot.font(font)

    isInputString = isinstance(input, str)
    isModeWords = mode == "words"
    glue = " " if isModeWords else ""
    blocks = [input] if isInputString else input

    output = []
    for block in blocks:
        units = block.split(glue) if isModeWords else list(block)

        # Single pass: every unit is checked once and logged when dropped
        filtered = []
        for unit in units:
            if drawBot.fontContainsCharacters(unit):
                filtered.append(unit)
            elif debug:
                logger.trace("[Omitted] {}", unit)

        # Do not add empty list
        if filtered:
            output.append(glue.join(filtered))

    return glue.join(output) if isInputString else output

Omit missing characters from text or list of text blocks.

Arguments:

input: A single string or a list of strings to check for missing glyphs.
font: Font to use for checking glyphs (optional).
mode: Determines the omission granularity:
- words: Omit entire words that contain missing glyphs.
- glyphs: Omit only the missing characters, preserving the rest of the text.
debug: If True, log omitted units.

Returns:

Filtered text or list of text blocks with missing characters or words omitted, depending on mode.

def splitStringToSentences(input: str) -> list[str]: View Source

def splitStringToSentences(input: str) -> list[str]:
    """
    Split running text into a list of sentences.

    Newlines are turned into spaces and whitespace runs are collapsed before
    splitting. A split happens at a whitespace character that follows `.` or
    `?` and is not followed by a lowercase letter, unless the text just before
    it looks like an abbreviation.

    Args:
        input: The input text.

    Example:
        `I am a sentence. I am another one.` => `["I am a sentence.", "I am another one."]`
    """
    # Newlines with spaces, then multiple whitespace to single space
    flattened = re.sub(r"\n", " ", input)
    flattened = re.sub(r"\s{2,}", " ", flattened)

    # Skip abbreviations: (F. Elastica), Ficus var. elastica
    boundary = re.compile(
        r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<![A-Z]\.)(?<=\.|\?)\s(?![a-z])"
    )
    return boundary.split(flattened)

Split running text into a list of sentences.

Arguments:

input: The input text.

Example:

I am a sentence. I am another one. => ["I am a sentence.", "I am another one."]

def rotateList(input: list) -> list: View Source

def rotateList(input: list) -> list:
    """
    Rotate a list to produce all cyclic permutations.

    Rotations are built by position, so a list with repeated items still
    yields one distinct rotation per starting index.

    Args:
        input: The input list.

    Returns:
        One list per starting index, in order; empty for an empty input.

    Example:
        `[A, B, C]` => `[[A, B, C], [B, C, A], [C, A, B]]`
    """
    items = list(input)
    return [items[offset:] + items[:offset] for offset in range(len(items))]

Rotate a list to produce all cyclic permutations.

Arguments:

input: The input list.

Example:

[A, B, C] => [[A, B, C], [B, C, A], [C, A, B]]

def chopSequence(input: str | list[str], limit: int = None, glue=' ', split=' '): View Source

335def chopSequence(input: str | list[str], limit: int = None, glue=" ", split=" "):
336    """
337    Split input into meaningful parts, optionally limiting the number of words: `A B C` => `A, AB, ABC`.
338
339    Args:
340        input: String or list of strings to chop.
341        limit: Limit to `n` words.
342        glue: String to join parts.
343        split: String to split input.
344
345    Example:
346    - input: single sentence
347        - `"I was late."` => `["I", "I was", "I was late."]`
348    - input: list of sentences
349        - `["For me.", "Right?"]` => `["For me.", "For me. Right?"]`
350    - limit: 2
351        - `["I", "I was"]`
352    """
353    if split and isinstance(input, str):
354        input = input.split(split)
355
356    inputLen = len(input)
357    # Limit size if provided
358    stop = min(limit, inputLen) if limit else inputLen
359
360    return [glue.join(input[:i]) for i in range(1, stop + 1)]

Split input into meaningful parts, optionally limiting the number of words: A B C => A, A B, A B C.

Arguments:

input: String or list of strings to chop.
limit: Limit to n words.
glue: String to join parts.
split: String to split input.

Example:

input: single sentence
- "I was late." => ["I", "I was", "I was late."]
input: list of sentences
- ["For me.", "Right?"] => ["For me.", "For me. Right?"]
limit: 2
- ["I", "I was"]

def chopList( input: list[str], clamp: int = None, mode: Literal['separate', 'connected'] = 'separate', shuffle=False) -> list[str]: View Source

def chopList(
    input: list[str],
    clamp: int = None,
    mode: Literal["separate", "connected"] = "separate",
    shuffle=False,
) -> list[str]:
    """
    Chop a list of sentences into smaller parts, optionally connecting or shuffling them.

    The caller's list is never modified; shuffling happens on a copy.

    Args:
        input: List of sentences.
        clamp: Limit to n words per iteration.
        mode: "separate" to chop individually, "connected" to connect chopped sentences.
        shuffle: If True, shuffle the sentences before chopping.

    Returns:
        The chopped parts of every sentence, passed through `helpers.flatten`.

    Example:
        `["Hello there.", "Hi you."]` =>
        - (separate) `["Hello", "Hello there.", "Hi", "Hi you."]`
        - (connected) `["Hello", "Hello there.", "Hello there. Hi", ...]`
    """
    sentences = list(input)

    if shuffle:
        # `random.shuffle` works in place; shuffling the copy keeps the
        # caller's list in its original order.
        random.shuffle(sentences)

    if mode == "connected":
        # Each rotation of the list becomes one long space-joined sentence
        sentences = [" ".join(rotation) for rotation in rotateList(sentences)]

    return helpers.flatten([chopSequence(sentence, clamp) for sentence in sentences])

Chop a list of sentences into smaller parts, optionally connecting or shuffling them.

Arguments:

input: List of sentences.
clamp: Limit to n words per iteration.
mode: "separate" to chop individually, "connected" to connect chopped sentences.
shuffle: If True, shuffle input before chopping.

Example:

["Hello there.", "Hi you."] =>

(separate) ["Hello", "Hello there.", "Hi", "Hi you."]

(connected) ["Hello", "Hello there.", "Hello there. Hi", ...]

def permutate(input: list, clamp=20, shuffle=True) -> list: View Source

def permutate(input: list, clamp=20, shuffle=True) -> list:
    """
    Chop a list of sentences into connected sequences.

    Shorthand for `chopList` in `"connected"` mode.

    Args:
        input: List of sentences.
        clamp: Limit to `n` words per sequence.
        shuffle: If True, shuffle input before permutation.

    Example:
        - `["Hi Tim", "Foo bar"]` => list of
        - `["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]`
    """
    return chopList(input, clamp=clamp, mode="connected", shuffle=shuffle)

Permutate and chop a list of sentences into connected sequences.

Arguments:

input: List of sentences.
clamp: Limit to n words per sequence.
shuffle: If True, shuffle input before permutation.

Example:

["Hi Tim", "Foo bar"] => list of

["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]

def fillTextOver(container: tuple, content: list, shuffle: bool = True) -> str: View Source

def fillTextOver(container: tuple, content: list, shuffle: bool = True) -> str:
    """
    Returns a string that fills the container up to overflow.

    Items are appended one at a time (joined by spaces) until the measured
    text height reaches the container height or the content runs out.

    - Font properties need to be already set

    Args:
        container: Tuple specifying container dimensions.
        content: List of possible sentences/items.
        shuffle: If True, shuffle content before filling.

    Returns:
        The joined text; an empty string when `content` is empty.
    """
    containerW, containerH = layout.toDimensions(container)

    if shuffle:
        content = helpers.shuffleAtRandomSegment(content)

    strings = []
    # Defined up front so empty content returns "" instead of raising
    # UnboundLocalError at the final return.
    stream = ""

    for string in content:
        strings.append(string)
        stream = " ".join(strings)
        # Only the height matters: the text wraps at the container width
        _, textH = drawBot.textSize(stream, width=containerW)
        if textH >= containerH:
            break

    return stream

Returns a string that fills the container up to overflow.

Font properties need to be already set

Arguments:

container: Tuple specifying container dimensions.
content: List of possible sentences/items.
shuffle: If True, shuffle content before filling.

def getStringForWidth(pool: list, width: int, threshold: float = 0.995) -> str: View Source

def getStringForWidth(pool: list, width: int, threshold: float = 0.995) -> str:
    """
    Pick a string from the pool whose measured width fits the target width.

    The first candidate whose width lies between `width * threshold` and
    `width` wins. If none fits, the candidate whose width is chosen by
    `helpers.findClosestValue` (with `discardLarger=True`) is returned, or
    the first pool entry when that value is not among the measured widths.

    - Font properties need to be set already

    Args:
        pool: List of candidate strings.
        width: Target width.
        threshold: Minimum width threshold.
    """
    lowerBound = width * threshold
    upperBound = width

    measured = []
    chosen = None

    for text in pool:
        textWidth, _ = drawBot.textSize(text)
        measured.append(textWidth)

        # Stop measuring at the first candidate inside the accepted range
        if lowerBound <= textWidth <= upperBound:
            chosen = text
            break

    if chosen:
        return chosen

    nearest = helpers.findClosestValue(measured, width, discardLarger=True)
    index = measured.index(nearest) if nearest in measured else 0
    return pool[index]

Get a string from the pool that fits within the specified width.

Font properties need to be set already

Arguments:

pool: List of candidate strings.
width: Target width.
threshold: Minimum width threshold.

def filterByShape( items: list[str], shape: Union[Literal['descender', 'ascender', 'caps'], list[Literal['descender', 'ascender', 'caps']]]) -> list[str]: View Source

def filterByShape(items: list[str], shape: WordShape | list[WordShape]) -> list[str]:
    """
    Filter out words that contain descender, ascender, or caps characters.

    For each requested shape, the words WITHOUT any character of that shape
    are selected; the per-shape selections are then combined with
    `helpers.intersect`.

    Args:
        items: List of words.
        shape: Shape(s) whose characters must be absent.

    Returns:
        Result of `helpers.intersect` over the per-shape selections.

    Example:
        - `["hi", "hey"], "descender"` => `["hi"]`
    """
    shapePatterns = {
        "caps": "[A-Z0-9]",
        "ascender": "[bdfihklt]",
        "descender": "[Qgjqpy/,]",
    }

    def _withoutShape(shapeName):
        pattern = shapePatterns.get(shapeName)
        # Lazy filter: a word passes when the pattern finds nothing in it
        return filter(lambda word: re.search(pattern, word) is None, items)

    subsets = []
    for shapeName in helpers.coerceList(shape):
        subsets.append(_withoutShape(shapeName))

    return helpers.intersect(subsets, retainOrder=False)

Filter out words that contain descender, ascender, or caps characters.

Arguments:

items: List of words.
shape: Shape(s) to filter by.

Returns:

List of words that contain none of the given shapes.

Example:

["hi", "hey"], "descender" => ["hi"]

def filterByTokens( items: list[str], tokens: list[typing.Literal['word', 'nonword']] = ['word', 'nonword']) -> list[str]: View Source

def filterByTokens(
    items: list[str], tokens: list[CharacterToken] = ["word", "nonword"]
) -> list[str]:
    """
    Filter items by Unicode character token.

    For each token type, the items containing at least one matching
    character are collected (`word`: letters; `nonword`: symbols, numbers
    or punctuation). The per-token lists are combined with `helpers.intersect`.

    Args:
        items: List of strings to filter.
        tokens: List of token types to filter by.

    Returns:
        Result of `helpers.intersect` over the per-token matches.

    Example:
        - `word`, `nonword` => `["A4", "R&B"]`
    """
    tokenPatterns = {
        "word": r"\p{Letter}",
        "nonword": r"\p{Symbol}|\p{Number}|\p{Punctuation}",
    }

    matchesPerToken = []
    for token in tokens:
        pattern = tokenPatterns.get(token)
        matching = [entry for entry in items if regex.search(pattern, entry)]
        matchesPerToken.append(matching)

    return helpers.intersect(matchesPerToken)

Filter items by Unicode character token.

Arguments:

items: List of strings to filter.
tokens: List of token types to filter by.

Returns:

List of items matching the token criteria.

Example:

word, nonword => ["A4", "R&B"]

def isRagPretty( content: Union[str, drawBot.context.baseContext.FormattedString], coords: tuple) -> tuple[bool, bool]: View Source

def isRagPretty(
    content: Union[str, drawBot.drawBotDrawingTools.FormattedString], coords: tuple
) -> tuple[bool, bool]:
    """
    Evaluate if a paragraph is nicely typeset.

    Line widths are measured with `drawBot.textBoxCharacterBounds` and
    compared against the box width taken from `coords`.

    Args:
        content: Text content or `FormattedString`.
        coords: Tuple specifying text box coordinates (x, y, width, height).

    Returns:
        Tuple of booleans (isGreat, isOkay).
        - `isGreat`: `isOkay`, and at least a third of the body lines reach 95 % of the width
        - `isOkay`: All body lines reach 90 % of the width, and the last line is
          at least 2/3 of the width but no longer than the longest body line

        `(False, False)` when the evaluation fails; the failure is logged as a warning.
    """

    def _calcLineWidths():
        """Returns widths for all lines except last and for last line."""
        textBounds = drawBot.textBoxCharacterBounds(content, coords)
        linesByY = dict()

        for segment in textBounds:
            bounds, _, _ = segment
            _, y, w, _ = bounds
            # Segments sharing the same y are summed into one line width
            linesByY[y] = linesByY.get(y, 0) + w

        # The most recently added y is treated as the last line
        last = linesByY.pop(list(linesByY)[-1])
        return list(linesByY.values()), last

    try:
        _, _, width, _ = coords
        bodyWidths, lastWidth = _calcLineWidths()
        # All lines are quite long
        areAllGood = all(w >= width * 0.9 for w in bodyWidths)
        # A portion of lines are very long
        greatCount = sum(1 for w in bodyWidths if w >= width * 0.95)
        areSomeGreat = greatCount >= len(bodyWidths) / 3
        # Last line is not longest and not a widow.
        # With no body lines (single-line text) `max` raises and the
        # paragraph is reported as not pretty via the handler below.
        isLastGood = max(bodyWidths) >= lastWidth >= width * 2 / 3

        isOkay = areAllGood and isLastGood
        # isGreat, isOkay
        return (isOkay and areSomeGreat), isOkay
    except Exception as e:
        logger.warning("Failed isRagPretty: {}", e)
        # Keep the declared tuple shape so callers can always unpack
        return False, False

Evaluate if a paragraph is nicely typeset.

Arguments:

content: Text content or FormattedString.
coords: Tuple specifying text box coordinates.

Returns:

Tuple of booleans (isGreat, isOkay).

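isGreat: isOkay, and at least a third of the body lines are very long

isOkay: All body lines quite long, and the last line is neither the longest line nor a widow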

def sanitize(input: list[str]) -> list[str]: View Source

def sanitize(input: list[str]) -> list[str]:
    """
    Filter out explicit content using a prohibited terms list (found in `sanitize-....txt`).

    An item is dropped when any prohibited term occurs in it as a
    case-insensitive substring. Blank lines in the terms file are ignored.

    Args:
        input: List of strings to filter.

    Returns:
        Filtered list with inappropriate content removed.

    Raises:
        OSError: If the terms file cannot be opened or read.
    """
    termsPath = "/Users/christianjansky/Library/CloudStorage/Dropbox/KOMETA-Work/40 Scripts/03 DrawBot/01 Content/sanitize-any.txt"

    # Context manager closes the file handle even if reading fails
    with open(termsPath, encoding="utf-8") as termsFile:
        lines = termsFile.read().splitlines()

    # Lowercase once up front and skip blank lines. Terms are deliberately
    # NOT stripped, so surrounding spaces in a term still take part in matching.
    prohibitedTerms = [line.lower() for line in lines if line.strip()]

    sanitized = []

    for item in input:
        itemLower = item.lower()
        if not any(term in itemLower for term in prohibitedTerms):
            sanitized.append(item)

    return sanitized

Filter out explicit content using a prohibited terms list (found in sanitize-....txt).

Arguments:

input: List of strings to filter.

Returns:

Filtered list with inappropriate content removed.