lib.content

View Source

  1import re
  2import drawBot
  3import random
  4from typing import Literal, TypeAlias, Union
  5from itertools import cycle, islice, dropwhile
  6from string import capwords
  7import regex
  8from caseconverter import camelcase, kebabcase, snakecase, pascalcase
  9from loguru import logger
 10from icecream import ic
 11
 12from lib import helpers, layout, fonts
 13
 14WordShape: TypeAlias = Literal["descender", "ascender", "caps"]
 15"""Categories to filter words by their shape.
 16
 17See `classes.c32_pool.KPool.getItemByWidth` for usage.
 18"""
 19
 20TextCase: TypeAlias = Literal["UPPER", "lower", "Title", "Caps"]
 21"""Defines the supported text casing styles for conversion.
 22
 23- `UPPER`: Converts all letters to uppercase: `hi ibm` → `HI IBM`
 24- `lower`: Converts all letters to lowercase: `HI IBM` → `hi ibm`
 25- `Title`: Capitalizes the first letter of each word, lowercases the rest: `hi ibm` → `Hi Ibm`
 26- `Caps`: Like Title, but preserves acronyms in uppercase: `hi ibm USA` → `Hi Ibm USA`
 27"""
 28
 29CharacterToken: TypeAlias = Literal["word", "nonword"]
 30"""Type alias for character token types.
 31
 32See `filterByTokens` for usage.
 33"""
 34
 35
 36# ? Common words that should be lowercased in title case (unless at the start/end)
 37commonWords: list[str] = [
 38    "a",
 39    "an",
 40    "the",
 41    "as",
 42    "is",
 43    "are",
 44    "and",
 45    "but",
 46    "at",
 47    "in",
 48    "on",
 49    "of",
 50    "for",
 51    "by",
 52    "to",
 53]
 54"""List of common words to be lowercased in title case (unless at start/end)."""
 55
 56
 57def toPascalCase(input: str, space: bool = True) -> str:
 58    """Convert string to PascalCase, optionally inserting spaces between words and numbers.
 59
 60    Args:
 61        input: The input string.
 62        space: If True, insert spaces between (default: True).
 63
 64    Example:
 65        `One123Four` => `One 123 Four`
 66    """
 67    # ? Not using caseconverter.pascalcase() because it alters allcaps words
 68    separator = " " if space else ""
 69    # A Aa, a A, A 0
 70    expressions = ["([A-Z0-9])([A-Z][a-z])", "([a-z])([A-Z])", "([A-Za-z])([0-9])"]
 71    for exp in expressions:
 72        input = re.sub(rf"{exp}", rf"\1{separator}\2", input)
 73    return input[:1].upper() + input[1:]
 74
 75
 76def toCamelCase(input: str):
 77    """Convert string to camelCase.
 78
 79    Example:
 80        `Hello World` => `helloWorld`
 81    """
 82    return camelcase(input)
 83
 84
 85def toKebabCase(input: str) -> str:
 86    """Convert string to kebab-case.
 87
 88    Example:
 89        `Hello World` => `hello-world`
 90    """
 91    return kebabcase(input)
 92
 93
 94def toSnakeCase(input: str) -> str:
 95    """Convert string to snake_case.
 96
 97    Example:
 98        `Hello World` => `hello_world`
 99    """
100    return snakecase(input)
101
102
def toTitleCase(input: str, retainUpper: bool = True) -> str:
    """
    Convert string to title case, handling special cases and acronyms.

    The input is split on single spaces and each word is processed on its own:

    - Words listed in `commonWords` are lowercased, unless they are the first
      or last word of the input (decided by position, so a common word that
      merely repeats the first/last word is still lowercased).
    - Words containing `-` or `/` are title-cased piece by piece, keeping the
      separators in place.
    - Words starting with two or more uppercase/punctuation characters are
      kept untouched when `retainUpper` is True.
    - Everything else goes through `string.capwords`.

    Args:
        input: The input string.
        retainUpper: If True, retain uppercase acronyms: `True` USA, `False` Usa

    Returns:
        Title-cased string.

    Example:
        `sON Of The USA` => `Son of the USA`
    """
    separators = "-/"

    def _titleCompound(word: str) -> str:
        """Apply title case to every piece of a word around `-` and `/`."""
        # The capturing group makes re.split keep the separators, so joining
        # the pieces back with "" restores the original punctuation.
        return "".join(capwords(piece) for piece in re.split(r"([-/])", word))

    words = input.split(" ")
    lastIndex = len(words) - 1

    def _processWord(index: int, word: str) -> str:
        """Process a single word for title casing."""
        # ? Always lowercase common words in continuous text
        isOnEitherSide = index == 0 or index == lastIndex
        if word.casefold() in commonWords and not isOnEitherSide:
            return word.lower()

        # ? Handle special characters
        if any(char in word for char in separators):
            return _titleCompound(word)

        # Uppercase and punctuation 2+ times at the start of the word.
        # NOTE: the `|` inside the character class is a literal pipe, not an
        # alternation; it is kept so existing results do not change.
        isCaps = regex.match(r"[\p{Lu}|\p{P}]{2,}", word)

        # capwords() better .title() => retains lowercase ’s
        return word if isCaps and retainUpper else capwords(word)

    return " ".join(_processWord(index, word) for index, word in enumerate(words))
152
153
def changeCase(
    input: list[str] | str,
    case: TextCase = "Title",
) -> list[str] | str:
    """
    Change the case of a string or list of strings.

    Args:
        input: String or list of strings to change case.
        case: Desired case ("UPPER", "lower", "Title", "Caps"); compared
            case-insensitively. A falsy value passes `input` through unchanged.

    Returns:
        String or list of strings with changed case. If `case` is not one of
        the supported styles, a warning is logged and `input` is returned
        unchanged.

    Example:
        `the USA`
        - `UPPER` => `THE USA`
        - `lower` => `the usa`
        - `Title` => `The Usa`
        - `Caps`  => `The USA`
    """
    if not case:
        return input  # Pass through unchanged

    target = case.casefold()

    if target not in ("upper", "lower", "title", "caps"):
        # Warn once and hand the input back instead of returning None
        logger.warning("Unable to change case: {}", case)
        return input

    def _change(item: str) -> str:
        if target == "upper":
            return item.upper()
        if target == "lower":
            return item.lower()
        if target == "title":
            return toTitleCase(item, False)
        # Only "caps" is left: title case that keeps acronyms uppercase
        return toTitleCase(item)

    if isinstance(input, list):
        return [_change(item) for item in input]
    return _change(input)
195
196
def isTitleCase(input: str) -> bool:
    """Tell whether every space-separated word is one uppercase letter followed by lowercase letters."""
    titleWord = r"^[\p{Lu}][\p{Ll}]+$"

    for part in input.split(" "):
        if not regex.match(titleWord, part):
            return False

    return True
200
201
def prettifyText(text: str) -> str:
    """
    Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.

    Runs in two passes: first a list of patterns is deleted outright
    (hair spaces, citation markers, editorial notes), then a list of
    substitutions tidies spacing and replaces straight quotes with
    typographic ones. The result is stripped of surrounding whitespace.

    Args:
        text: The input text.

    Returns:
        Prettified text.
    """
    removals = [
        # Hair space (U+200A); written as an escape so it cannot be confused
        # with, or silently turned into, a regular space
        "\u200a",
        # Remove citation references: [12], [a], [3]:45, [3]:45–47
        r"\[[A-Za-z\d]+\](?::?\d+(?:[-–]\d+)?)?",
        # Remove [citation needed]
        r"\[citation needed\]",
        r"\[clarification needed\]",
        # Remove numbered notes of any length: [note 1], [note 12], [NB 3]
        r"\[note \d+\]",
        r"\[NB \d+\]",
    ]
    for removal in removals:
        text = re.sub(removal, "", text)

    replacements = [
        # Multiple spaces to single space
        (r"[ ]{2,}", " "),
        # Replace dumb single quotes
        (r"\'([A-Za-z]+)\'", r"‘\1’"),
        # Replace dumb single quotes used as contractions: it's => it’s
        (r"([A-Za-z])\'([A-Za-z])?", r"\1’\2"),
        # Replace dumb double quotes
        (r"(\s?|^)\"([^\"]+)\"", r"\1“\2”"),
        # Add missing space before ( { [ in text
        (r"(\w)(\(|\[|\{)", r"\1 \2"),
        # Add missing space after ) } ] in text
        (r"(\)|\]|\})(\w)", r"\1 \2"),
    ]

    for before, after in replacements:
        text = re.sub(before, after, text)

    return text.strip()
245
246
def omitMissing(
    input: str | list[str],
    font: str | None = None,
    mode: Literal["glyphs", "words", "lines"] = "words",
    debug=False,
):
    """
    Omit missing characters from text or list of text blocks.

    Each block is cut into units (single characters, space-separated words or
    newline-separated lines, depending on `mode`); units for which
    `drawBot.fontContainsCharacters` is falsy are dropped and the remaining
    units are joined back with the same separator. Blocks with no surviving
    unit are left out of the result.

    Args:
        input: A single string or a list of strings to check for missing glyphs.
        font: Font to use for checking glyphs (optional). When given, it is
            set as the current DrawBot font before checking.
        mode: Determines the omission granularity:
            - `glyphs`: Omit only the missing characters, preserving the rest of the text.
            - `words`: Omit entire words that contain missing glyphs.
            - `lines`: Omit entire lines that contain missing glyphs.
        debug: If True, log omitted units.

    Returns:
        Filtered text or list of text blocks with missing data omitted, depending on mode.

    Raises:
        ValueError: If `mode` is not one of the supported values.
    """
    glueByMode = {"glyphs": "", "words": " ", "lines": "\n"}
    if mode not in glueByMode:
        raise ValueError(
            f"Unknown mode: {mode!r} (expected one of {list(glueByMode)})"
        )
    glue = glueByMode[mode]

    if font:
        drawBot.font(font)

    isInputString = isinstance(input, str)
    blocks = [input] if isInputString else input

    output = []
    for block in blocks:
        # str.split("") raises, so glyph mode walks the characters directly
        units = list(block) if mode == "glyphs" else block.split(glue)

        filtered = []
        for unit in units:
            # Single font query per unit; the result drives both keep and log
            if drawBot.fontContainsCharacters(unit):
                filtered.append(unit)
            elif debug:
                # Log omitted fontName if available
                if font:
                    logger.trace(
                        "[Omitted] {} for {}", unit, fonts.getFontName(font)
                    )
                else:
                    logger.trace("[Omitted] {}", unit)

        # Do not add empty list
        if filtered:
            output.append(glue.join(filtered))

    return glue.join(output) if isInputString else output
304
305
def splitStringToSentences(input: str) -> list[str]:
    """
    Split running text into a list of sentences.

    Newlines are turned into spaces, runs of whitespace are collapsed and the
    text is stripped, so leading/trailing whitespace never produces an empty
    or space-prefixed sentence. The text is then split at whitespace that
    follows `.`, `?` or `!`, except where the preceding text looks like an
    abbreviation or the following character is a lowercase letter.

    Args:
        input: The input text.

    Returns:
        List of sentences. An empty input yields `[""]`.

    Example:
        `I am a sentence. I am another one.` => `["I am a sentence.", "I am another one."]`
    """
    # Newlines with spaces
    text = re.sub(r"\n", " ", input)
    # Multiple spaces to single space
    text = re.sub(r"\s{2,}", " ", text).strip()

    # Skip abbreviations: (F. Elastica), Ficus var. elastica
    sentenceExp = (
        r"(?<!\w\.\w\.)"  # not after dotted abbreviations such as "e.g."
        r"(?<![A-Z][a-z]\.)"  # not after two-letter capitalised abbreviations such as "Dr."
        r"(?<![A-Z]\.)"  # not after initials such as "F."
        r"(?<=[.?!])"  # only right after sentence-ending punctuation
        r"\s"
        r"(?![a-z])"  # next sentence must not start with a lowercase letter
    )
    return re.split(sentenceExp, text)
328
329
def rotateList(input: list) -> list:
    """
    Rotate a list to produce all cyclic permutations.

    Rotations are built by position, so lists containing equal items still
    yield one distinct rotation per starting index.

    Args:
        input: The input list.

    Returns:
        A list with one rotation per item of `input`, the n-th rotation
        starting at the n-th item. An empty input yields an empty list.

    Example:
        `[A, B, C]` => `[[A, B, C], [B, C, A], [C, A, B]]`
    """
    items = list(input)
    return [items[start:] + items[:start] for start in range(len(items))]
350
351
352def chopSequence(input: str | list[str], limit: int = None, glue=" ", split=" "):
353    """
354    Split input into meaningful parts, optionally limiting the number of words: `A B C` => `A, AB, ABC`.
355
356    Args:
357        input: String or list of strings to chop.
358        limit: Limit to `n` words.
359        glue: String to join parts.
360        split: String to split input.
361
362    Example:
363    - input: single sentence
364        - `"I was late."` => `["I", "I was", "I was late."]`
365    - input: list of sentences
366        - `["For me.", "Right?"]` => `["For me.", "For me. Right?"]`
367    - limit: 2
368        - `["I", "I was"]`
369    """
370    if split and isinstance(input, str):
371        input = input.split(split)
372
373    inputLen = len(input)
374    # Limit size if provided
375    stop = min(limit, inputLen) if limit else inputLen
376
377    return [glue.join(input[:i]) for i in range(1, stop + 1)]
378
379
def chopList(
    input: list[str],
    clamp: int | None = None,
    mode: Literal["separate", "connected"] = "separate",
    shuffle=False,
) -> list[str]:
    """
    Chop a list of sentences into smaller parts, optionally connecting or shuffling them.

    The caller's list is never modified: shuffling happens on a copy.

    Args:
        input: List of sentences.
        clamp: Limit to n words per iteration.
        mode: "separate" to chop individually, "connected" to connect chopped sentences.
        shuffle: If True, shuffle input before chopping.

    Returns:
        Flat list with the chopped parts of every sentence (or of every
        rotation of the sentences in "connected" mode).

    Example:
        `["Hello there.", "Hi you."]` =>
        - (separate) `["Hello", "Hello there.", "Hi", "Hi you."]`
        - (connected) `["Hello", "Hello there.", "Hello there. Hi", ...]`
    """
    # Work on a copy so random.shuffle cannot reorder the caller's list
    sentences = list(input)

    if shuffle:
        random.shuffle(sentences)

    if mode == "connected":
        # One long sentence per rotation, each starting at a different item
        sentences = [" ".join(rotation) for rotation in rotateList(sentences)]

    return helpers.flatten([chopSequence(sentence, clamp) for sentence in sentences])
407
408
def permutate(input: list, clamp=20, shuffle=True) -> list:
    """
    Chop a list of sentences in "connected" mode (shorthand for `chopList`).

    Args:
        input: List of sentences.
        clamp: Limit to `n` words per sequence.
        shuffle: If True, shuffle input before permutation.

    Example:
        - `["Hi Tim", "Foo bar"]` => list of
        - `["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]`
    """
    connected = chopList(input, clamp=clamp, mode="connected", shuffle=shuffle)
    return connected
423
424
def fillTextOver(container: tuple, content: list, shuffle: bool = True) -> str:
    """
    Returns a string that fills the container up to overflow.

    Items of `content` are appended one by one (joined with spaces) until the
    measured text height reaches the container height, or the content runs
    out. An empty `content` yields an empty string.

    - Font properties need to be already set

    Args:
        container: Tuple specifying container dimensions.
        content: List of possible sentences/items.
        shuffle: If True, shuffle content before filling.

    Returns:
        The accumulated text.
    """
    containerW, containerH = layout.toDimensions(container)

    if shuffle:
        content = helpers.shuffleAtRandomSegment(content)

    strings = []
    # Defined up front so that empty content returns "" instead of failing
    stream = ""

    for string in content:
        strings.append(string)
        stream = " ".join(strings)
        _, textH = drawBot.textSize(stream, width=containerW)
        # Stop at the first item that makes the text reach the container height
        if textH >= containerH:
            break

    return stream
451
452
def getStringForWidth(pool: list, width: int, threshold: float = 0.995) -> str:
    """
    Pick a string from the pool whose rendered width suits the target width.

    Candidates are measured in pool order; the first one whose width lies
    between `width * threshold` and `width` wins. If none qualifies, the
    candidate whose width `helpers.findClosestValue` reports as closest
    (larger ones discarded) is returned, falling back to the first pool item.

    - Font properties need to be set already

    Args:
        pool: List of candidate strings.
        width: Target width.
        threshold: Minimum width threshold, as a fraction of `width`.
    """
    lowerBound = width * threshold
    upperBound = width

    measured = []
    found = None

    for candidate in pool:
        candidateWidth, _ = drawBot.textSize(candidate)
        measured.append(candidateWidth)

        if lowerBound <= candidateWidth <= upperBound:
            found = candidate
            break

    if found:
        return found

    closest = helpers.findClosestValue(measured, width, discardLarger=True)
    try:
        fallbackIndex = measured.index(closest)
    except ValueError:
        # Closest value is not one of the measured widths
        fallbackIndex = 0
    return pool[fallbackIndex]
493
494
def filterByShape(items: list[str], shape: WordShape | list[WordShape]) -> list[str]:
    """
    Filter out words that contain descender, ascender, or caps characters.

    For every requested shape, the words that do NOT contain any character of
    that shape are collected; the result is the intersection of those
    subsets, as computed by `helpers.intersect`.

    Args:
        items: List of words.
        shape: Shape(s) to filter by.

    Returns:
        List of words free of the given shape(s).

    Example:
        - `["hi", "hey"], "descender"` => `["hi"]`
    """
    patternByShape = {
        "caps": "[A-Z0-9]",
        "ascender": "[bdfihklt]",
        "descender": "[Qgjqpy/,]",
    }

    def _withoutShape(name):
        pattern = patternByShape.get(name)
        return filter(lambda item: re.search(pattern, item) is None, items)

    subsets = []
    for name in helpers.coerceList(shape):
        subsets.append(_withoutShape(name))

    return helpers.intersect(subsets, retainOrder=False)
520
521
def filterByTokens(
    items: list[str], tokens: list[CharacterToken] = ["word", "nonword"]
) -> list[str]:
    """
    Keep the items that contain every requested kind of Unicode character token.

    For each token, the items containing at least one matching character are
    collected (`word`: a letter; `nonword`: a symbol, number or punctuation
    mark); the per-token lists are then combined by `helpers.intersect`.

    Args:
        items: List of strings to filter.
        tokens: List of token types to filter by.

    Returns:
        List of items matching the token criteria.

    Example:
        - `word`, `nonword` => `["A4", "R&B"]`
    """
    patternByToken = {
        "word": r"\p{Letter}",
        "nonword": r"\p{Symbol}|\p{Number}|\p{Punctuation}",
    }

    matchesPerToken = []
    for token in tokens:
        pattern = patternByToken.get(token)
        matchesPerToken.append(
            [item for item in items if regex.search(pattern, item)]
        )

    return helpers.intersect(matchesPerToken)
550
551
def isRagPretty(
    content: Union[str, drawBot.drawBotDrawingTools.FormattedString], coords: tuple
) -> tuple[bool, bool]:
    """
    Evaluate if a paragraph is nicely typeset.

    Line widths are obtained by summing the widths of the character bounds
    reported by `drawBot.textBoxCharacterBounds`, grouped by their y value.
    The last line is judged separately from the body lines.

    Args:
        content: Text content or `FormattedString`.
        coords: Tuple specifying text box coordinates; its third item is
            used as the box width.

    Returns:
        Tuple of booleans (isGreat, isOkay).
        - `isOkay`: every body line is at least 90 % of the box width, and
          the last line is at least 2/3 of the box width but no longer than
          the longest body line
        - `isGreat`: `isOkay`, and at least a third of the body lines reach
          95 % of the box width

        `(False, False)` is returned (and a warning logged) when the
        evaluation fails, e.g. when there are no body lines to compare.
    """

    def _calcLineWidths():
        """Returns widths for all lines except last and for last line."""
        textBounds = drawBot.textBoxCharacterBounds(content, coords)
        linesByY = {}

        for bounds, _, _ in textBounds:
            _, y, w, _ = bounds
            # Segments sharing a y value belong to the same line
            linesByY[y] = linesByY.get(y, 0) + w

        # Insertion order is kept, so the last key is the last line
        last = linesByY.pop(list(linesByY)[-1])
        return list(linesByY.values()), last

    try:
        _, _, width, _ = coords
        bodyWidths, lastWidth = _calcLineWidths()
        # All lines are quite long
        areAllGood = all(w >= width * 0.9 for w in bodyWidths)
        # A portion of lines are very long
        greatCount = sum(1 for w in bodyWidths if w >= width * 0.95)
        areSomeGreat = greatCount >= len(bodyWidths) / 3
        # Last line is not the longest and not a widow
        isLastGood = max(bodyWidths) >= lastWidth >= width * 2 / 3

        isOkay = areAllGood and isLastGood
        # isGreat, isOkay
        return (isOkay and areSomeGreat), isOkay
    except Exception as e:
        # Best-effort check: report "not pretty" in the documented tuple shape
        logger.warning("Failed isRagPretty: {}", e)
        return False, False
603
604
# Default location of the prohibited terms list (one term per line)
_FORBIDDEN_TERMS_PATH = "/Users/christianjansky/Library/CloudStorage/Dropbox/KOMETA-Draw/01 Content/forbidden-any.txt"


def filterForbidden(input: list[str], path: str = _FORBIDDEN_TERMS_PATH) -> list[str]:
    """
    Filter out explicit content using a prohibited terms list (found in `forbidden-....txt`).

    An item is dropped when any prohibited term occurs in it as a substring;
    the comparison is case-insensitive. Blank lines of the terms file are
    ignored.

    Args:
        input: List of strings to filter.
        path: UTF-8 text file with one prohibited term per line. Defaults to
            the project's `forbidden-any.txt`.

    Returns:
        Filtered list with inappropriate content removed.

    Raises:
        OSError: If the terms file cannot be opened.
    """
    with open(path, encoding="utf-8") as f:
        # Lowercase once up front; whitespace-only lines would match nothing useful
        prohibitedTerms = [
            line.lower() for line in f.read().splitlines() if line.strip()
        ]

    clean = []
    for item in input:
        itemLower = item.lower()
        if not any(term in itemLower for term in prohibitedTerms):
            clean.append(item)

    return clean

WordShape: TypeAlias = Literal['descender', 'ascender', 'caps']

Categories to filter words by their shape.

See classes.c32_pool.KPool.getItemByWidth for usage.

TextCase: TypeAlias = Literal['UPPER', 'lower', 'Title', 'Caps']

Defines the supported text casing styles for conversion.

UPPER: Converts all letters to uppercase: hi ibm → HI IBM
lower: Converts all letters to lowercase: HI IBM → hi ibm
Title: Capitalizes the first letter of each word, lowercases the rest: hi ibm → Hi Ibm
Caps: Like Title, but preserves acronyms in uppercase: hi ibm USA → Hi Ibm USA

CharacterToken: TypeAlias = Literal['word', 'nonword']

Type alias for character token types.

See filterByTokens for usage.

commonWords: list[str] = ['a', 'an', 'the', 'as', 'is', 'are', 'and', 'but', 'at', 'in', 'on', 'of', 'for', 'by', 'to']

List of common words to be lowercased in title case (unless at start/end).

def toPascalCase(input: str, space: bool = True) -> str: View Source

58def toPascalCase(input: str, space: bool = True) -> str:
59    """Convert string to PascalCase, optionally inserting spaces between words and numbers.
60
61    Args:
62        input: The input string.
63        space: If True, insert spaces between (default: True).
64
65    Example:
66        `One123Four` => `One 123 Four`
67    """
68    # ? Not using caseconverter.pascalcase() because it alters allcaps words
69    separator = " " if space else ""
70    # A Aa, a A, A 0
71    expressions = ["([A-Z0-9])([A-Z][a-z])", "([a-z])([A-Z])", "([A-Za-z])([0-9])"]
72    for exp in expressions:
73        input = re.sub(rf"{exp}", rf"\1{separator}\2", input)
74    return input[:1].upper() + input[1:]

Convert string to PascalCase, optionally inserting spaces between words and numbers.

Arguments:

input: The input string.
space: If True, insert spaces between words and numbers (default: True).

Example:

One123Four => One 123 Four

def toCamelCase(input: str): View Source

77def toCamelCase(input: str):
78    """Convert string to camelCase.
79
80    Example:
81        `Hello World` => `helloWorld`
82    """
83    return camelcase(input)

Convert string to camelCase.

Example:

Hello World => helloWorld

def toKebabCase(input: str) -> str: View Source

86def toKebabCase(input: str) -> str:
87    """Convert string to kebab-case.
88
89    Example:
90        `Hello World` => `hello-world`
91    """
92    return kebabcase(input)

Convert string to kebab-case.

Example:

Hello World => hello-world

def toSnakeCase(input: str) -> str: View Source

 95def toSnakeCase(input: str) -> str:
 96    """Convert string to snake_case.
 97
 98    Example:
 99        `Hello World` => `hello_world`
100    """
101    return snakecase(input)

Convert string to snake_case.

Example:

Hello World => hello_world

def toTitleCase(input: str, retainUpper: bool = True) -> str: View Source

104def toTitleCase(input: str, retainUpper: bool = True) -> str:
105    """
106    Convert string to title case, handling special cases and acronyms.
107
108    Args:
109        input: The input string.
110        retainUpper: If True, retain uppercase acronyms: `True` USA, `False` Usa
111
112    Returns:
113        Title-cased string.
114
115    Example:
116        `sON Of The USA` => `Son of the USA`
117    """
118    specialChars: list[str] = ["-", "/"]
119
120    def _hasSpecialChars(word: str) -> bool:
121        """Returns True if word contains special characters."""
122        return any(char in word for char in specialChars)
123
124    def _handleSpecialChars(word: str) -> str:
125        """Apply title case to each part of a word split by special characters."""
126        for char in specialChars:
127            if char in word:
128                # ? Split by special char and apply title case to each part
129                parts = word.split(char)
130                return char.join([capwords(part) for part in parts])
131
132    def _processWord(word: str) -> str:
133        """Process a single word for title casing."""
134        # ? Always lowercase common words in continuous text
135        isOnEitherSide = helpers.isFirst(words, word) or helpers.isLast(words, word)
136        isCommon = word.casefold() in commonWords
137        if isCommon and not isOnEitherSide:
138            return word.lower()
139
140        # ? Handle special characters
141        if _hasSpecialChars(word):
142            return _handleSpecialChars(word)
143
144        # Uppercase and punctuation 2+ times
145        isCaps = regex.match(r"[\p{Lu}|\p{P}]{2,}", word)
146
147        # capwords() better .title() => retains lowercase ’s
148        return word if isCaps and retainUpper else capwords(word)
149
150    words = input.split(" ")
151    words = [_processWord(word) for word in words]
152    return " ".join(words)

Convert string to title case, handling special cases and acronyms.

Arguments:

input: The input string.
retainUpper: If True, retain uppercase acronyms (True → USA, False → Usa).

Returns:

Title-cased string.

Example:

sON Of The USA => Son of the USA

def changeCase( input: list[str] | str, case: Literal['UPPER', 'lower', 'Title', 'Caps'] = 'Title') -> list[str] | str: View Source

155def changeCase(
156    input: list[str] | str,
157    case: TextCase = "Title",
158) -> list[str] | str:
159    """
160    Change the case of a string or list of strings.
161
162    Args:
163        input: String or list of strings to change case.
164        case: Desired case ("upper", "lower", "Title", "Caps").
165
166    Returns:
167        String or list of strings with changed case.
168
169    Example:
170        `the USA`
171        - `upper` => `THE USA`
172        - `lower` => `the usa`
173        - `Title` => `The Usa`
174        - `Caps`  => `The USA`
175    """
176
177    def _change(item):
178        if case.casefold() == "upper":
179            return item.upper()
180        elif case.casefold() == "lower":
181            return item.lower()
182        elif case.casefold() == "title":
183            return toTitleCase(item, False)
184        elif case.casefold() == "caps":
185            return toTitleCase(item)
186        else:
187            logger.warning("Unable to change case: {}", case)
188
189    if not case:
190        return input  # Pass through unchanged
191
192    if isinstance(input, list):
193        return [_change(item) for item in input]
194    else:
195        return _change(input)

Change the case of a string or list of strings.

Arguments:

input: String or list of strings to change case.
case: Desired case ("UPPER", "lower", "Title", "Caps"); compared case-insensitively.

Returns:

String or list of strings with changed case.

Example:

the USA

UPPER => THE USA

lower => the usa

Title => The Usa

Caps => The USA

def isTitleCase(input: str) -> bool: View Source

198def isTitleCase(input: str) -> bool:
199    """Returns True if all words in the string are title case."""
200    return all([regex.match(r"^[\p{Lu}][\p{Ll}]+$", part) for part in input.split(" ")])

Returns True if all words in the string are title case.

def prettifyText(text: str) -> str: View Source

203def prettifyText(text: str) -> str:
204    """
205    Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.
206
207    Args:
208        text: The input text.
209
210    Returns:
211        Prettified text.
212    """
213    removals = [
214        # Hair space
215        r" ",
216        # Remove citation references
217        r"\[[A-Za-z\d]+\](?::?\d+(?:[-–]\d+)?)?",
218        # Remove [citation needed]
219        r"\[citation needed\]",
220        r"\[clarification needed\]",
221        r"\[note [\d+]]",
222        r"\[NB [\d+]]",
223    ]
224    for removal in removals:
225        text = re.sub(rf"{removal}", "", text)
226
227    replacements = [
228        # Multiple spaces to single space
229        (r"[ ]{2,}", " "),
230        # Replace dumb single quotes
231        (r"\'([A-Za-z]+)\'", r"‘\1’"),
232        # Replace dumb single quotes used as contractions: it's => it’s
233        (r"([A-Za-z])\'([A-Za-z])?", r"\1’\2"),
234        # Replace dumb double quotes
235        (r"(\s?|^)\"([^\"]+)\"", r"\1“\2”"),
236        # Add missing space before ( { [ in text
237        (r"(\w)(\(|\[|\{)", r"\1 \2"),
238        # Add missing space after ) } ] in text
239        (r"(\)|\]|\})(\w)", r"\1 \2"),
240    ]
241
242    for before, after in replacements:
243        text = re.sub(before, after, text)
244
245    return text.strip()

Removes Wikipedia formatting tokens, extra whitespace and dumb quotes.

Arguments:

text: The input text.

Returns:

Prettified text.

def omitMissing( input: str | list[str], font: str = None, mode: Literal['glyphs', 'words', 'lines'] = 'words', debug=False): View Source

248def omitMissing(
249    input: str | list[str],
250    font: str = None,
251    mode: Literal["glyphs", "words", "lines"] = "words",
252    debug=False,
253):
254    """
255    Omit missing characters from text or list of text blocks.
256
257    Args:
258        input: A single string or a list of strings to check for missing glyphs.
259        font: Font to use for checking glyphs (optional).
260        mode: Determines the omission granularity:
261            - `glyphs`: Omit only the missing characters, preserving the rest of the text.
262            - `words`: Omit entire words that contain missing glyphs.
263            - `lines`: Omit entire lines that contain missing glyphs.
264        debug: If True, log omitted units.
265
266    Returns:
267        Filtered text or list of text blocks with missing data omitted, depending on mode.
268    """
269    if font:
270        drawBot.font(font)
271
272    isInputString = isinstance(input, str)
273    match (mode):
274        case "glyphs":
275            glue = ""
276        case "words":
277            glue = " "
278        case "lines":
279            glue = "\n"
280    blocks = [input] if isInputString else input
281
282    output = []
283    for block in blocks:
284        units = list(block) if mode == "glyphs" else block.split(glue)
285        filtered = [unit for unit in units if drawBot.fontContainsCharacters(unit)]
286
287        if debug:
288            # Log omitted fontName if available
289            logMessage = lambda unit: (
290                ("[Omitted] {} for {}", unit, fonts.getFontName(font))
291                if font
292                else ("[Omitted] {}", unit)
293            )
294            [
295                logger.trace(*logMessage(unit))
296                for unit in units
297                if not drawBot.fontContainsCharacters(unit)
298            ]
299
300        # Do not add empty list
301        if filtered:
302            output.append(glue.join(filtered))
303
304    return glue.join(output) if isInputString else output

Omit missing characters from text or list of text blocks.

Arguments:

input: A single string or a list of strings to check for missing glyphs.
font: Font to use for checking glyphs (optional).
mode: Determines the omission granularity:
- glyphs: Omit only the missing characters, preserving the rest of the text.
- words: Omit entire words that contain missing glyphs.
- lines: Omit entire lines that contain missing glyphs.
debug: If True, log omitted units.

Returns:

Filtered text or list of text blocks with missing data omitted, depending on mode.

def splitStringToSentences(input: str) -> list[str]: View Source

307def splitStringToSentences(input: str) -> list[str]:
308    """
309    Split running text into a list of sentences.
310
311    Args:
312        input: The input text.
313
314    Example:
315        `I am a sentence. I am another one.` => `["I am a sentence.", "I am another one."]`
316    """
317    replacements = [
318        # Newlines with spaces
319        (r"\n", " "),
320        # Multiple spaces to single space
321        (r"\s{2,}", " "),
322    ]
323    for [before, after] in replacements:
324        input = re.sub(rf"{before}", after, input)
325
326    # Skip abbreviations: (F. Elastica), Ficus var. elastica
327    sentenceExp = r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<![A-Z]\.)(?<=\.|\?)\s(?![a-z])"
328    return re.split(sentenceExp, input)

Split running text into a list of sentences.

Arguments:

input: The input text.

Example:

I am a sentence. I am another one. => ["I am a sentence.", "I am another one."]

def rotateList(input: list) -> list: View Source

def rotateList(input: list) -> list:
    """
    Rotate a list to produce all cyclic permutations.

    Rotation is done by position, so lists with repeated items still yield
    one distinct rotation per starting index.

    Args:
        input: The input list.

    Returns:
        List of rotations, one per starting index (empty for empty input).

    Example:
        `[A, B, C]` => `[[A, B, C], [B, C, A], [C, A, B]]`
    """
    items = list(input)
    # Slice by offset instead of searching for the item's value: a value
    # lookup always lands on the first equal element
    return [items[offset:] + items[:offset] for offset in range(len(items))]

Rotate a list to produce all cyclic permutations.

Arguments:

input: The input list.

Example:

[A, B, C] => [[A, B, C], [B, C, A], [C, A, B]]

def chopSequence(input: str | list[str], limit: int = None, glue=' ', split=' '): View Source

353def chopSequence(input: str | list[str], limit: int = None, glue=" ", split=" "):
354    """
355    Split input into meaningful parts, optionally limiting the number of words: `A B C` => `A, AB, ABC`.
356
357    Args:
358        input: String or list of strings to chop.
359        limit: Limit to `n` words.
360        glue: String to join parts.
361        split: String to split input.
362
363    Example:
364    - input: single sentence
365        - `"I was late."` => `["I", "I was", "I was late."]`
366    - input: list of sentences
367        - `["For me.", "Right?"]` => `["For me.", "For me. Right?"]`
368    - limit: 2
369        - `["I", "I was"]`
370    """
371    if split and isinstance(input, str):
372        input = input.split(split)
373
374    inputLen = len(input)
375    # Limit size if provided
376    stop = min(limit, inputLen) if limit else inputLen
377
378    return [glue.join(input[:i]) for i in range(1, stop + 1)]

Split input into meaningful parts, optionally limiting the number of words: A B C => A, AB, ABC.

Arguments:

input: String or list of strings to chop.
limit: Limit to n words.
glue: String to join parts.
split: String to split input.

Example:

input: single sentence
- "I was late." => ["I", "I was", "I was late."]
input: list of sentences
- ["For me.", "Right?"] => ["For me.", "For me. Right?"]
limit: 2
- ["I", "I was"]

def chopList( input: list[str], clamp: int = None, mode: Literal['separate', 'connected'] = 'separate', shuffle=False) -> list[str]: View Source

def chopList(
    input: list[str],
    clamp: int | None = None,
    mode: Literal["separate", "connected"] = "separate",
    shuffle=False,
) -> list[str]:
    """
    Chop a list of sentences into smaller parts, optionally connecting or shuffling them.

    The caller's list is never reordered: shuffling happens on a copy.

    Args:
        input: List of sentences.
        clamp: Limit to n words per iteration.
        mode: "separate" to chop individually, "connected" to connect chopped sentences.
        shuffle: If True, shuffle a copy of the input before chopping.

    Example:
        `["Hello there.", "Hi you."]` =>
        - (separate) `["Hello", "Hello there.", "Hi", "Hi you."]`
        - (connected) `["Hello", "Hello there.", "Hello there. Hi", ...]`
    """
    # Work on a copy: random.shuffle reorders its argument in place
    sentences = list(input)

    if shuffle:
        random.shuffle(sentences)

    if mode == "connected":
        # Every rotation of the sentence list becomes one long running text
        sentences = [" ".join(rotation) for rotation in rotateList(sentences)]

    return helpers.flatten([chopSequence(sentence, clamp) for sentence in sentences])

Chop a list of sentences into smaller parts, optionally connecting or shuffling them.

Arguments:

input: List of sentences.
clamp: Limit to n words per iteration.
mode: "separate" to chop individually, "connected" to connect chopped sentences.
shuffle: If True, shuffle input before chopping.

Example:

["Hello there.", "Hi you."] =>

(separate) ["Hello", "Hello there.", "Hi", "Hi you."]

(connected) ["Hello", "Hello there.", "Hello there. Hi", ...]

def permutate(input: list, clamp=20, shuffle=True) -> list: View Source

def permutate(input: list, clamp=20, shuffle=True) -> list:
    """
    Chop a list of sentences into connected sequences.

    Shorthand for `chopList` in `"connected"` mode.

    Args:
        input: List of sentences.
        clamp: Limit to `n` words per sequence.
        shuffle: If True, shuffle input before permutation.

    Example:
        - `["Hi Tim", "Foo bar"]` => list of
        - `["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]`
    """
    return chopList(input, clamp=clamp, mode="connected", shuffle=shuffle)

Permutate and chop a list of sentences into connected sequences.

Arguments:

input: List of sentences.
clamp: Limit to n words per sequence.
shuffle: If True, shuffle input before permutation.

Example:

["Hi Tim", "Foo bar"] => list of

["Hi", "Hi Tim", "Hi Tim Foo", ...], ["Foo", "Foo bar", "Foo bar Hi", ...]

def fillTextOver(container: tuple, content: list, shuffle: bool = True) -> str: View Source

def fillTextOver(container: tuple, content: list, shuffle: bool = True) -> str:
    """
    Returns a string that fills the container up to overflow.

    Items are appended one by one (joined with spaces) until the measured
    text height reaches the container height or the content runs out.

    - Font properties need to be already set

    Args:
        container: Tuple specifying container dimensions.
        content: List of possible sentences/items.
        shuffle: If True, shuffle content before filling.

    Returns:
        The joined text; an empty string if `content` is empty.
    """
    containerW, containerH = layout.toDimensions(container)

    if shuffle:
        content = helpers.shuffleAtRandomSegment(content)

    strings = []
    # Defined up front so empty content returns "" instead of raising
    # UnboundLocalError at the return statement
    stream = ""

    for string in content:
        strings.append(string)
        stream = " ".join(strings)
        _, textH = drawBot.textSize(stream, width=containerW)
        # Stop as soon as the text is at least as tall as the container
        if textH >= containerH:
            break

    return stream

Returns a string that fills the container up to overflow.

Font properties need to be already set

Arguments:

container: Tuple specifying container dimensions.
content: List of possible sentences/items.
shuffle: If True, shuffle content before filling.

def getStringForWidth(pool: list, width: int, threshold: float = 0.995) -> str: View Source

def getStringForWidth(pool: list, width: int, threshold: float = 0.995) -> str:
    """
    Pick a string from the pool whose rendered width fits the target width.

    The first candidate measuring between `width * threshold` and `width`
    wins. If none qualifies, the candidate whose width
    `helpers.findClosestValue` reports as closest is returned, falling back
    to the first pool item.

    - Font properties need to be set already

    Args:
        pool: List of candidate strings.
        width: Target width.
        threshold: Minimum width threshold.
    """
    lowerBound = width * threshold
    upperBound = width

    measured = []
    found = None

    for text in pool:
        textWidth, _ = drawBot.textSize(text)
        measured.append(textWidth)

        if lowerBound <= textWidth <= upperBound:
            found = text
            break

    if found:
        return found

    # No candidate in range: use the closest measured width
    nearest = helpers.findClosestValue(measured, width, discardLarger=True)
    fallbackIndex = 0
    if nearest in measured:
        fallbackIndex = measured.index(nearest)
    return pool[fallbackIndex]

Get a string from the pool that fits within the specified width.

Font properties need to be set already

Arguments:

pool: List of candidate strings.
width: Target width.
threshold: Minimum width threshold.

def filterByShape( items: list[str], shape: Union[Literal['descender', 'ascender', 'caps'], list[Literal['descender', 'ascender', 'caps']]]) -> list[str]: View Source

def filterByShape(items: list[str], shape: WordShape | list[WordShape]) -> list[str]:
    """
    Filter out words that contain descender, ascender, or caps characters.

    For each requested shape, the words containing none of that shape's
    characters are selected; the per-shape selections are then combined
    with `helpers.intersect`.

    Args:
        items: List of words.
        shape: Shape(s) to filter by.

    Returns:
        Result of intersecting the per-shape selections (order not retained).

    Example:
        - `["hi", "hey"], "descender"` => `["hi"]`
    """
    shapePatterns = {
        "caps": "[A-Z0-9]",
        "ascender": "[bdfihklt]",
        "descender": "[Qgjqpy/,]",
    }

    def _withoutShape(shapeName):
        # Look the pattern up now; the filter itself is evaluated lazily
        pattern = shapePatterns.get(shapeName)
        return filter(lambda word: re.search(pattern, word) is None, items)

    subsets = []
    for requested in helpers.coerceList(shape):
        subsets.append(_withoutShape(requested))

    return helpers.intersect(subsets, retainOrder=False)

Filter words by descender, ascender, or caps shape.

Arguments:

items: List of words.
shape: Shape(s) to filter by.

Returns:

List of words that contain none of the characters of the given shape(s).

Example:

["hi", "hey"], "descender" => ["hi"]

def filterByTokens( items: list[str], tokens: list[typing.Literal['word', 'nonword']] = ['word', 'nonword']) -> list[str]: View Source

def filterByTokens(
    items: list[str], tokens: list[CharacterToken] = ["word", "nonword"]
) -> list[str]:
    """
    Filter items by Unicode character token.

    For each token, the items containing at least one character of that
    category are collected (`word`: letters; `nonword`: symbols, numbers or
    punctuation); the per-token lists are then combined with
    `helpers.intersect`.

    Args:
        items: List of strings to filter.
        tokens: List of token types to filter by.

    Returns:
        Result of intersecting the per-token matches.

    Example:
        - `word`, `nonword` => `["A4", "R&B"]`
    """
    tokenPatterns = {
        "word": r"\p{Letter}",
        "nonword": r"\p{Symbol}|\p{Number}|\p{Punctuation}",
    }

    matchesPerToken = []
    for token in tokens:
        pattern = tokenPatterns.get(token)
        matchesPerToken.append(
            [item for item in items if regex.search(pattern, item)]
        )

    return helpers.intersect(matchesPerToken)

Filter items by Unicode character token.

Arguments:

items: List of strings to filter.
tokens: List of token types to filter by.

Returns:

List of items matching the token criteria.

Example:

word, nonword => ["A4", "R&B"]

def isRagPretty( content: Union[str, drawBot.context.baseContext.FormattedString], coords: tuple) -> tuple[bool, bool]: View Source

def isRagPretty(
    content: Union[str, drawBot.drawBotDrawingTools.FormattedString], coords: tuple
) -> tuple[bool, bool]:
    """
    Evaluate if a paragraph is nicely typeset.

    Line widths are summed from `drawBot.textBoxCharacterBounds` segments
    grouped by their y position, then compared against the box width taken
    from `coords`.

    Args:
        content: Text content or `FormattedString`.
        coords: Tuple specifying text box coordinates; the third value is used as width.

    Returns:
        Tuple of booleans (isGreat, isOkay).
        - `isGreat`: `isOkay`, and at least a third of the body lines reach 95% of the width
        - `isOkay`: All body lines reach 90% of the width, and the last line is
          at least 2/3 of the width without exceeding the longest body line
        - `(False, False)` if the evaluation fails (a warning is logged)
    """

    def _calcLineWidths():
        """Returns widths for all lines except last and for last line."""
        textBounds = drawBot.textBoxCharacterBounds(content, coords)
        linesByY = dict()

        for segment in textBounds:
            bounds, _, _ = segment
            # Presumably (x, y, width, height); only y and width are used
            _, y, w, _ = bounds
            # Segments sharing a y value belong to the same line
            linesByY[y] = linesByY.get(y, 0) + w

        # The last inserted line is treated as the paragraph's last line
        last = linesByY.pop(list(linesByY)[-1])
        return list(linesByY.values()), last

    try:
        _, _, width, _ = coords
        bodyWidths, lastWidth = _calcLineWidths()
        # All lines are quite long
        areAllGood = all(w >= width * 0.9 for w in bodyWidths)
        # A portion of lines are very long
        greatCount = sum(1 for w in bodyWidths if w >= width * 0.95)
        areSomeGreat = greatCount >= len(bodyWidths) / 3
        # Last line is not longest and not a widow
        isLastGood = max(bodyWidths) >= lastWidth >= width * 2 / 3

        isOkay = areAllGood and isLastGood
        # isGreat, isOkay
        return (isOkay and areSomeGreat), isOkay
    except Exception as e:
        # Best effort: e.g. a single-line paragraph has no body lines for max().
        # Return a tuple so callers can always unpack the result.
        logger.warning("Failed isRagPretty: {}", e)
        return False, False

Evaluate if a paragraph is nicely typeset.

Arguments:

content: Text content or FormattedString.
coords: Tuple specifying text box coordinates.

Returns:

Tuple of booleans (isGreat, isOkay).

isGreat: All quite long, some very long

isOkay: All quite long

def filterForbidden(input: list[str]) -> list[str]: View Source

def filterForbidden(
    input: list[str],
    termsPath: str = "/Users/christianjansky/Library/CloudStorage/Dropbox/KOMETA-Draw/01 Content/forbidden-any.txt",
) -> list[str]:
    """
    Filter out explicit content using a prohibited terms list (found in `forbidden-....txt`).

    An item is dropped when it contains any prohibited term as a
    case-insensitive substring. Blank and whitespace-only lines in the terms
    file are ignored; other lines are matched as written (surrounding
    whitespace included).

    Args:
        input: List of strings to filter.
        termsPath: Path to a UTF-8 text file with one prohibited term per line.
            The default is a machine-specific absolute path.

    Returns:
        Filtered list with inappropriate content removed.

    Raises:
        OSError: If the terms file cannot be opened.
    """
    with open(termsPath, encoding="utf-8") as f:
        # Lowercase once up front instead of once per item/term pair
        prohibitedTerms = [
            line.lower() for line in f.read().splitlines() if line.strip()
        ]

    clean = []
    for item in input:
        itemLower = item.lower()
        if not any(term in itemLower for term in prohibitedTerms):
            clean.append(item)

    return clean

Filter out explicit content using a prohibited terms list (found in forbidden-....txt).

Arguments:

input: List of strings to filter.

Returns:

Filtered list with inappropriate content removed.