Skip to content

random (2)

choices

choices(population, weights=None, *, cum_weights=None, k=1)

Return a k sized list of population elements chosen with replacement.

Example

Jinja call:

{{ [1, 2, 3] | choices(k=2) }}
Result: [1, 1]

DocStrings
Source code in python3.12/random.py
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
def choices(self, population, weights=None, *, cum_weights=None, k=1):
    """Pick k elements from population, with replacement, as a list.

    Every element is equally likely unless relative weights or
    cumulative weights are supplied; at most one of the two may be given.

    Raises TypeError when both weights and cum_weights are passed, or
    when an int is passed as weights (k has to be given by keyword).
    Raises ValueError when the number of weights differs from the
    population size, or when their total is not positive or not finite.

    """
    draw = self.random
    size = len(population)

    if cum_weights is not None:
        if weights is not None:
            raise TypeError('Cannot specify both weights and cumulative weights')
    elif weights is None:
        # Unweighted: scale a uniform draw to an index.  The size is
        # taken as a float once, outside the per-selection expression.
        to_index = _floor
        span = size + 0.0
        return [population[to_index(draw() * span)]
                for _ in _repeat(None, k)]
    else:
        try:
            cum_weights = list(_accumulate(weights))
        except TypeError:
            if isinstance(weights, int):
                # An int in the weights position is reported as a
                # misplaced k rather than as an accumulate() failure.
                k = weights
                raise TypeError(
                    f'The number of choices must be a keyword argument: {k=}'
                ) from None
            raise

    if len(cum_weights) != size:
        raise ValueError('The number of weights does not match the population')

    total = cum_weights[-1] + 0.0   # float total of all the weights
    if total <= 0.0:
        raise ValueError('Total of weights must be greater than zero')
    if not _isfinite(total):
        raise ValueError('Total of weights must be finite')

    # Bisect over all but the last cumulative weight, so the returned
    # index never exceeds size - 1.
    locate = _bisect
    last = size - 1
    return [population[locate(cum_weights, draw() * total, 0, last)]
            for _ in _repeat(None, k)]

sample

sample(population, k, *, counts=None)

Chooses k unique random elements from a population sequence.

Example

Jinja call:

{{ [1, 2, 3] | sample(2) }}
Result: [1, 3]

DocStrings
Source code in python3.12/random.py
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
def sample(self, population, k, *, counts=None):
    """Chooses k unique random elements from a population sequence.

    Returns a new list containing elements from the population while
    leaving the original population unchanged.  The resulting list is
    in selection order so that all sub-slices will also be valid random
    samples.  This allows raffle winners (the sample) to be partitioned
    into grand prize and second place winners (the subslices).

    Members of the population need not be hashable or unique.  If the
    population contains repeats, then each occurrence is a possible
    selection in the sample.

    Repeated elements can be specified one at a time or with the optional
    counts parameter.  For example:

        sample(['red', 'blue'], counts=[4, 2], k=5)

    is equivalent to:

        sample(['red', 'red', 'red', 'red', 'blue', 'blue'], k=5)

    To choose a sample from a range of integers, use range() for the
    population argument.  This is especially fast and space efficient
    for sampling from a large population:

        sample(range(10000000), 60)

    Raises TypeError if the population is not a sequence or if the
    total of counts is not an integer.  Raises ValueError if the number
    of counts does not match the population, if the total of counts is
    not greater than zero (this includes an empty population given
    together with empty counts), or if k is negative or larger than the
    population.

    """

    # Sampling without replacement entails tracking either potential
    # selections (the pool) in a list or previous selections in a set.

    # When the number of selections is small compared to the
    # population, then tracking selections is efficient, requiring
    # only a small set and an occasional reselection.  For
    # a larger number of selections, the pool tracking method is
    # preferred since the list takes less space than the
    # set and it doesn't suffer from frequent reselections.

    # The number of calls to _randbelow() is kept at or near k, the
    # theoretical minimum.  This is important because running time
    # is dominated by _randbelow() and because it extracts the
    # least entropy from the underlying random number generators.

    # Memory requirements are kept to the smaller of a k-length
    # set or an n-length list.

    # There are other sampling algorithms that do not require
    # auxiliary memory, but they were rejected because they made
    # too many calls to _randbelow(), making them slower and
    # causing them to eat more entropy than necessary.

    if not isinstance(population, _Sequence):
        raise TypeError("Population must be a sequence.  "
                        "For dicts or sets, use sorted(d).")
    n = len(population)
    if counts is not None:
        cum_counts = list(_accumulate(counts))
        if len(cum_counts) != n:
            raise ValueError('The number of counts does not match the population')
        # With an empty population and empty counts there is nothing to
        # pop.  Use a total of zero so the ValueError below is raised
        # instead of an IndexError escaping from list.pop().
        total = cum_counts.pop() if cum_counts else 0
        if not isinstance(total, int):
            raise TypeError('Counts must be integers')
        if total <= 0:
            raise ValueError('Total of counts must be greater than zero')
        # Sample positions in the expanded population, then map each
        # position back to its element through the cumulative counts.
        selections = self.sample(range(total), k=k)
        bisect = _bisect
        return [population[bisect(cum_counts, s)] for s in selections]
    randbelow = self._randbelow
    if not 0 <= k <= n:
        raise ValueError("Sample larger than population or is negative")
    result = [None] * k
    setsize = 21        # size of a small set minus size of an empty list
    if k > 5:
        setsize += 4 ** _ceil(_log(k * 3, 4))  # table size for big sets
    if n <= setsize:
        # An n-length list is smaller than a k-length set.
        # Invariant:  non-selected at pool[0 : n-i]
        pool = list(population)
        for i in range(k):
            j = randbelow(n - i)
            result[i] = pool[j]
            pool[j] = pool[n - i - 1]  # move non-selected item into vacancy
    else:
        selected = set()
        selected_add = selected.add
        for i in range(k):
            j = randbelow(n)
            while j in selected:
                j = randbelow(n)
            selected_add(j)
            result[i] = population[j]
    return result