Source code for fast.random

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from functools import partial
from random import choice, randrange, random
from pybgl.automaton import Automaton, EdgeDescriptor
from pybgl.shunting_yard_postfix import Ast, MAP_OPERATORS_RE


[docs]def reject_sampling( sample, repeat: bool = True, max_sampling: int = 1000 ): """ Reject sampling procedure. Args: sample (callable): A `Callback() -> Result` callback which draws a random sample. If it returns `None`, the sample is rejected. repeat (bool): Pass `True` to rerun sampling in case of reject. max_sampling (int): Maximum number of sampling. Pass `None` to continue sampling until finding a non-rejected value. Note that if `sample` always returns `None`, this results to an infinite loop. Returns: The sampled instances, ``None`` otherwise. """ if repeat: if max_sampling: for _ in range(max_sampling): ret = sample() if ret is not None: break else: ret = None while ret is None: ret = sample() else: ret = sample() return ret
[docs]def random_word_from_automaton( g: Automaton, p: float = 0.5, repeat: bool = True, max_sampling: int = 1000 ) -> str: """ Perform a uniform random walk over an `Automaton` to generate a word accepted by this `Automaton`. Args: g (Automaton): A DFA. p (float): A value in [0, 1] corresponding to the probability to stop once a final state is reached repeat (bool): Pass ``True`` to try again while the sample is rejected. max_sampling (int): Max number of reject samplings. If you pass ``None``, the algorithm continues until finding a sample, but depending on `p` and `g`, this may result to an infinite loop. Returns: A string representing the word discovered during the walk, or ``None`` if the walk led to a non-final state without successor. """ def find_ith_edge(q: int, i: int, g: Automaton) -> EdgeDescriptor: for e in g.out_edges(q): if i == 0: break i -= 1 return e def sample(g, p) -> str: q = g.initial() w = "" # If we stop on q0 while True: r = random() if g.is_final(q) and p <= r: return w d = g.out_degree(q) if not d: # Trapped! Reject to avoid biases. return None i = randrange(d) e = find_ith_edge(q, i, g) q = g.target(e) w += g.label(e) if not 0 <= p < 1: raise RuntimeError("p = %s must be a float between 0.0 and 1.0") return reject_sampling(partial(sample, g=g, p=p), repeat, max_sampling)
# def random_words_from_automata( # dfas: list, # num_words: int = 30, # max_length: float = float("inf"), # distribution_dfa: list = None, # p: float = 0.5, # repeat: bool = True, # max_sampling: int = 1000 # ) -> list: # """ # Concatenates `num_words` random words. # Each words is randomly drawn by performing a random (uniform) walk # on an input automaton picked at random (uniform). # # Args: # dfas: A `list` of `Automaton` instances. # num_words: An `int` to the number of drawn sub-words. # max_length: The maximum word length allowed once words are concatenated. # distribution_dfa: A list mapping each `Automaton` of `dfas` with its # probability. `distribution_dfa` must sum to `1.0`. Pass `None` # for uniform distribution. # + parameters of `random_word_from_automaton`. # Returns: # A `list` of `str` resulting from the samplings. # Some elements of the list may be `None` in case of reject. # """ # if distribution_dfa is None: # distribution_dfa = [1 / len(dfas) for _ in range(len(dfas))] # assert len(distribution_dfa) == len(dfas) # assert 0.99 < sum(distribution_dfa) < 1.01 # # def sample(dfas, num_words, repeat, max_sampling): # words = list() # for i in range(num_words): # g = choices(population=dfas, weights=distribution_dfa)[0] # w = random_word_from_automaton(g, p, repeat, max_sampling) # words.append(w) # return words if len("".join(words)) < max_length else None # # return reject_sampling( # partial(sample, dfas, num_words, repeat, max_sampling), # repeat, # max_sampling # )
[docs]def random_ast( re_len: int, alphabet: list = None, map_operators: dict = MAP_OPERATORS_RE, ) -> tuple: """ Returns a random AST and its root node. Args: re_len(int): The number of nodes of the AST of the output regular expression. alphabet(list): A ``list`` gathering the character of the alphabet of the regular expressions. Default to ``list('a...z')``. map_operators(dict): A ``dict{str: pybgl.shunting_yard_postfix.Op}`` mapping each meta-character with its corresponding attributes. Defaults to ``pybgl.shunting_yard_postfix.MAP_OPERATORS_RE``. Returns: A ``tuple(pybgl.shunting_yard_postfix.Ast, int)`` gathering the output AST and its root node. """ def random_ast_rec(re_len: int) -> tuple: if re_len == 1: a = choice(alphabet) root = ast.add_vertex(a) elif re_len == 2: a = choice(alphabet) operator = choice(unary_operators) child = ast.add_vertex(a) root = ast.add_vertex(operator) ast.add_edge(root, child) else: card = choice([1, 2]) operator = choice(binary_operators) if card == 2 else choice(unary_operators) root = ast.add_vertex(operator) if card == 2: re_len_left = randrange(1, re_len - 1) re_len_right = re_len - re_len_left - 1 root_left = random_ast_rec(re_len_left) root_right = random_ast_rec(re_len_right) ast.add_edge(root, root_left) ast.add_edge(root, root_right) elif card == 1: root_child = random_ast_rec(re_len - 1) ast.add_edge(root, root_child) return root if not alphabet: alphabet = list(chr(i) for i in range(ord("a"), ord("z") + 1)) if any(op_obj.cardinality > 2 for op_obj in map_operators.values()): raise NotImplementedError ast = Ast() unary_operators = None binary_operators = None if re_len >= 2: unary_operators = [ operator for (operator, attrs) in map_operators.items() if attrs.cardinality == 1 ] if re_len >= 3: binary_operators = [ operator for (operator, attrs) in map_operators.items() if attrs.cardinality == 2 ] ast.root = random_ast_rec(re_len) return ast