Source code for fast.density

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from pybgl.automaton import Automaton, vertices
from pybgl.regexp import compile_dfa
# from .brzozowski_minimization import brzozowski_minimization
from .regexp_ast import RegexpAst


[docs]def dfa_density(dfa: Automaton, length: int, char_proba: float): """ Computes the ratio of the number of words of length ``length`` accepted by an automaton ``dfa``, over the number of words of length ``length``. This corresponds to the density of ``a`` if we restrict to the words of length ``length``. Args: dfa (Automaton): A :py:class:`pybgl.Automaton` instance. length (int): The considered length. Must be a positive integer. char_proba (float): The probability to pick a given character in the alphabet. Should probably be set to ``1 / len(dfa.alphabet())`` ? Returns: The density of ``a`` if we restrict to the words of length ``length``. """ # TODO: this is a kind of pagerank assuming that DFA is acyclic, but the # results depends on the order the vertices are visited. So the implementation # below is inaccurate. # TODO: remove char_proba parameter. map_q_proba = { q: 1 if dfa.is_initial(q) else 0 for q in vertices(dfa) } for _ in range(length): new_map_q_proba = {q: 0 for q in vertices(dfa)} for (q, adj_q) in dfa.m_adjacencies.items(): for (a, r) in adj_q.items(): new_map_q_proba[r] += char_proba * map_q_proba[q] map_q_proba = new_map_q_proba return sum( map_q_proba[q] if dfa.is_final(q) else 0 for q in vertices(dfa) )
[docs]def ast_density( ast: RegexpAst, map_len_proba: dict, char_proba: float, map_pa_infix_re: dict = None ) -> float: """ Compute the density related to a given regular expression abstract syntax tree. Args: ast (RegexpAst): A :py:class:`RegexpAst` instance. map_len_proba (dict): char_proba (float): The probability to pick a given character in the alphabet. Should probably be set to ``1 / len(dfa.alphabet())`` ? map_pa_infix_re (dict): A ``dict{PatternAutomaton : str}`` that maps each :py:class:`PatternAutomaton` to its corresponding regular expression. Returns: The density of ``ast``. """ # print(map_len_proba) # print(" Computing density of %s" % ast.to_prefix_regexp_str()) infix_regexp = ast.to_infix_regexp_str().replace(".", "") # print(" %s" % map_pa_infix_re) # print(" before replacing: %s" % infix_regexp) if map_pa_infix_re: for (pa, infix_re) in map_pa_infix_re.items(): infix_regexp = infix_regexp.replace(pa, "(" + infix_re + ")") # print(" after replacing: %s" % infix_regexp) dfa = compile_dfa(infix_regexp) # mini_dfa = brzozowski_minimization(dfa) map_example_len_density = { # length: dfa_density(mini_dfa, length, char_proba) length: dfa_density(dfa, length, char_proba) for length in map_len_proba.keys() } result = sum( map_example_len_density[length] * map_len_proba[length] for length in map_len_proba.keys() ) # print(" Done, density = %s" % result) return result