#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from collections import deque
from copy import deepcopy
from pybgl.graph import EdgeDescriptor, to_dot
from pybgl.graphviz import enrich_kwargs
from pybgl.property_map import make_func_property_map
from pybgl.ipynb import ipynb_display_graph
from .pattern_automaton import PatternAutomaton
[docs]class RegexpAst:
"""
n-ary representations of a regular expression AST.
Do not use binary AST otherwise the priority queue will explode!
Note that calling simplify transforms an arbitrary AST to a n-ary AST.
"""
ROOT = "root" # TODO ROOT = "⊥"
def __init__(self):
"""
Constructor.
"""
self.num_nodes = 0 # counts the actual number of nodes
self.nodes_id = 0 # node id for next new node
self.map_node_label = dict() # Maps vertex to its label
self.map_node_parent = dict() # Maps a vertex to its parent node # TODO use defaultdict(lambda: None)
self.map_node_children = dict() # Maps a vertex to its child(ren) (if any) # TODO use defaultdict(list)
# Create a root node
self.root = self.add_node(label=self.ROOT)
self.set_child(self.root, None)
# TODO rename to add_vertex
[docs] def add_node(self, label: str) -> int:
"""
Add a new node to this AST.
Args:
label (str): The label of the added node.
Returns:
The newly added vertex descriptor.
"""
new_node = self.nodes_id
self.map_node_label[new_node] = label
self.map_node_children[new_node] = []
self.map_node_parent[new_node] = None
self.nodes_id += 1
self.num_nodes += 1
return new_node
# TODO rename to remove_vertex
[docs] def remove_node(self, u: int):
"""
Removes a node from this :py:class:`RegexpAst` instance.
Args:
u (int): The vertex descriptor of the node to be removed.
"""
del self.map_node_label[u]
del self.map_node_parent[u]
del self.map_node_children[u]
self.num_nodes -= 1
[docs] def is_downwards_arc(self, u: int, v: int) -> bool:
"""
Checks whether an ``(u, v)`` arc is downward
(i.e., if ``v`` is a child of ``u``).
Args:
u (int): The vertex descriptor of the source of the arc.
v (int): The vertex descriptor of the target of the arc.
Returns:
``True`` iff ``(u, v)`` is downward.
"""
return self.map_node_parent[v] == u
[docs] def is_upwards_arc(self, u: int, v: int) -> bool:
"""
Checks whether an ``(u, v)`` arc is upward
(i.e., if ``u`` is a child of ``v``).
Args:
u (int): The vertex descriptor of the source of the arc.
v (int): The vertex descriptor of the target of the arc.
Returns:
``True`` iff ``(u, v)`` is upward, ``False`` otherwise.
"""
return self.is_downwards_arc(v, u)
# TODO why not using len(self.map_node_children) == 1 ?
[docs] def is_unary(self, u: int) -> bool:
"""
Checks whether a node is unary.
It concerns nodes labeled by an unary regular expression operator
(i.e., "+", "*", "?") and the root of this :py:class:``RegexpAst`` instance.
Args:
u (int): The vertex descriptor of the considered node.
Returns:
``True`` iff ``u`` is unary, ``False`` otherwise.
"""
return self.map_node_label[u] in {"+", "*", "?", self.ROOT}
# TODO why not using len(self.map_node_children) > 1 ?
[docs] def is_n_ary(self, u: int) -> bool:
"""
Checks whether a node is ``n``-ary, ``n > 1``.
It concerns nodes labeled by an ``n``-ary regular expression operator
(i.e., ".", "|") and the root of this :py:class:``RegexpAst`` instance.
Args:
u (int): The vertex descriptor of the considered node.
Returns:
``True`` iff ``u`` is ``n``-ary, ``False`` otherwise.
"""
return self.map_node_label[u] in {".", "|"}
# TODO why not using len(self.map_node_children) == 0 ?
[docs] def is_leaf(self, u: int) -> bool:
"""
Checks whether a node is a leaf.
It concerns nodes labeled by a symbol of the alphabet.
Args:
u (int): The vertex descriptor of the considered node.
Returns:
``True`` iff ``u`` is a leaf, ``False`` otherwise.
"""
return not self.is_n_ary(u) and not self.is_unary(u)
[docs] def get_arc_index(self, u: int, v: int) -> int:
"""If (u,v) is downwards, and v is the i^th child of u, returns i.
If (u,v) is upwards, and u is the i^th child of v, returns i."""
if self.is_upwards_arc(u, v):
return self.get_arc_index(v, u)
return self.map_node_children[u].index(v)
[docs] def label(self, u) -> str:
"""
Retrieves the label of a given vertex.
Args:
u (int): The vertex descriptor of the considered node.
Returns:
The label of ``u``.
"""
return self.map_node_label[u]
[docs] def set_label(self, u: int, label: str):
"""
Sets the label of a given vertex.
Args:
u (int): The vertex descriptor of the considered node.
label (str): Set the label of a given node.
"""
self.map_node_label[u] = label
[docs] def parent(self, u: int) -> int:
"""
Retrieves the parent of a given vertex.
Args:
u (int): The vertex descriptor of the considered node.
Returns:
The parent of ``u`` if any or ``None``.
"""
return self.map_node_parent[u]
[docs] def children(self, u: int) -> iter:
"""
Retrieves the children of a given vertex.
Args:
u (int): The vertex descriptor of the considered node.
Returns:
The parent of ``u`` if any or ``None``.
"""
return self.map_node_children[u]
[docs] def first_child(self, u: int) -> int:
"""
Retrieves the first child of a given vertex.
Args:
u (int): The vertex descriptor of the considered node.
Returns:
The vertex descriptor of the first child of ``u``.
Raises:
`IndexError` if the node is a leaf. See also :py:meth:`RegexpAst.is_leaf`.
"""
return self.map_node_children[u][0]
[docs] def ith_child(self, u: int, i: int) -> int:
"""
Retrieves the ``i``-th child of a given vertex.
Args:
u (int): The vertex descriptor of the considered node.
i (int): The child index.
Returns:
The vertex descriptor of the ``i``-th child of ``u``.
Raises:
`IndexError` if the node is a leaf. See also :py:meth:`RegexpAst.is_leaf`.
"""
return self.map_node_children[u][i]
# TODO why v ?
[docs] def is_last_child(self, u: int, v: int) -> bool:
"""
Checks whether ``u`` is the last child of ``v``.
Args:
u (int): The vertex descriptor of the considered children.
v (int): The vertex descriptor of the parent node of ``u``.
Returns:
``True`` if and only if ``u`` is the last child of ``v``.
"""
return self.map_node_children[v][-1] == u
[docs] def set_child(self, u: int, v: int, set_parent: bool = True):
"""
Sets ``v`` as the only child of ``u``.
Args:
u (int): The vertex descriptor of the parent node.
v (int): The vertex descriptor of the child node.
set_parent (bool): Pass ``True`` to update the parent of ``v``.
Defaults to ``True``.
"""
self.map_node_children[u] = [v]
if set_parent:
self.map_node_parent[v] = u
[docs] def set_children(self, u: int, vs: iter, set_parents: bool = True):
"""
Sets the children of ``u``.
Args:
u (int): The vertex descriptor of the parent node.
vs (iter): The vertex descriptors of the child nodes.
set_parent (bool): Pass ``True`` to update the parent of ``v``.
Defaults to ``True``.
"""
self.map_node_children[u] = vs
if set_parents:
for v in vs:
self.map_node_parent[v] = u
[docs] def set_ith_child(self, u: int, v: int, i: int, set_parent: bool = True):
"""
Sets the ``i``-th children of ``u`` to ``v``.
Args:
u (int): The vertex descriptor of the parent node.
v (int): The vertex descriptor of the child node.
i (int): The child index. It must verify
``0 < i <= self.num_children(u)``.
set_parent (bool): Pass ``True`` to update the parent of ``v``.
Defaults to ``True``.
Raises:
`IndexError` if ``not (0 < i <= self.num_children(u))``
"""
self.map_node_children[u][i] = v
if set_parent:
self.map_node_parent[v] = u
[docs] def append_child(self, u: int, v: int, set_parent: bool = True):
"""
Appends a new child to a node ``u``.
Args:
u (int): The vertex descriptor of the parent node.
v (int): The vertex descriptor of the child node.
set_parent (bool): Pass ``True`` to update the parent of ``v``.
Defaults to ``True``.
"""
self.map_node_children[u].append(v)
if set_parent:
self.map_node_parent[v] = u
[docs] def num_children(self, u: int) -> int:
"""
Retrieves the number of children of ``u``.
Args:
u (int): The vertex descriptor of the considered node.
Returns:
The number of children of ``u``.
"""
return len(self.map_node_children[u])
[docs] def is_ancestor_of(self, u: int, v: int) -> bool:
"""
Tests if ``v`` is an ancestor of ``u``.
Args:
u (int): The vertex descriptor of a node.
v (int): The vertex descriptor of the candidate ancestor.
Returns:
``True`` iff ``v`` is an ancestor of ``u``, ``False`` otherwise.
"""
# TODO we should use self.map_parent
if self.is_leaf(u):
return u == v
elif self.is_unary(u):
return self.is_ancestor_of(self.first_child(u), v)
elif self.is_n_ary(u):
return any(
self.is_ancestor_of(u_child, v) for u_child in self.children(u)
)
[docs] def epsilon_successors(self, u: int, v: int) -> set:
"""
Retrieves the out-edges of ``v`` that are epsilon-reachable from
a ``(u, v)`` arc.
See also :py:meth:`RegexpAst.epsilon_reachables`.
Args:
u (int): The source of the arc.
v (int): The target of the arc.
Returns:
The set of out-edges of ``(u', v')`` arcs that are
epsilon-reachable from the ``(u, v)`` arc
"""
if v is None:
assert u == self.root
return {(u, self.first_child(u))}
# print("**", u, v, self.map_node_parent[u], self.map_node_parent[v])
result = set()
result.add((u, v))
v_label = self.label(v)
if self.is_downwards_arc(u, v):
# print("down")
if v_label == "+":
result.add((v, self.first_child(v)))
elif v_label == "*" or v_label == "?":
result.add((v, u))
result.add((v, self.first_child(v)))
elif v_label == ".":
result.add((v, self.first_child(v)))
elif v_label == "|":
for child in self.children(v):
result.add((v, child))
# case v is root or v is a leaf -> no epsilon successors
else:
# print("up")
if v_label == "+" or v_label == "*":
result.add((v, u))
result.add((v, self.parent(v)))
elif v_label == "?":
result.add((v, self.parent(v)))
elif v_label == ".":
if self.is_last_child(u, v):
result.add((v, self.parent(v)))
else:
u_index = self.get_arc_index(v, u)
result.add((v, self.ith_child(v, u_index + 1)))
elif v_label == "|":
result.add((v, self.parent(v)))
# case v is root -> no eps successors
# case v is leaf impossible
# print("**", result)
return result
[docs] def epsilon_reachables(self, u: int, v: int = None) -> set:
"""
Retrieves the edges that are epsilon-reachable from
a ``(u, v)`` arc.
See also :py:meth:`RegexpAst.epsilon_successors`.
Args:
u (int): The source of the arc.
v (int): The target of the arc. If ``u`` is a leaf
you may pass ``None``.
Returns:
The set of out-edges of ``(u', v')`` arcs that are
epsilon-reachable from the ``(u, v)`` arc
"""
# print("eps reach", u, v)
result = set()
stack = deque()
result.add((u, v))
stack.append((u, v))
while len(stack) > 0:
(u, v) = stack.pop()
# print("__", u, v)
for (uu, vv) in self.epsilon_successors(u, v):
if (uu, vv) not in result:
# print("___adding___", uu, vv)
result.add((uu, vv))
stack.append((uu, vv))
return result
[docs] def to_prefix_regexp_str(self, u: int = None) -> str:
"""
Builds the prefix regular expression of this
:py:class:`RegexpAst` instance.
See also :py:meth:`RegexpAst.to_prefix_regexp_list`.
Args:
u (int): Pass ``None``.
Returns:
A string storing the prefix regular expression modeling
this :py:class:`RegexpAst` instance.
"""
if u is None:
u = self.first_child(self.root)
if u is None:
return ""
u_label = self.label(u)
if self.is_n_ary(u):
return u_label + '(' + ','.join(
[self.to_prefix_regexp_str(v) for v in self.children(u)]
) + ')'
elif self.is_unary(u):
return u_label + self.to_prefix_regexp_str(self.first_child(u))
else: # u is a leaf
return u_label
[docs] def to_prefix_regexp_list(self, u: int = None) -> list:
"""
Builds the prefix regular expression of this
:py:class:`RegexpAst` instance.
See also :py:meth:`RegexpAst.to_prefix_regexp_str`.
Args:
u (int): Pass ``None``.
Returns:
A string storing the prefix regular expression modeling
this :py:class:`RegexpAst` instance.
"""
if u is None:
u = self.first_child(self.root)
if u is None:
return []
u_label = self.label(u)
if self.is_n_ary(u):
result = [u_label, '(']
for v in self.children(u):
if not v == self.first_child(u):
result += [',']
result += self.to_prefix_regexp_list(v)
result += [')']
return result
elif self.is_unary(u):
return [u_label] \
+ self.to_prefix_regexp_list(self.map_node_children[u][0])
else: # u is a leaf
return [u_label]
[docs] def to_infix_regexp_str(self, u=None) -> str:
"""
Builds the infix regular expression of this
:py:class:`RegexpAst` instance.
See also :py:meth:`RegexpAst.to_prefix_regexp_list`.
Args:
u (int): Pass ``None``.
Returns:
A string storing the prefix regular expression modeling
this :py:class:`RegexpAst` instance.
"""
if u is None:
u = self.first_child(self.root)
if u is None:
result = ""
u_label = self.label(u)
if self.is_n_ary(u):
result = '(' * (len(self.children(u)) - 1) + u_label.join(
[self.to_infix_regexp_str(v) + (')' if i > 0 else "")
for i, v in enumerate(self.children(u))]
)
elif self.is_unary(u):
child_is_leaf = self.is_leaf(self.first_child(u))
result = ("" if child_is_leaf else "(") \
+ self.to_infix_regexp_str(self.first_child(u)) \
+ ("" if child_is_leaf else ")") + u_label
else: # u is a leaf
result = u_label
if u == self.first_child(self.root):
if result[0] == '(' and result[-1] == ')':
result = result[1:-1]
return result
[docs] def copy(self):
"""
Copy this :py:class:`RegexpAst` instance.
Returns:
A copy this :py:class:`RegexpAst` instance.
"""
return deepcopy(self)
[docs] def simplify(self, active_leaf: int = None):
"""
Applies the following simplifications on this :py:class:`RegexpAst` instance.
- :py:meth:`RegexpAst.simplify_unary_nodes`
- :py:meth:`RegexpAst.simplify_n_ary_nodes`
- :py:meth:`RegexpAst.reorder_or_nodes`
- :py:meth:`remove_unary_n_aries`
Args:
active_leaf (int): Pass `None`
"""
self.simplify_unary_nodes() # ++ -> + ; ?? -> ? ,; ?+ -> * etc.
self.simplify_n_ary_nodes() # (a.b).c -> (a.b.c)
self.reorder_or_nodes() # b|a -> a|b
# self.simplify_or_nodes(active_leaf=active_leaf) # MANDO: a | a -> a. Be cautious, do not remove if a is the source of the active arc.
self.remove_unary_n_aries() # When inserting unary node, some binary node may get only one operand and hence become useless. We just remove the useless operator. |a -> a ; .a -> a
[docs] def simplify_unary_nodes(self, u=None):
# print("8______start")
# print(u)
# ipynb_display_graph(self)
# print("8______end")
if u is None:
u = self.first_child(self.root)
if u is None:
return
if self.is_n_ary(u):
for v in self.children(u):
self.simplify_unary_nodes(v)
elif self.is_unary(u):
v = self.first_child(u)
if self.is_unary(v):
u_label = self.label(u)
v_label = self.label(v)
if u_label != v_label:
self.set_label(u, "*")
# remove v
v_child = self.first_child(v)
self.set_child(u, v_child)
self.remove_node(v)
self.simplify_unary_nodes(u)
else:
self.simplify_unary_nodes(v)
[docs] def merge_n_ary_nodes(self, u, v, remove_v=True):
i = self.get_arc_index(u, v)
self.map_node_children[u] = self.map_node_children[u][:i] \
+ self.map_node_children[v] + self.map_node_children[u][i+1:]
for v_child in self.children(v):
self.map_node_parent[v_child] = u
if remove_v:
self.remove_node(v)
[docs] def simplify_n_ary_nodes(self, u=None):
if u is None:
u = self.first_child(self.root)
if u is None:
return
if self.is_unary(u):
self.simplify_n_ary_nodes(self.first_child(u))
elif self.is_n_ary(u):
u_label = self.label(u)
has_merged = False
for i, v in enumerate(self.children(u)):
v_label = self.label(v)
if u_label == v_label:
self.merge_n_ary_nodes(u, v)
has_merged = True
break
if has_merged:
self.simplify_n_ary_nodes(u)
else:
for v in self.children(u):
self.simplify_n_ary_nodes(v)
# UNUSED
[docs] def simplify_or_nodes(self, active_leaf=None, u=None):
if u is None:
u = self.first_child(self.root)
if u is None:
return
if self.is_unary(u):
self.simplify_or_nodes(self.first_child(u))
elif self.is_n_ary(u):
for v in self.children(u):
self.simplify_or_nodes(v)
if self.label(u) == "|":
children_prefixes = dict()
to_remove = set()
for v in self.children(u):
v_prefix = self.to_prefix_regexp_str(v)
if v_prefix in children_prefixes:
if self.is_ancestor_of(v, active_leaf):
to_remove.add(children_prefixes[v_prefix])
children_prefixes[v_prefix] = v
else:
to_remove.add(v)
else:
children_prefixes[v_prefix] = v
self.set_children(u, [v for v in self.children(u) if v not in to_remove])
[docs] def reorder_or_nodes(self, u=None):
if u is None:
u = self.first_child(self.root)
if u is None:
return
if self.is_unary(u):
self.reorder_or_nodes(self.first_child(u))
elif self.is_n_ary(u):
for v in self.children(u):
self.reorder_or_nodes(v)
if self.label(u) == "|":
self.map_node_children[u] = sorted(
self.map_node_children[u],
key=lambda v: self.to_prefix_regexp_str(v)
)
[docs] def find_unary_n_aries(self, u=None):
if u is None:
print(self.to_prefix_regexp_list())
u = self.first_child(self.root)
if u is None:
return
result = set()
if self.is_unary(u):
result |= self.find_unary_n_aries(self.first_child(u))
elif self.is_n_ary(u):
if len(self.children(u)) == 1:
result.add(u)
for v in self.children(u):
result |= self.find_unary_n_aries(v)
return result
[docs] def remove_unary_n_aries(self, u=None):
if u is None:
u = self.first_child(self.root)
if u is None:
return
if self.is_unary(u):
self.remove_unary_n_aries(self.first_child(u))
elif self.is_n_ary(u):
if len(self.children(u)) == 1:
u_parent = self.parent(u)
i = self.get_arc_index(u_parent, u)
u_child = self.first_child(u)
self.set_ith_child(u_parent, u_child, i)
self.remove_node(u)
self.remove_unary_n_aries(u_child)
else:
for v in self.children(u):
self.remove_unary_n_aries(v)
[docs] def walk_one_char(self, u: int, a: str, verbose: bool = False) -> set:
"""
Performs a single character walk on this :py:class:`RegexpAst` instance.
Starts the walk on the node u (u must be a leaf or the root),
and returns the set of leaves labeled by ``a``
that are epsilon-reachable from ``u``.
Args:
a (str): A symbol, e.g. "a" or "$date".
"""
result = set()
v = self.parent(u) # if u is root, v will be None
if verbose:
print("5_____start")
ipynb_display_graph(self)
print(u, v)
print(a)
print("5______end")
epsilon_reachables = self.epsilon_reachables(u, v)
for uu, vv in epsilon_reachables:
if vv is not None and self.label(vv) == a:
result.add(vv)
return result
[docs] def walk_word(self, w: list, u: int = None) -> set:
"""
Walk on this :py:class:`RegexpAst` instances by consuming the symbols
involved in an input word.
Args:
u (int): The vertex descriptor of the starting node. Pass ``None`` to start
from the root of this :py:class:`RegexpAst` instance. Defaults to ``None``.
w (list): The input word, which is a list of symbols, where each symbol
is a string (e.g. "a" or "$date").
Returns:
The set of vertex descriptor that are reachable from ``u`` by consuming ``w``
"""
if u is None:
u = self.root
possible_locations = {u}
for a in w:
new_possible_locations = set()
for leaf in possible_locations:
new_possible_locations |= self.walk_one_char(leaf, a)
possible_locations = new_possible_locations
return possible_locations
[docs] def recognizes(self, x) -> bool:
return (
self.recognizes_pa(x) if isinstance(x, PatternAutomaton)
else self.recognizes_word(x)
)
[docs] def recognizes_pa(self, pa: PatternAutomaton, verbose: bool = False) -> bool:
# bot = AST root.
# With str: is there a path from bot to bot recognizing an arbirary word.
# With PAs: is there a path from bot to bot recognizing a path from the source
# to the sink of the PA.
stack = deque()
u = self.root
stack.append((u, 0))
while stack:
(u, q) = stack.pop()
if pa.is_final(q):
v = self.parent(u)
if any(
vv == self.root
for (_, vv) in self.epsilon_reachables(u, v)
):
return True
else:
continue
for e in pa.out_edges(q):
r = e.m_target # TODO pa.target(e)
p = pa.label(e)
compatible_leaves = self.walk_one_char(u, p, verbose=verbose)
for leaf in compatible_leaves:
stack.append((leaf, r))
return False
[docs] def recognizes_pa_prefix(
self,
pa: PatternAutomaton,
target_pa_node: int,
target_ast_leaf: int,
u: int = None
) -> bool:
"""
Tests whether there exists in this py:class:`RegexpAst` instance
a path from its root to ``target_ast_leaf`` that corresponds to
a path from the source of a given :py:class:`PatternAutomaton` instance
to ``target_ast_leaf``.
Args:
pa (PatternAutomaton): A :py:class:`PatternAutomaton` instance (modeling
a positive example).
target_pa_node (int): The vertex descriptor of the target node in ``pa``.
target_ast_leaf (int): The vertex descriptor target leaf in ``self``.
u (int): Pass ``None``.
Returns:
``True`` iff this py:class:`RegexpAst` instance matches ``pa``,
``False`` otherwise.
"""
stack = deque()
if u is None:
u = self.root
stack.append((u, 0))
while stack:
u, q = stack.pop()
if u == target_ast_leaf and q == target_pa_node:
return True
if q > target_pa_node:
continue
for e in pa.out_edges(q):
r = e.m_target
p = pa.label(e)
compatible_leaves = self.walk_one_char(u, p)
for leaf in compatible_leaves:
stack.append((leaf, r))
return False
[docs] def recognizes_word(self, w: list) -> bool:
"""
Tests whether a word is matched by this :py:class:`RegexpAst` instance.
Args:
w (list): A list of symbols, where each symbol is a string (e.g. "a" or "$date").
Returns:
``True`` iff this py:class:`RegexpAst` instance matches ``w``,
``False`` otherwise.
"""
possible_last_leaves = self.walk_word(w)
return any(
v == self.root
for leaf in possible_last_leaves
for _, v in self.epsilon_reachables(
leaf, self.map_node_parent[leaf]
)
)
[docs] def recognizes_prefix(self, prefix: list, leaf_to_end_on: int):
"""
Tests whether there exists a valid walk from the root to
a given leaf in this :py:class:`RegexpAst` instance.
Args:
prefix (list): The prefix (list of symbols).
leaf_to_end_on (int): The vertex descriptor target leaf.
Returns:
``True`` if there exists a walk from the root to
``leaf_to_end_on`` that matches ``prefix``, ``False`` otherwise.
"""
# print("4_____start")
# ipynb_display_graph(self)
# print(prefix)
# print(leaf_to_end_on)
# print("4______end")
return leaf_to_end_on in self.walk_word(prefix)
# --------------------------------------------------------------------------
# The following methods are reuired to use pybgl.graphviz
# --------------------------------------------------------------------------
[docs] def edges(self) -> iter:
"""
Retrieves an iterator over the edges of this py:class:`RegexpAst` instance.
Returns:
An iterator over this :py:class:`RegexpAst` instance edges.
"""
return (
EdgeDescriptor(u, v, 0) # TODO: we could just return (u, v)
for (u, vs) in self.map_node_children.items()
for v in vs
)
[docs] def out_edges(self, u: int) -> iter:
"""
Retrieves an iterator over the out-edges of this py:class:`RegexpAst` instance.
Returns:
An iterator over this :py:class:`RegexpAst` instance vertices.
"""
return self.map_node_children[u]
[docs] def vertices(self) -> iter:
"""
Retrieves an iterator over the vertices of this py:class:`RegexpAst` instance.
Returns:
An iterator over this :py:class:`RegexpAst` instance vertices.
"""
return self.map_node_children.keys()
[docs] def source(self, e: EdgeDescriptor) -> int:
"""
Retrieves the source of an arc.
Args:
e (EdgeDescriptor): The ``EdgeDescriptor`` of the arc.
Returns:
The vertex descriptor of the source of ``e``.
"""
return e.m_source
[docs] def target(self, e: EdgeDescriptor) -> int:
"""
Retrieves the target of an arc.
Args:
e (EdgeDescriptor): The ``EdgeDescriptor`` of the arc.
Returns:
The vertex descriptor of the target of ``e``.
"""
return e.m_target
[docs] def to_dot(self, **kwargs) -> str:
"""
Exports this :py:class:`RegexpAst` to Graphviz format.
Returns:
The corresponding Graphviz string.
"""
self.directed = True
dg = {"rankdir": "TB"}
dpv = {
"label": make_func_property_map(
lambda u: "%s [%s]" % (u, self.map_node_label[u])
),
}
dv = {
"shape": "box",
"style": "rounded, filled",
"ordering": "out"
}
kwargs = enrich_kwargs(dg, "dg", **kwargs)
kwargs = enrich_kwargs(dpv, "dpv", **kwargs)
kwargs = enrich_kwargs(dv, "dv", **kwargs)
return to_dot(self, **kwargs)
[docs]def prefix_regexp_to_ast(prefix_regexp: list) -> RegexpAst:
"""
Builds a :py:class:`RegexpAst` instance from a prefix
regular expression.
Args:
prefix_regexp (list): A list modeling a binary prefix regular
expression e.g. [".", "a", "$date"].
Returns:
The corresponding :py:class:`RegexpAst` instance.
"""
ast = RegexpAst()
stack = deque()
stack.append((ast.root, 1))
for regexp_token in prefix_regexp:
u, child_idx = stack.pop()
v = ast.add_node(label=regexp_token)
if child_idx == 1:
ast.set_child(u, v)
else:
ast.append_child(u, v)
if ast.is_n_ary(v):
stack.append((v, 2))
stack.append((v, 1))
elif ast.is_unary(v):
stack.append((v, 1))
assert len(stack) == 0
ast.simplify()
return ast