Castle: The best Real-Time/Embedded/HighTech language EVER. Attempt 2
Revisión | 54ef2ab8a19e3259303e019dc792a54f90a4fec8 (tree) |
---|---|
Tiempo | 2022-03-07 02:45:19 |
Autor | Albert Mietus < albert AT mietus DOT nl > |
Committer | Albert Mietus < albert AT mietus DOT nl > |
started with generic Reader classes -- See PEGReader
@@ -0,0 +1,34 @@ | ||
1 | +""" General File-Reader support""" | |
2 | + | |
3 | +import logging; logger = logging.getLogger(__name__) | |
4 | + | |
5 | + | |
6 | +from pathlib import Path | |
7 | +import os | |
8 | + | |
9 | + | |
10 | +def _get_file_dirPath(file=None): | |
11 | + if file is None: file=__file__ | |
12 | + path_to_current_test = Path(os.path.realpath(file)) | |
13 | + path_to_current_dir = path_to_current_test.parent | |
14 | + return path_to_current_dir | |
15 | + | |
16 | + | |
17 | + | |
18 | +class BaseReader(): | |
19 | + | |
20 | + def __init__(self, *, read_dirs: list[str], **kwargs): | |
21 | + if isinstance(read_dirs, str): read_dirs=[read_dirs] #Always a list | |
22 | + self.read_dirs = [ _get_file_dirPath() / d for d in read_dirs] | |
23 | + super().__init__(**kwargs) | |
24 | + | |
25 | + def _read(self, filename) ->str: | |
26 | + for d in self.read_dirs: | |
27 | + if (d / filename).exists(): | |
28 | + break | |
29 | + with (d / filename).open() as f: | |
30 | + logger.debug(f'Reading file: >>{f.name}<<') | |
31 | + txt = f.read() | |
32 | + return txt | |
33 | + | |
34 | + |
@@ -0,0 +1,60 @@ | ||
1 | +"""The PEG :py:mod:`castle.readers.parser` module is based on Arpeggio | |
2 | + | |
3 | +.. seealso:: | |
4 | + | |
5 | + * **PEG** (*Parsing Expression Grammar*) https://en.wikipedia.org/wiki/Parsing_expression_grammar | |
6 | + * **Arpeggio** https://textx.github.io/Arpeggio | |
7 | + | |
8 | +""" | |
9 | + | |
10 | +import logging; logger = logging.getLogger(__name__) | |
11 | + | |
12 | +from typing import Callable | |
13 | + | |
14 | +import arpeggio | |
15 | +from castle.readers.general.file import BaseReader | |
16 | + | |
17 | + | |
18 | +class PEGReader(BaseReader): | |
19 | + """File-Reader to read & parse files, using a PEG parser. | |
20 | + | |
21 | + When creating a :py:class:`PEGReader` a default value for the `language_def` & `comment_def` can be set (and/or can | |
22 | + be set when reading a file -- see :py:func:`parse`). This language_def & comment_def should be defined using *Arpeggio*. | |
23 | + | |
24 | + Also, a :py:class:`visitor` (class) should be specified. (this can only be done once). | |
25 | + | |
26 | + When creating a class the search-path `read_dirs` (to look for file-to-be-parsed) can be set. | |
27 | + """ | |
28 | + | |
29 | + def __init__(self, *, read_dirs: list[str]=[], | |
30 | + language_def=None, comment_def=None, visitor:Callable=None, | |
31 | + **kwargs): | |
32 | + super().__init__(read_dirs=read_dirs, **kwargs) | |
33 | + if visitor is None: | |
34 | + raise ValueError("visitor is a mandatory parameter") | |
35 | + self._visitor = visitor | |
36 | + self.default_language_def = language_def | |
37 | + self.default_comment_def = comment_def | |
38 | + | |
39 | + | |
40 | + def parse(self, filename:str, *, language_def=None, comment_def=None): | |
41 | + """Read & Parse a file""" | |
42 | + if not language_def: language_def = self.default_language_def | |
43 | + if not comment_def: comment_def = self.default_comment_def | |
44 | + txt = self._read(filename) | |
45 | + ast = self._do_parse(txt, language_def, comment_def) | |
46 | + return ast | |
47 | + | |
48 | + | |
49 | + def _do_parse(self, txt, language_def, comment_def): | |
50 | + parser = arpeggio.ParserPython(language_def=language_def, comment_def=comment_def) | |
51 | + | |
52 | + pt = parser.parse(txt) | |
53 | + logger.info(f"Reader:_do_parse::\t parse_tree: start={pt.position} end={pt.position_end}; len(txt)={len(txt)}") | |
54 | + | |
55 | + ast = arpeggio.visit_parse_tree(pt, self._visitor) | |
56 | + logger.debug(f"Reader:_do_parse::\t ast: start={ast.position} end={ast.position_end} -- not counting comments.") | |
57 | + | |
58 | + return ast | |
59 | + | |
60 | + |
@@ -58,6 +58,6 @@ | ||
58 | 58 | REs3 = _(r"[rR]'''") |
59 | 59 | REd3 = _(r'[rR]"""') |
60 | 60 | |
61 | -def comment(): return "//", _(".*\n") | |
61 | +def comment(): return ["#", '//'], _(".*\n") | |
62 | 62 | |
63 | 63 |
@@ -1,4 +1,11 @@ | ||
1 | -//EXPECT: | |
2 | -//def my_rule(): return crossref_1, "string", crossref_1 | |
1 | +peg_grammar <- rules EOF ; | |
2 | +rules <- (parse_rule | setting)+ ; | |
3 | +parse_rule <- rule_name '<-' expression ';' ; | |
3 | 4 | |
4 | -my_rule <- crossref_1 "string" crossref_2 ; | |
\ No newline at end of file | ||
5 | +expression <- sequence op_alternative ; | |
6 | +sequence <- single_expr+ ; | |
7 | +single_expr <- ( rule_crossref | term | group | predicate ) op_quantity ; | |
8 | + | |
9 | +op_alternative <- ( '|' expression )? ; | |
10 | +op_quantity <- ( '?' | '*' | '+' | '\#' )? ; | |
11 | + |
@@ -8,14 +8,6 @@ | ||
8 | 8 | import jinja2 |
9 | 9 | |
10 | 10 | |
11 | -def _get_file_dirPath(): | |
12 | - from pathlib import Path | |
13 | - import os | |
14 | - path_to_current_test = Path(os.path.realpath(__file__)) | |
15 | - path_to_current_dir = path_to_current_test.parent | |
16 | - return path_to_current_dir | |
17 | - | |
18 | - | |
19 | 11 | class Reader(): |
20 | 12 | |
21 | 13 | def __init__(self, read_dirs: list[str]): |
@@ -18,6 +18,9 @@ | ||
18 | 18 | if pattern: |
19 | 19 | validate_pattern(parse_tree, pattern=pattern) |
20 | 20 | |
21 | + return parse_tree | |
22 | + | |
23 | + | |
21 | 24 | def validate_pattern(parse_tree, pattern): |
22 | 25 | for e,T in zip(parse_tree,pattern): |
23 | 26 | if T is not None: assert isinstance(e.rule, T), f"{type(e.rule).__name__} doesn't match {T.__name__}" |
@@ -36,5 +39,24 @@ | ||
36 | 39 | def test_grammar_re_no_slash(): verify_regex(r"/((\\/)|[^\/])*/") |
37 | 40 | def test_grammar_auto_re_no_slash(): verify_regex("/" + grammar.re_no_slash().to_match +"/") # Same as above (unless grammar changes |
38 | 41 | |
42 | +def test_any2EOL_1(): | |
43 | + pt = verify_regex("/.*\n/") # NOT raw : \n === newline | |
44 | + re = pt[1] | |
45 | + assert re == ".*\n" | |
46 | + | |
47 | +def test_any2EOL_2(): | |
48 | + pt = verify_regex("""/.* | |
49 | +/""") #Explicit newline (in not-raw string | |
50 | + re = pt[1] | |
51 | + assert re == ".*\n" | |
52 | + | |
53 | +def test_any2EOL_3(): | |
54 | + pt = verify_regex(r"""/.* | |
55 | +/""") #Explicit newline (in RAW string | |
56 | + re = pt[1] | |
57 | + assert re == ".*\n" | |
39 | 58 | |
40 | 59 | |
60 | + | |
61 | + | |
62 | + |
@@ -1,4 +1,5 @@ | ||
1 | 1 | import pytest |
2 | +import logging; logger = logging.getLogger(__name__) | |
2 | 3 | from pathlib import Path |
3 | 4 | import os |
4 | 5 |
@@ -9,6 +10,7 @@ | ||
9 | 10 | path_to_current_test = Path(os.path.realpath(__file__)) |
10 | 11 | path_to_current_dir = path_to_current_test.parent |
11 | 12 | with (path_to_current_dir / dir / filename).open() as f: |
13 | + logger.debug(f'Reading file: >>{f.name}<<') | |
12 | 14 | txt = f.read() |
13 | 15 | |
14 | 16 | parser = arpeggio.ParserPython(grammar.peg_grammar, grammar.comment, debug=False) |
@@ -1,4 +1,5 @@ | ||
1 | 1 | import pytest |
2 | +import logging; logger = logging.getLogger(__name__) | |
2 | 3 | |
3 | 4 | from castle.readers.parser import grammar |
4 | 5 | from castle.ast import peg |
@@ -57,3 +58,12 @@ | ||
57 | 58 | assert isinstance(ast, peg.Expression), "A (str)term is also an Expression" |
58 | 59 | assert len(ast) == 1, "with a lengt of 1 -- note: use: ``len(sequence)`` not ``len(sequence._children)``!!" |
59 | 60 | assert ast[0].value == txt[1:-1], "It's correct value should be without quotes" |
61 | + | |
62 | + | |
63 | +def test_any2EOL(): | |
64 | + txt = "/.*\n/" # NOT raw : \n === newline | |
65 | + ast = parse(txt, grammar.term) | |
66 | + logger.debug(f"any2EOL_1:: {ast}") | |
67 | + | |
68 | + assert isinstance(ast, peg.RegExpTerm) | |
69 | + assert ast.value == ".*\n" |
@@ -0,0 +1,24 @@ | ||
1 | +import pytest | |
2 | +import logging; logger = logging.getLogger(__name__) | |
3 | + | |
4 | +from castle.readers.parser import PEGReader | |
5 | +from castle.readers.parser import grammar, visitor | |
6 | + | |
7 | + | |
8 | +def test_file(): | |
9 | + reader = PEGReader(read_dirs=('../../../demos/ThinOnion', | |
10 | + '../../.././demos/ThinOnion/grammar', | |
11 | + '../../.././pytst/readers/parser'), | |
12 | + language_def=grammar.peg_grammar, | |
13 | + comment_def=grammar.comment, | |
14 | + visitor=visitor.PegVisitor()) | |
15 | + ast = reader.parse('grammar.peg') | |
16 | + # Remember: ast is a peg.Grammar!! | |
17 | + | |
18 | + #Manual count ... | |
19 | + no_rules, no_settings = 20,19 | |
20 | + assert len(ast.parse_rules) == no_rules, f"The number of (real) rules should be {no_rules} -- unless the file is changed" | |
21 | + assert len(ast.settings) == no_settings, f"The number of settings ('=') should be {no_settings} -- unless the file is changed" | |
22 | + | |
23 | + # XXX See grammar.peg :: comment doesn't work | |
24 | + |
@@ -1,42 +1,60 @@ | ||
1 | -//This is the PEG version og grammar(2).py | |
2 | - | |
3 | -peg_grammar <- rules ; | |
4 | -rules <- rule+ ; | |
5 | -rule <- rule_name '<-' expression ';' ; | |
1 | +peg_grammar <- rules EOF ; | |
2 | +rules <- (parse_rule | setting)+ ; | |
3 | +parse_rule <- rule_name '<-' expression ';' ; | |
6 | 4 | |
7 | 5 | expression <- sequence op_alternative ; |
8 | 6 | sequence <- single_expr+ ; |
9 | 7 | single_expr <- ( rule_crossref | term | group | predicate ) op_quantity ; |
10 | 8 | |
11 | 9 | op_alternative <- ( '|' expression )? ; |
12 | -op_quantity <- ( '?' | '*' | '+' | '#' )? ; | |
10 | +op_quantity <- ( '?' | '*' | '+' | '/#' )? ; | |
13 | 11 | |
14 | 12 | term <- str_term | regex_term ; |
15 | 13 | group <- '(' expression ')' ; |
16 | 14 | predicate <- ( '&' | '!' ) single_expr ; |
17 | 15 | |
18 | -str_term <- "'" str_no_s1 "'" | |
19 | - | '"' str_no_d1 '"' | |
20 | - | "'''" str_no_s3 "'''" | |
21 | - | '"""' str_no_d3 '"""' | |
16 | +str_term <- S3 str_no_s3 S3 | |
17 | + | D3 str_no_d3 D3 | |
18 | + | S1 str_no_s1 S1 | |
19 | + | D1 str_no_d1 D1 | |
22 | 20 | ; |
23 | -regex_term <- '/' str_no_slash '/' | |
24 | - | "r'" str_no_s1 "'" | |
25 | - | 'r"' str_no_d1 '"' | |
26 | - | "r'''" str_no_s3 "'''" | |
27 | - | 'r"""' str_no_d3 '"""' | |
28 | - | "R'" str_no_s1 "'" | |
29 | - | 'R"' str_no_d1 '"' | |
30 | - | "R'''" str_no_s3 "'''" | |
31 | - | 'R"""' str_no_d3 '"""' | |
21 | +regex_term <- RE re_no_slash RE | |
22 | + | REs3 str_no_s3 S3 | |
23 | + | REd3 str_no_d3 D3 | |
24 | + | REs1 str_no_s1 S1 | |
25 | + | REd1 str_no_d1 D1 | |
32 | 26 | ; |
33 | 27 | |
34 | -rule_crossref <- ID ; | |
35 | -rule_name <- ID ; | |
36 | -ID <- /[A-Za-z_][A-Za-z0-9_]*/ ; | |
28 | +rule_name = ID ; | |
29 | +rule_crossref = ID ; | |
30 | +ID = /[A-Za-z_][A-Za-z0-9_]*/ ; | |
37 | 31 | |
38 | -str_no_slash <- /((\\/)|[^\/])*/ ; | |
39 | -str_no_XXX <- XXX | |
40 | - | ETC | |
41 | - ; | |
32 | +re_no_slash = /((\\/)|[^\/])*/ ; | |
33 | +str_no_s1 = /((\\')|[^'\n])*/ ; | |
34 | +str_no_d1 = /((\\")|[^"\n])*/ ; | |
35 | +str_no_s3 = /([^']|('[^'])|(''[^']))*/ ; | |
36 | +str_no_d3 = /([^"]|("[^"])|(""[^"]))*/ ; | |
42 | 37 | |
38 | + | |
39 | +setting <- setting_name '=' value ';' ; | |
40 | +setting_name = ID ; | |
41 | +value <- str_term | regex_term | number | setting_xref ; | |
42 | +number <- complex_lit | float_lit | int_lit ; | |
43 | +setting_xref = ID ; | |
44 | +complex_lit <- /[+-]?([0-9](\.[0-9]*)?)[+-][iIjJ]([0-9](\.[0-9]*)?)/ ; | |
45 | +float_lit <- /[+-]?[0-9]\.[0-9]+/; | |
46 | +int_lit <- /[+-]?[1-9][0-9]*/ ; | |
47 | + | |
48 | +S1 = "'" ; | |
49 | +D1 = '"' ; | |
50 | +S3 = "'''" ; | |
51 | +D3 = '"""' ; | |
52 | +RE = '/' ; | |
53 | +REs1 = /[rR]'"/ ; | |
54 | +REd1 = /[rR]"/ ; | |
55 | +REs3 = /[rR]'''/ ; | |
56 | +REd3 = /[rR]"""/ ; | |
57 | + | |
58 | +comment <- comment1 | comment2 ; | |
59 | +#comment1 <- '#' /.*\n/ ; // XXX:: Does Not work | |
60 | +#comment2 <- '//' /.*\n/ ; # XXX:: Does Not work |