generic text markup tools
Revisión | db3f79c1ce321de46c9017aa56937fd77ddba7df (tree) |
---|---|
Tiempo | 2013-04-05 19:37:31 |
Autor | hylom <hylom@hylo...> |
Committer | hylom |
rewriting textparser...
@@ -123,12 +123,6 @@ | ||
123 | 123 | "code": { |
124 | 124 | "extends": "fontDecoration", |
125 | 125 | "rules": { |
126 | - "start": { | |
127 | - "priority": 101, | |
128 | - "regexp": "^☆\\+---$", | |
129 | - "continue": false, | |
130 | - "replace": "<pre>" | |
131 | - }, | |
132 | 126 | "red1": { |
133 | 127 | "priority": 102, |
134 | 128 | "regexp": "\\*g\\[(.*?)]", |
@@ -152,6 +146,10 @@ | ||
152 | 146 | }, |
153 | 147 | "begin": "^☆\\+---\\s*$", |
154 | 148 | "end": "^☆\\+---\\s*$", |
149 | + "onStart": { | |
150 | + "insert": "<pre>", | |
151 | + "replace": "" | |
152 | + }, | |
155 | 153 | "onFinished": { |
156 | 154 | "insert": "</pre>", |
157 | 155 | "replace": "" |
@@ -190,6 +188,7 @@ | ||
190 | 188 | "onStart": { |
191 | 189 | "insert": "<div class=\"column note\">", |
192 | 190 | "replace": "" |
191 | + }, | |
193 | 192 | "onFinished": { |
194 | 193 | "insert": "</div>", |
195 | 194 | "replace": "" |
@@ -7,33 +7,113 @@ def CreateMode(lexi, mode_name): | ||
7 | 7 | return Mode(lexi, mode_name) |
8 | 8 | |
9 | 9 | |
10 | -class ModeBase(dict): | |
10 | +class Mode(dict): | |
11 | 11 | def __init__(self, lexi, mode_name): |
12 | 12 | self.lexi = lexi |
13 | 13 | modes = lexi.get("modes") |
14 | 14 | self.update(modes[mode_name]) |
15 | + self.mode_name = mode_name | |
15 | 16 | |
16 | - def match(self, key, text): | |
17 | - rex = self.get(key, False) | |
17 | + def test(self, attr_name, text): | |
18 | + '''test attr_name matches text''' | |
19 | + rex = self.get(attr_name, False) | |
18 | 20 | if rex and re.search(rex, text): |
19 | 21 | return True |
20 | 22 | else: |
21 | 23 | return False |
22 | 24 | |
23 | - def has_element(self, key): | |
24 | - return (key in self) | |
25 | + def has_attr(self, attr_name): | |
26 | + '''if attr_name exists, return True''' | |
27 | + return (attr_name in self) | |
25 | 28 | |
26 | - def write(self, text): | |
27 | - self.writer.write(text) | |
29 | + def attr(self, attr_name, default=None): | |
30 | + '''return value of attr_name''' | |
31 | + return self.get(attr_name, default) | |
28 | 32 | |
29 | -class Mode(dict): | |
30 | - def __init__(self, lexi, writer, defs): | |
31 | - ModeBase.__init__(self, lexi, writer) | |
32 | - self.update(defs) | |
33 | + def is_true(self, attr_name): | |
34 | + '''if attr_name is True, return True''' | |
35 | + return self.get_attr(attr_name, False) | |
36 | + | |
37 | + def is_false(self, attr_name): | |
38 | + '''if attr_name is False, return True''' | |
39 | + return not self.get_attr(attr_name, True) | |
33 | 40 | |
34 | - def mode_end_check(self, text): | |
35 | - return self.match("end", text) | |
41 | + def has_rule(self, rule_name): | |
42 | + '''if rule_name exists, return True''' | |
43 | + rules = self.get("rules", {}) | |
44 | + return rule_name in rules | |
36 | 45 | |
46 | + def rules(self): | |
47 | + '''return list include sorted rule names''' | |
48 | + rules = self.get('rules', {}) | |
49 | + rule_keys = rules.keys() | |
50 | + sort_fn = lambda x,y:-cmp(rules[x].get("priority",0), rules[y].get("priority", 0)) | |
51 | + rule_keys.sort(sort_fn) | |
52 | + return rule_keys | |
53 | + | |
54 | + def rule(self, rule_name): | |
55 | + '''return rule dict corresponds rule_name''' | |
56 | + rules = self.get('rules', {}) | |
57 | + return rules.get(rule_name, None) | |
58 | + | |
59 | + def on_start(self, writer, text): | |
60 | + '''action when mode started''' | |
61 | + if self.has_attr("onStart"): | |
62 | + m = self.attr("onStart") | |
63 | + if "insert" in m: | |
64 | + writer.write(m["insert"]) | |
65 | + if "replace" in m: | |
66 | + text = m["replace"] | |
67 | + return text | |
68 | + | |
69 | + def on_exit(self, writer, text): | |
70 | + '''action when mode finished''' | |
71 | + if self.has_attr("onFinished"): | |
72 | + m = self.attr("onFinished") | |
73 | + if "insert" in m: | |
74 | + writer.write(m["insert"]) | |
75 | + if "replace" in m: | |
76 | + text = m["replace"] | |
77 | + return text | |
78 | + | |
79 | + def markup(self, text): | |
80 | + '''markup text''' | |
81 | + if 'switch' in actions: | |
82 | + for key in actions['switch'].keys(): | |
83 | + value = self.store.load(mode, key) | |
84 | + if value == None: | |
85 | + continue | |
86 | + if value not in actions['switch'][key]: | |
87 | + continue | |
88 | + new_actions = actions['switch'][key][value] | |
89 | + text = self._do_action(mode, text, new_actions, rex, match) | |
90 | + return text | |
91 | + | |
92 | + if 'store' in actions: | |
93 | + arg = actions['store'] | |
94 | + if isinstance(arg, list): | |
95 | + for index in range(len(arg)): | |
96 | + self.store.save(mode, arg[index], match.group(index+1)) | |
97 | + else: | |
98 | + self.store.save(mode, arg, match.group(1)) | |
99 | + if 'unset' in actions: | |
100 | + key = actions['unset'] | |
101 | + self.store.delete(mode, key) | |
102 | + if 'set' in actions: | |
103 | + arg = actions['set'] | |
104 | + self.store.save(mode, arg[0], arg[1]) | |
105 | + # if 'call' in actions: | |
106 | + # (func, newarg) = actions['call'] | |
107 | + # if func in self.functions: | |
108 | + # arg = match.group(1) | |
109 | + # context = self.store | |
110 | + # results = self.functions[func](context, arg) | |
111 | + # for (k, v) in results: | |
112 | + # self._store(mode, k, v) | |
113 | + if 'replace' in actions: | |
114 | + text = rex.sub(actions['replace'], text) | |
115 | + text = self._expand_variable(mode, text) | |
116 | + return text | |
37 | 117 | |
38 | 118 | class ModeStack(object): |
39 | 119 | def __init__(self): |
@@ -47,9 +127,9 @@ class ModeStack(object): | ||
47 | 127 | |
48 | 128 | def current(self): |
49 | 129 | try: |
50 | - return self.mode_stack[-1] | |
130 | + return self.stack[-1] | |
51 | 131 | except IndexError: |
52 | - return False | |
132 | + return None | |
53 | 133 | |
54 | 134 | class Store(dict): |
55 | 135 | def __init__(self): |
@@ -60,26 +140,30 @@ class Store(dict): | ||
60 | 140 | self.global_store[key] = value |
61 | 141 | |
62 | 142 | def save(self, mode, key, value): |
63 | - if not mode in self.mode_stores: | |
64 | - self.mode_stores[mode] = {} | |
65 | - self.mode_stores[mode][key] = value | |
143 | + modekey = mode.mode_name | |
144 | + if not modekey in self.mode_stores: | |
145 | + self.mode_stores[modekey] = {} | |
146 | + self.mode_stores[modekey][key] = value | |
66 | 147 | |
67 | 148 | def load(self, mode, key, default=None): |
68 | - if (mode in self.mode_stores) and (key in self.mode_stores[mode]): | |
69 | - return self.mode_stores[mode][key] | |
149 | + modekey = mode.mode_name | |
150 | + if (modekey in self.mode_stores) and (key in self.mode_stores[modekey]): | |
151 | + return self.mode_stores[modekey][key] | |
70 | 152 | else: |
71 | 153 | return self.global_store.get(key, default) |
72 | 154 | |
73 | 155 | def delete(self, mode, key): |
74 | - if not mode in self.mode_stores: | |
156 | + modekey = mode.mode_name | |
157 | + if not modekey in self.mode_stores: | |
75 | 158 | return |
76 | - if not key in self.mode_stores[mode]: | |
159 | + if not key in self.mode_stores[modekey]: | |
77 | 160 | return |
78 | - del self.mode_stores[mode][key] | |
161 | + del self.mode_stores[modekey][key] | |
79 | 162 | |
80 | 163 | def clear(self, mode): |
81 | - if mode in self.mode_stores: | |
82 | - del self.mode_stores[mode] | |
164 | + modekey = mode.mode_name | |
165 | + if modekey in self.mode_stores: | |
166 | + del self.mode_stores[modekey] | |
83 | 167 | |
84 | 168 | |
85 | 169 | class Parser(object): |
@@ -91,18 +175,21 @@ class Parser(object): | ||
91 | 175 | @return TextWriter object |
92 | 176 | """ |
93 | 177 | self.lexi = lexi |
94 | -# self.mode_stack = ["global", ] | |
95 | 178 | self.mode_stack = ModeStack() |
96 | - mode_global = CreateMode(self.lexi, "global") | |
97 | - self.mode_stack.push(mode_global) | |
179 | + self.mode_stack.push("global") | |
98 | 180 | |
99 | 181 | self.store = Store() |
100 | 182 | self.functions = { |
101 | 183 | # "getImageGeom": getImageGeom, |
102 | 184 | } |
103 | 185 | |
186 | + def _get_mode(self, mode_name): | |
187 | + return Mode(self.lexi, mode_name) | |
188 | + | |
104 | 189 | def current_mode(self): |
105 | - return self.mode_stack.current() | |
190 | + mode_name = self.mode_stack.current() | |
191 | + if mode_name: | |
192 | + return self._get_mode(mode_name) | |
106 | 193 | |
107 | 194 | def markup(self, iter_in, stream_out): |
108 | 195 | """read from iter_in and output to stream_out |
@@ -111,57 +198,19 @@ class Parser(object): | ||
111 | 198 | """ |
112 | 199 | self.stream_out = stream_out |
113 | 200 | try: |
114 | - while self.currentMode(): | |
201 | + while self.current_mode(): | |
115 | 202 | l = iter_in.next().strip('\r\n') |
116 | - out = self._apply_rules(self.currentMode(), l) | |
117 | - self._write(out) | |
203 | + out = self._markup(l) | |
204 | + self.write(out) | |
118 | 205 | except StopIteration: |
119 | 206 | return |
120 | 207 | |
121 | - def _sorted_keys(self, rules): | |
122 | - rule_keys = rules.keys() | |
123 | - rule_keys.sort(lambda x,y:-cmp(rules[x]["priority"], rules[y]["priority"])) | |
124 | - return rule_keys | |
125 | - | |
126 | - def modes(self): | |
127 | - return self.lexi.get("modes", {}) | |
128 | - | |
129 | - def modeOf(self, mode): | |
130 | - return self.modes().get(mode, {}) | |
131 | - | |
132 | - def transitionsOf(self, mode): | |
133 | - return self.modeOf(mode).get('transitions', []) | |
134 | - | |
135 | - def rulesOf(self, mode): | |
136 | - return self.modeOf(mode).get('rules', {}) | |
137 | - | |
138 | - def _transition_check(self, mode, text): | |
139 | - transitions = self.transitionsOf(mode) | |
140 | - for mode in transitions: | |
141 | - rex = self.modeOf(mode).get("begin", False) | |
142 | - if rex and re.search(rex, text): | |
143 | - return mode | |
144 | - return False | |
145 | - | |
146 | - def _mode_end_check(self, mode, text): | |
147 | - current_mode = self.modeOf(mode) | |
148 | - rex = current_mode.get("end", False) | |
149 | - if rex and re.search(rex, text): | |
150 | - return True | |
151 | - else: | |
152 | - return False | |
153 | - | |
154 | - def _transition(self, mode): | |
155 | - self.mode_stack.append(mode) | |
156 | - | |
157 | - def _mode_exit(self): | |
158 | - self.mode_stack.pop() | |
159 | - | |
160 | - def _write(self, text): | |
208 | + def write(self, text): | |
161 | 209 | self.stream_out.write(text) |
162 | 210 | self.stream_out.write('\n') |
163 | 211 | |
164 | - def _expand_variable(self, mode, text): | |
212 | + def _expand_variable(self, text): | |
213 | + mode = self.current_mode() | |
165 | 214 | if not text.find('$'): |
166 | 215 | return text |
167 | 216 | rex = re.compile('\${?([A-Za-z0-9_]+)}?') |
@@ -170,93 +219,7 @@ class Parser(object): | ||
170 | 219 | text = rex.sub(sub_func, text) |
171 | 220 | return text |
172 | 221 | |
173 | - def _do_action(self, mode, text, actions, rex, match): | |
174 | - if 'switch' in actions: | |
175 | - for key in actions['switch'].keys(): | |
176 | - value = self.store.load(mode, key) | |
177 | - if value == None: | |
178 | - continue | |
179 | - if value not in actions['switch'][key]: | |
180 | - continue | |
181 | - new_actions = actions['switch'][key][value] | |
182 | - text = self._do_action(mode, text, new_actions, rex, match) | |
183 | - return text | |
184 | - | |
185 | - if 'store' in actions: | |
186 | - arg = actions['store'] | |
187 | - if isinstance(arg, list): | |
188 | - for index in range(len(arg)): | |
189 | - self.store.save(mode, arg[index], match.group(index+1)) | |
190 | - else: | |
191 | - self.store.save(mode, arg, match.group(1)) | |
192 | - if 'unset' in actions: | |
193 | - key = actions['unset'] | |
194 | - self.store.delete(mode, key) | |
195 | - if 'set' in actions: | |
196 | - arg = actions['set'] | |
197 | - self.store.save(mode, arg[0], arg[1]) | |
198 | - # if 'call' in actions: | |
199 | - # (func, newarg) = actions['call'] | |
200 | - # if func in self.functions: | |
201 | - # arg = match.group(1) | |
202 | - # context = self.store | |
203 | - # results = self.functions[func](context, arg) | |
204 | - # for (k, v) in results: | |
205 | - # self._store(mode, k, v) | |
206 | - if 'replace' in actions: | |
207 | - text = rex.sub(actions['replace'], text) | |
208 | - text = self._expand_variable(mode, text) | |
209 | - return text | |
210 | - | |
211 | - def _apply_rules(self, mode, text): | |
212 | - # check global rule | |
213 | - gi = self.lexi.get("globalIdentifier", False) | |
214 | - if gi: | |
215 | - m_gvi = re.search(gi, text) | |
216 | - if m_gvi: | |
217 | - self.store.save_global(m_gvi.group(1), m_gvi.group(2)) | |
218 | - return '' | |
219 | - | |
220 | - if self._mode_end_check(mode, text): | |
221 | - self._mode_exit() | |
222 | - if "onFinished" in self.modeOf(mode): | |
223 | - m = self.modeOf(mode)["onFinished"] | |
224 | - if "insert" in m: | |
225 | - self._write(m["insert"]) | |
226 | - if "replace" in m: | |
227 | - text = m["replace"] | |
228 | - if len(self.mode_stack) == 0: | |
229 | - return text | |
230 | - mode = self.mode_stack[-1] | |
231 | - | |
232 | - next_mode = self._transition_check(mode, text) | |
233 | - if next_mode: | |
234 | - self._transition(next_mode) | |
235 | - mode = next_mode | |
236 | - if "onStart" in self.modeOf(mode): | |
237 | - m = self.modeOf(mode)["onStart"] | |
238 | - if "insert" in m: | |
239 | - self._write(m["insert"]) | |
240 | - | |
241 | - rules = self.rulesOf(mode) | |
242 | - for key in self._sorted_keys(rules): | |
243 | - rule = rules[key] | |
244 | - if rule.get('pass', False): | |
245 | - continue | |
246 | - if 'regexp' in rule: | |
247 | - rex = re.compile(rule['regexp']) | |
248 | - match = rex.search(text) | |
249 | - if match: | |
250 | - sub_rule = rule.get('apply', False) | |
251 | - if sub_rule: | |
252 | - text = self._apply_rules(sub_rule, text) | |
253 | - text = self._do_action(mode, text, rule, rex, match) | |
254 | - if not rule.get('continue', True): | |
255 | - break | |
256 | - | |
257 | - return text | |
258 | - | |
259 | - def _apply_rules2(self, text): | |
222 | + def _markup(self, text): | |
260 | 223 | # check global rule |
261 | 224 | gi = self.lexi.get("globalIdentifier", False) |
262 | 225 | if gi: |
@@ -266,43 +229,91 @@ class Parser(object): | ||
266 | 229 | return '' |
267 | 230 | |
268 | 231 | mode = self.current_mode() |
269 | - if mode.mode_end_check(text): | |
270 | - if mode.has_element("onFinished"): | |
271 | - m = mode.get("onFinished") | |
272 | - if "insert" in m: | |
273 | - self._write(m["insert"]) | |
274 | - if "replace" in m: | |
275 | - text = m["replace"] | |
276 | 232 | |
233 | + if mode.test("end", text): | |
234 | + text = mode.on_exit(self, text) | |
277 | 235 | self.mode_stack.pop() |
278 | - if len(self.mode_stack) > 0: | |
236 | + next_mode = self.current_mode() | |
237 | + if next_mode: | |
279 | 238 | # start next mode |
280 | - if self.current().hasElement("onStart"): | |
281 | - m = self.current()["onStart"] | |
282 | - if "insert" in m: | |
283 | - self._write(m["insert"]) | |
284 | - text = self._apply_rules2(text) | |
239 | + text = self._markup(text) | |
285 | 240 | return text |
286 | 241 | |
287 | - rules = self.rulesOf(mode) | |
288 | - for key in self._sorted_keys(rules): | |
289 | - rule = rules[key] | |
290 | - if rule.get('pass', False): | |
291 | - continue | |
292 | - if 'regexp' in rule: | |
293 | - rex = re.compile(rule['regexp']) | |
294 | - match = rex.search(text) | |
295 | - if match: | |
296 | - sub_rule = rule.get('apply', False) | |
297 | - if sub_rule: | |
298 | - text = self._apply_rules(sub_rule, text) | |
299 | - text = self._do_action(mode, text, rule, rex, match) | |
300 | - if not rule.get('continue', True): | |
301 | - break | |
242 | + if mode.has_attr('transitions'): | |
243 | + for candidate in mode.attr('transitions'): | |
244 | + mode = self._get_mode(candidate) | |
245 | + if mode and mode.test('begin', text): | |
246 | + self.mode_stack.push(candidate) | |
247 | + next_mode = self.current_mode() | |
248 | + text = next_mode.on_start(self, text) | |
249 | + text = self._markup(text) | |
250 | + return text | |
251 | + | |
252 | + for key in mode.rules(): | |
253 | + (is_finish, text) = self.apply_rule(mode.rule(key), text) | |
254 | + if is_finish: | |
255 | + break | |
302 | 256 | |
303 | - return text | |
304 | 257 | |
258 | + return text | |
305 | 259 | |
260 | + def apply_rule(self, rule, text): | |
261 | + # if 'pass' rule is True, exit | |
262 | + if rule.get('pass', False): | |
263 | + return (False, text) | |
264 | + | |
265 | + if 'regexp' in rule: | |
266 | + rex = re.compile(rule['regexp']) | |
267 | + match = rex.search(text) | |
268 | + if match: | |
269 | + if 'apply' in rule: | |
270 | + self.mode_stack.push(rule["apply"]) | |
271 | + text = self._markup(text) | |
272 | + self.mode_stack.pop() | |
273 | + | |
274 | + if 'switch' in rule: | |
275 | + for key in rule['switch'].keys(): | |
276 | + value = self.store.load(self.current_mode(), key) | |
277 | + if value == None: | |
278 | + continue | |
279 | + if value not in rule['switch'][key]: | |
280 | + continue | |
281 | + new_rule = rule['switch'][key][value] | |
282 | + return self.apply_rule(new_rule, text) | |
283 | + | |
284 | + if 'store' in rule: | |
285 | + arg = rule['store'] | |
286 | + if isinstance(arg, list): | |
287 | + for index in range(len(arg)): | |
288 | + self.store.save(self.current_mode(), arg[index], match.group(index+1)) | |
289 | + else: | |
290 | + self.store.save(self.current_mode(), arg, match.group(1)) | |
291 | + | |
292 | + if 'unset' in rule: | |
293 | + key = rule['unset'] | |
294 | + self.store.delete(self.current_mode(), key) | |
295 | + | |
296 | + if 'set' in rule: | |
297 | + arg = rule['set'] | |
298 | + self.store.save(self.current_mode(), arg[0], arg[1]) | |
299 | + | |
300 | + # if 'call' in rule: | |
301 | + # (func, newarg) = rule['call'] | |
302 | + # if func in self.functions: | |
303 | + # arg = match.group(1) | |
304 | + # context = self.store | |
305 | + # results = self.functions[func](context, arg) | |
306 | + # for (k, v) in results: | |
307 | + # self._store(mode, k, v) | |
308 | + | |
309 | + if 'replace' in rule: | |
310 | + text = rex.sub(rule['replace'], text) | |
311 | + text = self._expand_variable(text) | |
312 | + | |
313 | + if 'continue' in rule: | |
314 | + if rule['continue'] == False: | |
315 | + (True, text) | |
316 | + return (False, text) | |
306 | 317 | |
307 | 318 | |
308 | 319 | def getImageGeom(context, filename): |