text lexical analyzer
Fork

(Original repository, No fork origin)

R/O
HTTP
SSH
HTTPS

Commit

Commit MetaInfo

Revisión	db3f79c1ce321de46c9017aa56937fd77ddba7df (tree)
Tiempo	2013-04-05 19:37:31
Autor	hylom <hylom@hylo...>
Committer	hylom

Log Message

rewriting textparser...

Cambiar Resumen

modified: jarkup.json (diff)
modified: textparser.py (diff)

Diferencia incremental

--- a/jarkup.json

+++ b/jarkup.json

		@@ -123,12 +123,6 @@
123	123	"code": {
124	124	"extends": "fontDecoration",
125	125	"rules": {
126		- "start": {
127		- "priority": 101,
128		- "regexp": "^☆\\+---$",
129		- "continue": false,
130		- "replace": "<pre>"
131		- },
132	126	"red1": {
133	127	"priority": 102,
134	128	"regexp": "\\g\\[(.?)]",

		@@ -152,6 +146,10 @@
152	146	},
153	147	"begin": "^☆\\+---\\s*$",
154	148	"end": "^☆\\+---\\s*$",
	149	+ "onStart": {
	150	+ "insert": "<pre>",
	151	+ "replace": ""
	152	+ },
155	153	"onFinished": {
156	154	"insert": "</pre>",
157	155	"replace": ""

		@@ -190,6 +188,7 @@
190	188	"onStart": {
191	189	"insert": "<div class=\"column note\">",
192	190	"replace": ""
	191	+ },
193	192	"onFinished": {
194	193	"insert": "</div>",
195	194	"replace": ""

--- a/textparser.py

+++ b/textparser.py

		@@ -7,33 +7,113 @@ def CreateMode(lexi, mode_name):
7	7	return Mode(lexi, mode_name)
8	8
9	9
10		-class ModeBase(dict):
	10	+class Mode(dict):
11	11	def __init__(self, lexi, mode_name):
12	12	self.lexi = lexi
13	13	modes = lexi.get("modes")
14	14	self.update(modes[mode_name])
	15	+ self.mode_name = mode_name
15	16
16		- def match(self, key, text):
17		- rex = self.get(key, False)
	17	+ def test(self, attr_name, text):
	18	+ '''test attr_name matches text'''
	19	+ rex = self.get(attr_name, False)
18	20	if rex and re.search(rex, text):
19	21	return True
20	22	else:
21	23	return False
22	24
23		- def has_element(self, key):
24		- return (key in self)
	25	+ def has_attr(self, attr_name):
	26	+ '''if attr_name exists, return True'''
	27	+ return (attr_name in self)
25	28
26		- def write(self, text):
27		- self.writer.write(text)
	29	+ def attr(self, attr_name, default=None):
	30	+ '''return value of attr_name'''
	31	+ return self.get(attr_name, default)
28	32
29		-class Mode(dict):
30		- def __init__(self, lexi, writer, defs):
31		- ModeBase.__init__(self, lexi, writer)
32		- self.update(defs)
	33	+ def is_true(self, attr_name):
	34	+ '''if attr_name is True, return True'''
	35	+ return self.get_attr(attr_name, False)
	36	+
	37	+ def is_false(self, attr_name):
	38	+ '''if attr_name is False, return True'''
	39	+ return not self.get_attr(attr_name, True)
33	40
34		- def mode_end_check(self, text):
35		- return self.match("end", text)
	41	+ def has_rule(self, rule_name):
	42	+ '''if rule_name exists, return True'''
	43	+ rules = self.get("rules", {})
	44	+ return rule_name in rules
36	45
	46	+ def rules(self):
	47	+ '''return list include sorted rule names'''
	48	+ rules = self.get('rules', {})
	49	+ rule_keys = rules.keys()
	50	+ sort_fn = lambda x,y:-cmp(rules[x].get("priority",0), rules[y].get("priority", 0))
	51	+ rule_keys.sort(sort_fn)
	52	+ return rule_keys
	53	+
	54	+ def rule(self, rule_name):
	55	+ '''return rule dict corresponds rule_name'''
	56	+ rules = self.get('rules', {})
	57	+ return rules.get(rule_name, None)
	58	+
	59	+ def on_start(self, writer, text):
	60	+ '''action when mode started'''
	61	+ if self.has_attr("onStart"):
	62	+ m = self.attr("onStart")
	63	+ if "insert" in m:
	64	+ writer.write(m["insert"])
	65	+ if "replace" in m:
	66	+ text = m["replace"]
	67	+ return text
	68	+
	69	+ def on_exit(self, writer, text):
	70	+ '''action when mode finished'''
	71	+ if self.has_attr("onFinished"):
	72	+ m = self.attr("onFinished")
	73	+ if "insert" in m:
	74	+ writer.write(m["insert"])
	75	+ if "replace" in m:
	76	+ text = m["replace"]
	77	+ return text
	78	+
	79	+ def markup(self, text):
	80	+ '''markup text'''
	81	+ if 'switch' in actions:
	82	+ for key in actions['switch'].keys():
	83	+ value = self.store.load(mode, key)
	84	+ if value == None:
	85	+ continue
	86	+ if value not in actions['switch'][key]:
	87	+ continue
	88	+ new_actions = actions['switch'][key][value]
	89	+ text = self._do_action(mode, text, new_actions, rex, match)
	90	+ return text
	91	+
	92	+ if 'store' in actions:
	93	+ arg = actions['store']
	94	+ if isinstance(arg, list):
	95	+ for index in range(len(arg)):
	96	+ self.store.save(mode, arg[index], match.group(index+1))
	97	+ else:
	98	+ self.store.save(mode, arg, match.group(1))
	99	+ if 'unset' in actions:
	100	+ key = actions['unset']
	101	+ self.store.delete(mode, key)
	102	+ if 'set' in actions:
	103	+ arg = actions['set']
	104	+ self.store.save(mode, arg[0], arg[1])
	105	+ # if 'call' in actions:
	106	+ # (func, newarg) = actions['call']
	107	+ # if func in self.functions:
	108	+ # arg = match.group(1)
	109	+ # context = self.store
	110	+ # results = self.functions[func](context, arg)
	111	+ # for (k, v) in results:
	112	+ # self._store(mode, k, v)
	113	+ if 'replace' in actions:
	114	+ text = rex.sub(actions['replace'], text)
	115	+ text = self._expand_variable(mode, text)
	116	+ return text
37	117
38	118	class ModeStack(object):
39	119	def __init__(self):

		@@ -47,9 +127,9 @@ class ModeStack(object):
47	127
48	128	def current(self):
49	129	try:
50		- return self.mode_stack[-1]
	130	+ return self.stack[-1]
51	131	except IndexError:
52		- return False
	132	+ return None
53	133
54	134	class Store(dict):
55	135	def __init__(self):

		@@ -60,26 +140,30 @@ class Store(dict):
60	140	self.global_store[key] = value
61	141
62	142	def save(self, mode, key, value):
63		- if not mode in self.mode_stores:
64		- self.mode_stores[mode] = {}
65		- self.mode_stores[mode][key] = value
	143	+ modekey = mode.mode_name
	144	+ if not modekey in self.mode_stores:
	145	+ self.mode_stores[modekey] = {}
	146	+ self.mode_stores[modekey][key] = value
66	147
67	148	def load(self, mode, key, default=None):
68		- if (mode in self.mode_stores) and (key in self.mode_stores[mode]):
69		- return self.mode_stores[mode][key]
	149	+ modekey = mode.mode_name
	150	+ if (modekey in self.mode_stores) and (key in self.mode_stores[modekey]):
	151	+ return self.mode_stores[modekey][key]
70	152	else:
71	153	return self.global_store.get(key, default)
72	154
73	155	def delete(self, mode, key):
74		- if not mode in self.mode_stores:
	156	+ modekey = mode.mode_name
	157	+ if not modekey in self.mode_stores:
75	158	return
76		- if not key in self.mode_stores[mode]:
	159	+ if not key in self.mode_stores[modekey]:
77	160	return
78		- del self.mode_stores[mode][key]
	161	+ del self.mode_stores[modekey][key]
79	162
80	163	def clear(self, mode):
81		- if mode in self.mode_stores:
82		- del self.mode_stores[mode]
	164	+ modekey = mode.mode_name
	165	+ if modekey in self.mode_stores:
	166	+ del self.mode_stores[modekey]
83	167
84	168
85	169	class Parser(object):

		@@ -91,18 +175,21 @@ class Parser(object):
91	175	@return TextWriter object
92	176	"""
93	177	self.lexi = lexi
94		-# self.mode_stack = ["global", ]
95	178	self.mode_stack = ModeStack()
96		- mode_global = CreateMode(self.lexi, "global")
97		- self.mode_stack.push(mode_global)
	179	+ self.mode_stack.push("global")
98	180
99	181	self.store = Store()
100	182	self.functions = {
101	183	# "getImageGeom": getImageGeom,
102	184	}
103	185
	186	+ def _get_mode(self, mode_name):
	187	+ return Mode(self.lexi, mode_name)
	188	+
104	189	def current_mode(self):
105		- return self.mode_stack.current()
	190	+ mode_name = self.mode_stack.current()
	191	+ if mode_name:
	192	+ return self._get_mode(mode_name)
106	193
107	194	def markup(self, iter_in, stream_out):
108	195	"""read from iter_in and output to stream_out

		@@ -111,57 +198,19 @@ class Parser(object):
111	198	"""
112	199	self.stream_out = stream_out
113	200	try:
114		- while self.currentMode():
	201	+ while self.current_mode():
115	202	l = iter_in.next().strip('\r\n')
116		- out = self._apply_rules(self.currentMode(), l)
117		- self._write(out)
	203	+ out = self._markup(l)
	204	+ self.write(out)
118	205	except StopIteration:
119	206	return
120	207
121		- def _sorted_keys(self, rules):
122		- rule_keys = rules.keys()
123		- rule_keys.sort(lambda x,y:-cmp(rules[x]["priority"], rules[y]["priority"]))
124		- return rule_keys
125		-
126		- def modes(self):
127		- return self.lexi.get("modes", {})
128		-
129		- def modeOf(self, mode):
130		- return self.modes().get(mode, {})
131		-
132		- def transitionsOf(self, mode):
133		- return self.modeOf(mode).get('transitions', [])
134		-
135		- def rulesOf(self, mode):
136		- return self.modeOf(mode).get('rules', {})
137		-
138		- def _transition_check(self, mode, text):
139		- transitions = self.transitionsOf(mode)
140		- for mode in transitions:
141		- rex = self.modeOf(mode).get("begin", False)
142		- if rex and re.search(rex, text):
143		- return mode
144		- return False
145		-
146		- def _mode_end_check(self, mode, text):
147		- current_mode = self.modeOf(mode)
148		- rex = current_mode.get("end", False)
149		- if rex and re.search(rex, text):
150		- return True
151		- else:
152		- return False
153		-
154		- def _transition(self, mode):
155		- self.mode_stack.append(mode)
156		-
157		- def _mode_exit(self):
158		- self.mode_stack.pop()
159		-
160		- def _write(self, text):
	208	+ def write(self, text):
161	209	self.stream_out.write(text)
162	210	self.stream_out.write('\n')
163	211
164		- def _expand_variable(self, mode, text):
	212	+ def _expand_variable(self, text):
	213	+ mode = self.current_mode()
165	214	if not text.find('$'):
166	215	return text
167	216	rex = re.compile('\${?([A-Za-z0-9_]+)}?')

		@@ -170,93 +219,7 @@ class Parser(object):
170	219	text = rex.sub(sub_func, text)
171	220	return text
172	221
173		- def _do_action(self, mode, text, actions, rex, match):
174		- if 'switch' in actions:
175		- for key in actions['switch'].keys():
176		- value = self.store.load(mode, key)
177		- if value == None:
178		- continue
179		- if value not in actions['switch'][key]:
180		- continue
181		- new_actions = actions['switch'][key][value]
182		- text = self._do_action(mode, text, new_actions, rex, match)
183		- return text
184		-
185		- if 'store' in actions:
186		- arg = actions['store']
187		- if isinstance(arg, list):
188		- for index in range(len(arg)):
189		- self.store.save(mode, arg[index], match.group(index+1))
190		- else:
191		- self.store.save(mode, arg, match.group(1))
192		- if 'unset' in actions:
193		- key = actions['unset']
194		- self.store.delete(mode, key)
195		- if 'set' in actions:
196		- arg = actions['set']
197		- self.store.save(mode, arg[0], arg[1])
198		- # if 'call' in actions:
199		- # (func, newarg) = actions['call']
200		- # if func in self.functions:
201		- # arg = match.group(1)
202		- # context = self.store
203		- # results = self.functions[func](context, arg)
204		- # for (k, v) in results:
205		- # self._store(mode, k, v)
206		- if 'replace' in actions:
207		- text = rex.sub(actions['replace'], text)
208		- text = self._expand_variable(mode, text)
209		- return text
210		-
211		- def _apply_rules(self, mode, text):
212		- # check global rule
213		- gi = self.lexi.get("globalIdentifier", False)
214		- if gi:
215		- m_gvi = re.search(gi, text)
216		- if m_gvi:
217		- self.store.save_global(m_gvi.group(1), m_gvi.group(2))
218		- return ''
219		-
220		- if self._mode_end_check(mode, text):
221		- self._mode_exit()
222		- if "onFinished" in self.modeOf(mode):
223		- m = self.modeOf(mode)["onFinished"]
224		- if "insert" in m:
225		- self._write(m["insert"])
226		- if "replace" in m:
227		- text = m["replace"]
228		- if len(self.mode_stack) == 0:
229		- return text
230		- mode = self.mode_stack[-1]
231		-
232		- next_mode = self._transition_check(mode, text)
233		- if next_mode:
234		- self._transition(next_mode)
235		- mode = next_mode
236		- if "onStart" in self.modeOf(mode):
237		- m = self.modeOf(mode)["onStart"]
238		- if "insert" in m:
239		- self._write(m["insert"])
240		-
241		- rules = self.rulesOf(mode)
242		- for key in self._sorted_keys(rules):
243		- rule = rules[key]
244		- if rule.get('pass', False):
245		- continue
246		- if 'regexp' in rule:
247		- rex = re.compile(rule['regexp'])
248		- match = rex.search(text)
249		- if match:
250		- sub_rule = rule.get('apply', False)
251		- if sub_rule:
252		- text = self._apply_rules(sub_rule, text)
253		- text = self._do_action(mode, text, rule, rex, match)
254		- if not rule.get('continue', True):
255		- break
256		-
257		- return text
258		-
259		- def _apply_rules2(self, text):
	222	+ def _markup(self, text):
260	223	# check global rule
261	224	gi = self.lexi.get("globalIdentifier", False)
262	225	if gi:

		@@ -266,43 +229,91 @@ class Parser(object):
266	229	return ''
267	230
268	231	mode = self.current_mode()
269		- if mode.mode_end_check(text):
270		- if mode.has_element("onFinished"):
271		- m = mode.get("onFinished")
272		- if "insert" in m:
273		- self._write(m["insert"])
274		- if "replace" in m:
275		- text = m["replace"]
276	232
	233	+ if mode.test("end", text):
	234	+ text = mode.on_exit(self, text)
277	235	self.mode_stack.pop()
278		- if len(self.mode_stack) > 0:
	236	+ next_mode = self.current_mode()
	237	+ if next_mode:
279	238	# start next mode
280		- if self.current().hasElement("onStart"):
281		- m = self.current()["onStart"]
282		- if "insert" in m:
283		- self._write(m["insert"])
284		- text = self._apply_rules2(text)
	239	+ text = self._markup(text)
285	240	return text
286	241
287		- rules = self.rulesOf(mode)
288		- for key in self._sorted_keys(rules):
289		- rule = rules[key]
290		- if rule.get('pass', False):
291		- continue
292		- if 'regexp' in rule:
293		- rex = re.compile(rule['regexp'])
294		- match = rex.search(text)
295		- if match:
296		- sub_rule = rule.get('apply', False)
297		- if sub_rule:
298		- text = self._apply_rules(sub_rule, text)
299		- text = self._do_action(mode, text, rule, rex, match)
300		- if not rule.get('continue', True):
301		- break
	242	+ if mode.has_attr('transitions'):
	243	+ for candidate in mode.attr('transitions'):
	244	+ mode = self._get_mode(candidate)
	245	+ if mode and mode.test('begin', text):
	246	+ self.mode_stack.push(candidate)
	247	+ next_mode = self.current_mode()
	248	+ text = next_mode.on_start(self, text)
	249	+ text = self._markup(text)
	250	+ return text
	251	+
	252	+ for key in mode.rules():
	253	+ (is_finish, text) = self.apply_rule(mode.rule(key), text)
	254	+ if is_finish:
	255	+ break
302	256
303		- return text
304	257
	258	+ return text
305	259
	260	+ def apply_rule(self, rule, text):
	261	+ # if 'pass' rule is True, exit
	262	+ if rule.get('pass', False):
	263	+ return (False, text)
	264	+
	265	+ if 'regexp' in rule:
	266	+ rex = re.compile(rule['regexp'])
	267	+ match = rex.search(text)
	268	+ if match:
	269	+ if 'apply' in rule:
	270	+ self.mode_stack.push(rule["apply"])
	271	+ text = self._markup(text)
	272	+ self.mode_stack.pop()
	273	+
	274	+ if 'switch' in rule:
	275	+ for key in rule['switch'].keys():
	276	+ value = self.store.load(self.current_mode(), key)
	277	+ if value == None:
	278	+ continue
	279	+ if value not in rule['switch'][key]:
	280	+ continue
	281	+ new_rule = rule['switch'][key][value]
	282	+ return self.apply_rule(new_rule, text)
	283	+
	284	+ if 'store' in rule:
	285	+ arg = rule['store']
	286	+ if isinstance(arg, list):
	287	+ for index in range(len(arg)):
	288	+ self.store.save(self.current_mode(), arg[index], match.group(index+1))
	289	+ else:
	290	+ self.store.save(self.current_mode(), arg, match.group(1))
	291	+
	292	+ if 'unset' in rule:
	293	+ key = rule['unset']
	294	+ self.store.delete(self.current_mode(), key)
	295	+
	296	+ if 'set' in rule:
	297	+ arg = rule['set']
	298	+ self.store.save(self.current_mode(), arg[0], arg[1])
	299	+
	300	+ # if 'call' in rule:
	301	+ # (func, newarg) = rule['call']
	302	+ # if func in self.functions:
	303	+ # arg = match.group(1)
	304	+ # context = self.store
	305	+ # results = self.functions[func](context, arg)
	306	+ # for (k, v) in results:
	307	+ # self._store(mode, k, v)
	308	+
	309	+ if 'replace' in rule:
	310	+ text = rex.sub(rule['replace'], text)
	311	+ text = self._expand_variable(text)
	312	+
	313	+ if 'continue' in rule:
	314	+ if rule['continue'] == False:
	315	+ (True, text)
	316	+ return (False, text)
306	317
307	318
308	319	def getImageGeom(context, filename):

text lexical analyzer Fork

Commit

Tags

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

Cambiar Resumen

Diferencia incremental

text lexical analyzer
Fork