Mercurial Test 02
Revisión | 4db780ccc4b48ee647d6975cf1bc304bc884d90b (tree) |
---|---|
Tiempo | 2013-06-11 18:51:57 |
Autor | hylom <hylom@user...> |
Commiter | hylom |
initial commit for htmltree
@@ -0,0 +1,2 @@ | ||
1 | +.*~ | |
2 | + |
@@ -0,0 +1,5 @@ | ||
1 | +htmltree - HTML parsing and query library for Python | |
2 | +======== | |
3 | + | |
4 | + | |
5 | + |
@@ -0,0 +1,16 @@ | ||
1 | +# Copyright (c) 2011 hylom <hylomm at gmail.com> | |
2 | +# All rights reserved. | |
3 | +# | |
4 | +# This module is released under BSD License. | |
5 | +# http://www.opensource.org/licenses/bsd-license.php | |
6 | +# | |
7 | +"""htmltree module | |
8 | + | |
9 | +Parse and convert HTML to tree-style object | |
10 | +""" | |
11 | +#__all__ = ["htmltree"] | |
12 | + | |
13 | +import htmltree | |
14 | +from htmltree import parse | |
15 | + | |
16 | +version = (0, 2, 1) |
@@ -0,0 +1,542 @@ | ||
1 | +# htmltree.py by hylom | |
2 | +# -*- coding: utf-8 -*- | |
3 | + | |
4 | +"""htmltree.py - HTML Element-Tree Builder | |
5 | +by hylom <hylomm at gmail.com> | |
6 | +""" | |
7 | + | |
8 | +import HTMLParser | |
9 | +import re | |
10 | + | |
class HTMLElementError(Exception):
    """Error concerning a particular HTML element.

    Attributes:
    msg -- description of the problem
    elem -- the element the problem relates to (may be None)
    """

    def __init__(self, msg, elem=None):
        """
        create HTMLElementError object.

        Arguments:
        msg -- description of the problem
        elem -- element the problem relates to (default: None)
        """
        Exception.__init__(self, msg, elem)
        self.msg = msg
        self.elem = elem

    def __repr__(self):
        return "HTML Element Error: %s in %s" % (self.msg, self.elem)

    # Tracebacks and str(exc) use __str__; without this alias they would show
    # the raw argument tuple instead of the formatted message.
    __str__ = __repr__
19 | + | |
class Renderer(object):
    """HTMLElement Render base class."""

    def attrs2str(self, elem):
        """
        Serialize the attributes of elem for use inside a start tag.

        Arguments:
        elem -- object whose "attrs" member maps attribute names to values;
                a value of None means an attribute without a value

        Returns a string in which every attribute is preceded by one space
        ("" when there are no attributes). Values are wrapped in single
        quotes unless they contain a single quote; in that case double
        quotes are used and any double quote inside the value is written
        as &quot; so the value cannot end the attribute early.
        """
        parts = []
        for name in elem.attrs:
            value = elem.attrs[name]
            if value is None:
                parts.append(name)
            elif "'" not in value:
                parts.append("%s='%s'" % (name, value))
            else:
                escaped = value.replace('"', "&quot;")
                parts.append('%s="%s"' % (name, escaped))
        return "".join([" " + part for part in parts])
33 | + | |
class HTMLRenderer(Renderer):
    """Render HTMLElement as HTML."""
    # TODO: check tags not need to close more strict...
    # Tags rendered without an end tag. "input" is listed because HTMLTree
    # stores it as a childless element, so emitting "</input>" would add an
    # end tag that was never in the parsed document.
    UNCLOSABLE_TAGS = ["br", "link", "meta", "img", "input"]

    def render_inner(self, elem):
        """Return the HTML of elem's children, without elem's own tag."""
        texts = []
        for child in elem:
            self._recursive(child, texts)
        return "".join(texts)

    def render(self, elem):
        """Return the HTML of elem, including its own tag and its children."""
        texts = []
        self._recursive(elem, texts)
        return "".join(texts)

    def _recursive(self, elem, texts):
        """
        Append the HTML fragments for elem and its descendants to texts.

        Arguments:
        elem -- element to render
        texts -- list of strings that collects the output in document order
        """
        if elem.is_tag():
            texts.append("<" + elem.name + self.attrs2str(elem) + ">")
            for child in elem:
                self._recursive(child, texts)
            if elem.name not in self.UNCLOSABLE_TAGS:
                texts.append("</" + elem.name + ">")
        elif elem.is_text():
            # empty text nodes contribute nothing
            if elem.text():
                texts.append(elem.text())
        elif elem.is_root():
            # the root has no markup of its own
            for child in elem:
                self._recursive(child, texts)
        elif elem.is_decl():
            texts.append("<!" + elem.name + ">")
        elif elem.is_comment():
            texts.append("<!--" + elem.name + "-->")
67 | + | |
68 | + | |
class TEXTRenderer(Renderer):
    """Render HTMLElement as TEXT."""
    # TODO: check tags not need to close more strict...
    UNCLOSABLE_TAGS = ["br", "link", "meta", "img"]

    def render_inner(self, elem):
        """Return the concatenated text found below elem."""
        chunks = []
        for node in elem:
            self._recursive(node, chunks)
        return "".join(chunks)

    def render(self, elem):
        """Return the concatenated text of elem and everything below it."""
        chunks = []
        self._recursive(elem, chunks)
        return "".join(chunks)

    def _recursive(self, elem, texts):
        """Append the text of elem and its descendants to the list texts."""
        if elem.is_tag():
            descend = True
        elif elem.is_text():
            # only non-empty text is collected
            content = elem.text()
            if content:
                texts.append(content)
            descend = False
        else:
            # declarations and comments produce no text
            descend = elem.is_root()

        if descend:
            for node in elem:
                self._recursive(node, texts)
95 | + | |
class HTMLElement(list):
    """HTML element object to use as tree nodes.

    An element is a list whose items are its child elements. Because of
    that, an element without children is falsy; always test for "no
    element" with "is None", never by truthiness. Elements compare by
    identity, not by content.
    """
    ROOT = 0
    TAG = 100
    TEXT = 200
    DECL = 300
    COMMENT = 400

    def __init__(self, type, name="", attrs=None):
        """
        create HTMLElement object.

        Arguments:
        type -- element type. HTMLElement.(ROOT|TAG|TEXT|DECL|COMMENT)
        name -- element name (default: "")
        attrs -- dict (or sequence of (name, value) pairs) of attributes
                 (default: None, meaning no attributes)

        Example:
        attr = dict(href="http://example.com/", target="_blank")
        e = HTMLElement(HTMLElement.TAG, "a", attr)
        # 'e' means <a href="http://example.com/" target="_blank">
        """

        self.type = type
        self.name = name
        # always copy, so the element never shares a dict with the caller
        self.attrs = dict(attrs) if attrs else {}
        self._text = ""
        self._parent = None
        self._next_elem = None
        self._prev_elem = None

    def __repr__(self):
        if self.type == HTMLElement.TAG:
            return "<TAG:%s %s>" % (self.name, self._attrs2str())
        elif self.type == HTMLElement.DECL:
            return "<DECL:'%s'>" % self.name
        elif self.type == HTMLElement.COMMENT:
            return "<COMMENT:'%s'>" % self.name
        elif self.type == HTMLElement.TEXT:
            return "<TEXT:'%s'>" % self._text
        elif self.type == HTMLElement.ROOT:
            return "<ROOT>"
        else:
            return "<UNKNOWN>"

    def __eq__(self, other):
        # identity comparison: list.remove() must remove exactly this node,
        # not another node that happens to have equal children
        return self is other

    def __ne__(self, other):
        # Python 2 does not derive != from ==; keep both identity based
        return self is not other

    def _attrs2str(self):
        """returns attributes as "name='value'" items joined by spaces."""
        parts = []
        for key in self.attrs:
            value = self.attrs[key]
            if value is None:
                parts.append(key)
            else:
                parts.append("%s='%s'" % (key, value))
        return " ".join(parts)

    # basic acquisition functions
    def get_attribute(self, attr, default=None):
        """returns given attribute's value."""
        return self.attrs.get(attr, default)

    def attr(self, attr, default=None):
        """returns given attribute's value."""
        return self.attrs.get(attr, default)

    def has_attribute(self, attr):
        """returns True if element has "attr" attribute."""
        return attr in self.attrs

    def text(self):
        """returns content in the tag."""
        return self._text

    def inner_html(self):
        "returns inner html"
        rn = HTMLRenderer()
        return rn.render_inner(self)

    def inner_text(self):
        "returns inner text"
        rn = TEXTRenderer()
        return rn.render_inner(self)

    def get_classes(self):
        "returns classes"
        value = self.get_attribute('class')
        if value is None:
            return []
        return value.split()

    # navigation functions
    def parent(self):
        """returns tag's parent element."""
        return self._parent

    def next(self):
        """returns tag's next element."""
        return self._next_elem

    def prev(self):
        """returns tag's previous element."""
        return self._prev_elem

    def next_tag(self):
        """returns tag's next tag."""
        sibling = self.next()
        while sibling is not None:
            if sibling.is_tag():
                break
            sibling = sibling.next()
        return sibling

    def prev_tag(self):
        """returns tag's previous tag."""
        sibling = self.prev()
        while sibling is not None:
            if sibling.is_tag():
                break
            sibling = sibling.prev()
        return sibling

    # basic query functions
    def get_elements_by_name(self, name):
        """returns list of descendants named "name", in document order."""
        buf = []
        for i in self:
            i._r_get_elements_by_name(name, buf)
        return buf

    def _r_get_elements_by_name(self, name, buf):
        if self.name == name:
            buf.append(self)
        for i in self:
            i._r_get_elements_by_name(name, buf)

    def get_comments(self):
        """returns list of descendant comment elements, in document order."""
        buf = []
        for i in self:
            i._r_get_comments(buf)
        return buf

    def _r_get_comments(self, buf):
        if self.is_comment():
            buf.append(self)
        for i in self:
            i._r_get_comments(buf)

    def get_element_by_id(self, id):
        """returns first descendant whose "id" attribute is id, or None."""
        for i in self:
            if "id" in i.attrs and i.attrs["id"] == id:
                return i
            e = i.get_element_by_id(id)
            if e is not None:
                return e
        #raise HTMLElementError("Element not found")
        return None

    def get_elements_by_class(self, cls):
        """returns list of descendants having class "cls"."""
        buf = []
        for i in self:
            i._r_get_elements_by_class(cls, buf)
        return buf

    def _r_get_elements_by_class(self, cls, buf):
        if cls in self.get_classes():
            buf.append(self)
        for i in self:
            i._r_get_elements_by_class(cls, buf)

    def get_elements(self, name, attrs):
        """
        returns descendants named "name" matching all given attributes.

        Arguments:
        name -- element name
        attrs -- dict of required attribute values; an attribute missing
                 from an element is treated as ""
        """
        results = []
        for elem in self.get_elements_by_name(name):
            for key in attrs:
                if elem.get_attribute(key, "") != attrs[key]:
                    break
            else:
                # loop ended without break: every attribute matched
                results.append(elem)
        return results

    # manipulation functions
    def append_tag(self, tag, attrs):
        """
        append new tag element as last child and return it.

        The new element gets this element as parent and is linked with the
        previous last child, so parent()/prev()/next()/delete() work on it.
        """
        elem = HTMLElement(HTMLElement.TAG, tag, attrs)
        elem._parent = self
        if len(self) > 0:
            last = self[-1]
            last._next_elem = elem
            elem._prev_elem = last
        self.append(elem)
        return elem

    def remove_element(self, elem):
        """remove elem from its parent (same as elem.delete())."""
        elem.delete()

    def delete(self):
        """
        remove this element from its parent.

        Sibling links of the neighbours are joined and the links of this
        element are cleared. Raises HTMLElementError when the element has
        no parent.
        """
        parent = self._parent
        if parent is None:
            raise HTMLElementError("element has no parent", self)
        parent.remove(self)

        before = self._prev_elem
        after = self._next_elem
        if before is not None:
            before._next_elem = after
        if after is not None:
            after._prev_elem = before
        self._parent = None
        self._prev_elem = None
        self._next_elem = None

    # query functions
    # TODO: this function is under implementing...
    def select(self, expr):
        """
        returns list of descendants matching simple selector "expr".

        expr is a whitespace separated list of terms; each term is
        "#id", ".class" or a tag name, and each following term is
        searched below the matches of the previous one. Every element
        is returned at most once.
        """
        terms = expr.strip().split()
        if len(terms) == 0:
            return []
        # start from self (not from its children) so that direct children
        # can match the first term as well
        results = [self]
        for pat in terms:
            matched = []
            seen = set()
            for elem in results:
                for found in self._select_pattern(pat, elem):
                    # nested matches may reach the same element twice
                    if id(found) not in seen:
                        seen.add(id(found))
                        matched.append(found)
            results = matched
        return results

    def _select_pattern(self, pat, elem):
        if pat[0] == "#":
            results = [elem.get_element_by_id(pat[1:]),]
        elif pat[0] == ".":
            results = elem.get_elements_by_class(pat[1:])
        else:
            results = elem.get_elements_by_name(pat)
        # drop only the "not found" marker; a truthiness test would also
        # drop every element without children (empty list)
        return [x for x in results if x is not None]

    def select_1st(self, expr):
        """returns first element matching "expr", or None."""
        r = self.select(expr)
        if len(r) == 0:
            return None
        else:
            return r[0]

    def select_by_name2(self, term1, term2):
        """returns elements named term2 found below elements named term1."""
        buf = []
        for elem in self.get_elements_by_name(term1):
            buf.extend(elem.get_elements_by_name(term2))
        return buf

    # is_* functions
    def is_text(self):
        return self.type == HTMLElement.TEXT

    def is_tag(self):
        return self.type == HTMLElement.TAG

    def is_root(self):
        return self.type == HTMLElement.ROOT

    def is_decl(self):
        return self.type == HTMLElement.DECL

    def is_comment(self):
        return self.type == HTMLElement.COMMENT

    def is_descendant(self, tagname):
        """returns nearest ancestor named tagname, or False if none."""
        p = self.parent()
        while p is not None:
            if p.name == tagname:
                return p
            p = p.parent()
        return False

    # mmmh....
    def trace_back(self, tag):
        """ regexp string => list

        returns names of the ancestors whose name matches regexp "tag",
        nearest ancestor first.
        """
        p = self.parent()
        rex = re.compile(tag)
        result = []
        while p is not None:
            if rex.search(p.name):
                result.append(p.name)
            p = p.parent()
        return result
359 | + | |
360 | + | |
class HTMLTreeError(Exception):
    """Error raised when HTML cannot be parsed into a tree.

    Attributes:
    msg -- description of the problem
    lineno -- line number where the problem was found (may be None)
    offset -- character offset in that line (may be None)
    """

    def __init__(self, msg, lineno, offset):
        Exception.__init__(self, msg, lineno, offset)
        self.msg = msg
        self.lineno = lineno
        self.offset = offset

    def __repr__(self):
        # %s rather than %d: the position may be unknown (None), and %d
        # would raise TypeError while the error is being reported
        return "HTML Parse Error: %s , line: %s, char: %s" % (
            self.msg, self.lineno, self.offset)

    # str(exc) and tracebacks should show the formatted message too
    __str__ = __repr__
370 | + | |
371 | + | |
def parse(data, charset=None, option=0):
    """
    Parse HTML and return the HTMLTree object that holds the result.

    Arguments:
    data -- HTML to parse
    charset -- charset of HTML (default: None)
    option -- option (default: 0, meaning none)
    """
    builder = HTMLTree()
    builder.parse(data, charset, option)
    return builder
377 | + | |
378 | + | |
class HTMLTree(HTMLParser.HTMLParser):
    """HTML Tree Builder

    Feeds HTML through HTMLParser and builds a tree of HTMLElement
    objects below a ROOT element, available from root().
    """
    # option flags for parse(); combine with "|"
    USE_VALIDATE = 0x0001

    IGNORE_BLANK = 0x0010
    # the three flags below are not referenced by the code in this class
    TRUNC_BLANK = 0x0020
    JOIN_TEXT = 0x0040

    TRUNC_BR = 0x0100
    # TODO: check tags not need to close more strict...
    UNCLOSABLE_TAGS = ["br", "link", "meta", "img", "input"]

    def __init__(self):
        "Constructor"
        HTMLParser.HTMLParser.__init__(self)

    def parse(self, data, charset=None, option=0):
        """
        Parse given HTML.

        Arguments:
        data -- HTML to parse
        charset -- charset of HTML (default: None)
        option -- option (default: 0, meaning none)

        When charset is None, it is looked up in the document's
        <meta http-equiv="Content-Type"> element; if one is found the
        document is parsed again so that text gets decoded with it.

        Raises HTMLTreeError when the underlying parser reports an error.
        """

        self.charset = charset
        self._option = option
        self._build_tree(data)

        # if charset is not given, detect charset
        if self.charset is None:
            self.charset = self._detect_charset()
            if self.charset:
                self._build_tree(data)

        self._finalize()

    def _build_tree(self, data):
        """Parse data from scratch into a new root element."""
        # reset() discards input buffered by an earlier feed(), so that a
        # second pass does not see leftovers of the first one
        self.reset()
        self._htmlroot = HTMLElement(HTMLElement.ROOT)
        self._cursor = self._htmlroot
        try:
            self.feed(data)
        except HTMLParser.HTMLParseError as e:
            raise HTMLTreeError("HTML parse error: " + e.msg,
                                e.lineno, e.offset)

    def _detect_charset(self):
        """Return charset named by the first Content-Type meta, or None."""
        for meta in self.root().get_elements_by_name("meta"):
            # attribute values are None for attributes written without value
            http_equiv = meta.attrs.get("http-equiv") or ""
            if http_equiv.lower() != "content-type":
                continue
            ctype = meta.attrs.get("content") or ""
            m = re.search(r"charset=([^;]+)", ctype)
            if m:
                return m.group(1).strip()
        return None

    def _finalize(self):
        r = self.root()
        self._r_finalize(r)

    def _r_finalize(self, elem):
        """Link the children of elem (recursively) as prev/next siblings."""
        if elem.is_text():
            return

        for i in range(len(elem) - 1):
            elem[i]._next_elem = elem[i + 1]
            elem[i + 1]._prev_elem = elem[i]

        for sub_elem in elem:
            self._r_finalize(sub_elem)

    def validate(self):
        """Not implemented yet; always raises NotImplementedError."""
        # The previous body called an undefined helper with an undefined
        # argument and could only fail; fail with an explicit error instead.
        raise NotImplementedError("HTMLTree.validate() is not implemented")

    # tools
    def _text_encoder(self, text):
        """
        Convert text to unicode when a charset is known.

        Byte strings are decoded with self.charset; everything else
        (already decoded text, None for attributes without value) and
        all input when no charset is set is returned unchanged.
        """
        if self.charset and isinstance(text, bytes):
            return text.decode(self.charset)
        return text

    def _attr_encoder(self, attrs):
        return [(k, self._text_encoder(v)) for (k, v) in attrs]

    # Handlers
    def handle_starttag(self, tag, attrs):
        # some tags treat as start-end tag.
        if tag in self.UNCLOSABLE_TAGS:
            return self.handle_startendtag(tag, attrs)

        elem = HTMLElement(HTMLElement.TAG, tag, self._attr_encoder(attrs))

        if self._option & HTMLTree.USE_VALIDATE > 0:
            # try validation (experimental)
            # a new <li> directly inside an open <li> closes the open one
            if tag == "li" and self._cursor.name == "li":
                self.handle_endtag("li")
            # end of validation

        elem._parent = self._cursor
        self._cursor.append(elem)
        self._cursor = elem

    def handle_endtag(self, tag):
        # some tags treat as start-end tag.
        if tag in self.UNCLOSABLE_TAGS:
            return

        # A surplus end tag at the root has nothing to close. Ignore it
        # rather than moving the cursor to None, which would make the
        # next handler fail.
        parent = self._cursor.parent()
        if parent is not None:
            self._cursor = parent

    def handle_startendtag(self, tag, attrs):
        elem = HTMLElement(HTMLElement.TAG, tag, self._attr_encoder(attrs))
        elem._parent = self._cursor
        self._cursor.append(elem)

    def handle_data(self, data):
        if self._option & HTMLTree.IGNORE_BLANK > 0:
            # whitespace-only text is kept as an empty text element
            if re.search(r"^\s*$", data):
                data = ""

        elem = HTMLElement(HTMLElement.TEXT)
        elem._parent = self._cursor

        # decode text to unicode when the charset is known
        elem._text = self._text_encoder(data)

        self._cursor.append(elem)

    def handle_entityref(self, name):
        # entity references are stored unexpanded, as text
        data = "&" + name + ";"
        self.handle_data(data)

    def handle_charref(self, ref):
        # character references are stored unexpanded, as text
        data = "&#" + ref + ";"
        self.handle_data(data)

    def handle_decl(self, decl):
        elem = HTMLElement(HTMLElement.DECL, decl)
        elem._parent = self._cursor
        self._cursor.append(elem)

    def handle_comment(self, data):
        elem = HTMLElement(HTMLElement.COMMENT, data)
        elem._parent = self._cursor
        self._cursor.append(elem)

    # Accessor
    def root(self):
        """returns the ROOT element of the most recently parsed tree."""
        return self._htmlroot
@@ -0,0 +1,1 @@ | ||
1 | +../ | |
\ No newline at end of file |
@@ -0,0 +1,15 @@ | ||
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
2 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
3 | +<head> | |
4 | +<title>htmltree.py sample html</title> | |
5 | +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
6 | +<link rel="stylesheet" type="text/css" href="main.css"> | |
7 | +</head> | |
8 | +<body id="htmltree" class="content"> | |
9 | +<div class="main-column"> | |
10 | +<h1 id="header1">This is htmltree</h1> | |
11 | +foo bar hoge hoge | |
12 | +</div> | |
13 | +</didy> | |
14 | +</html> | |
15 | + |
@@ -0,0 +1,54 @@ | ||
1 | +#!/usr/bin/env python | |
2 | + | |
3 | +from htmltree import htmltree | |
4 | +import sys | |
5 | +import re | |
6 | + | |
# Smoke test for htmltree: parse the HTML file named on the command line,
# then exercise the query, rendering and removal functions, printing each
# result.
try:
    input_path = sys.argv[1]
except IndexError:
    sys.exit("%s <input_html>" % sys.argv[0])

# "with" closes the file even when reading fails
with open(input_path, "r") as f:
    html_text = f.read()

tree = htmltree.HTMLTree()
tree.parse(html_text)

r = tree.root()

# children (text nodes) of every <title> below <head>
for title in r.select_by_name2("head", "title"):
    for item in title:
        print(item)
        print(item.text())

# whole document rendered back to HTML
rn = htmltree.HTMLRenderer()
print(rn.render(r))

# lookups by id; the input may not contain these ids, so report a missing
# element instead of failing on None
for elem_id in ("htmltree", "header1"):
    found = r.get_element_by_id(elem_id)
    if found is None:
        print("element #%s not found" % elem_id)
    else:
        print(found.inner_html())

found = r.select_1st("#header1")
if found is None:
    print("element #header1 not found")
else:
    print(found.inner_html())

print("test_remove")
attrs = {
    "rel": "stylesheet",
    "type": "text/css",
    "href": "main.css",
    }
# remove the stylesheet links, then show what is left
for elem in r.get_elements("link", attrs):
    elem.delete()
print(r.inner_html())

# remove the element with id "htmltree", then show what is left
found = r.select_1st("#htmltree")
if found is not None:
    found.delete()
print(r.inner_html())
54 | + |