from html.parser import HTMLParser
from html.entities import name2codepoint


class MyHTMLParser(HTMLParser):
    """Collect the first text node of every ``<p class="content">`` element.

    Feed markup with ``feed()`` and read the captured strings back with
    ``getText()``. Only the first text node that follows a matching start
    tag is stored; later text inside the same paragraph is ignored.
    """

    def __init__(self):
        super().__init__()
        # Captured text nodes, in document order.
        self.vdm = []
        # True while waiting for the first text node of a matching <p>.
        self.save = False

    def _searchClass(self, listTuple, className="post article"):
        """Return True if *listTuple* has a ``class`` attribute equal to *className*.

        Args:
            listTuple: ``(name, value)`` attribute pairs as passed to
                ``handle_starttag``.
            className: Exact value the ``class`` attribute must have. The
                comparison is on the whole attribute string, not on
                individual space-separated class tokens.
        """
        return any(
            name == 'class' and value == className
            for name, value in listTuple
        )

    def handle_starttag(self, tag, attrs):
        """Arm capturing when a ``<p class="content">`` start tag is seen."""
        if tag == "p" and self._searchClass(attrs, className="content"):
            self.save = True

    def handle_endtag(self, tag):
        """Disarm capturing when a paragraph closes.

        Without this, an empty matching paragraph would leave ``save`` set
        and the next unrelated text node in the document would be captured.
        """
        if tag == "p":
            self.save = False

    def handle_data(self, data):
        """Store *data* if capturing is armed, then disarm."""
        if self.save:
            self.vdm.append(data)
            # Only the first text node after the start tag is kept.
            self.save = False

    def getText(self):
        """Return the list of captured text nodes (the internal list itself)."""
        return self.vdm