correction du parseur HTML après changement sur le site
This commit is contained in:
parent
51d120df26
commit
0c56442ad0
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
.idea/
|
||||
.vscode/
|
||||
*__pycache__/
|
||||
dist/
|
||||
|
@ -5,25 +5,82 @@ class MyHTMLParser(HTMLParser):
|
||||
def __init__(self):
|
||||
super(MyHTMLParser, self).__init__()
|
||||
self.vdm = []
|
||||
self.save = 0
|
||||
|
||||
def _searchClass(self, listTuple, className="post article"):
|
||||
for v in listTuple:
|
||||
if v[0] == 'class' and v[1] == className:
|
||||
return True
|
||||
return False
|
||||
# self.article = False
|
||||
# self.panel = False
|
||||
# self.panelBody = False
|
||||
# self.panelContent = False
|
||||
# self.p = False
|
||||
# self.a = False
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag == "p" and self._searchClass(attrs, className="block"):
|
||||
self.save = 1
|
||||
# self.skip = []
|
||||
|
||||
# @classmethod
|
||||
# def _hasClass(cls, list_tuple, class_name):
|
||||
# for name_value in list_tuple:
|
||||
# array_class = name_value[1].strip().split(" ")
|
||||
# array_class = [ x.strip() for x in array_class ]
|
||||
# contain_all = set(array_class).issubset(class_name)
|
||||
|
||||
# if name_value[0] == 'class' and contain_all:
|
||||
# return True
|
||||
# return False
|
||||
|
||||
# def handle_starttag(self, tag, attrs):
|
||||
# # print(self.get_starttag_text())
|
||||
# if tag == "article" and self._hasClass(attrs, ["art-panel", "col-xs-12"]):
|
||||
# self.article = True
|
||||
# self.skip = []
|
||||
# else:
|
||||
# if tag == "div" and self._hasClass(attrs, ["panel", "panel-default"]) and self.article:
|
||||
# self.panel = True
|
||||
# else:
|
||||
# if tag == "div" and self._hasClass(attrs, ["panel-body"]) and self.article and self.panel:
|
||||
# self.panelBody = True
|
||||
# else:
|
||||
# if tag == "div" and self._hasClass(attrs, ["panel-content"]) and self.article and self.panel and self.panelBody:
|
||||
# self.panelContent = True
|
||||
# else:
|
||||
# if tag == "p" and self._hasClass(attrs, ["block", "hidden-xs"]) and self.article and self.panel and self.panelBody and self.panelContent:
|
||||
# self.p = True
|
||||
# else:
|
||||
# if tag == "a" and self.article and self.panel and self.panelBody and self.panelContent and self.p:
|
||||
# self.a = True
|
||||
# else:
|
||||
# self.skip.append(self.get_starttag_text())
|
||||
|
||||
# def handle_endtag(self, tag):
|
||||
# if len(self.skip) == 0:
|
||||
# if tag == "a" and self.a:
|
||||
# self.a = False
|
||||
# else:
|
||||
# if tag == "p" and self.p:
|
||||
# self.p = False
|
||||
# else:
|
||||
# if tag == "div" and self.panelContent:
|
||||
# self.panelContent = False
|
||||
# else:
|
||||
# if tag == "div" and self.panelBody:
|
||||
# self.panelBody = False
|
||||
# else:
|
||||
# if tag == "div" and self.panel:
|
||||
# self.panel = False
|
||||
# else:
|
||||
# if tag == "article" and self.article:
|
||||
# self.article = False
|
||||
# else:
|
||||
# self.skip.pop()
|
||||
|
||||
if tag == "a" and self.save == 1:
|
||||
self.save = 2
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.save == 2:
|
||||
# print("#"+data+"#")
|
||||
|
||||
# if self.article and self.panel and self.panelBody and self.panelContent and self.p and self.a:
|
||||
# self.vdm.append(data.strip())
|
||||
|
||||
data = data.strip()
|
||||
if data.startswith("Aujourd'hui,") and data.endswith("VDM"):
|
||||
self.vdm.append(data.strip())
|
||||
self.save = 0
|
||||
|
||||
|
||||
def getText(self):
|
||||
return self.vdm
|
||||
return self.vdm
|
||||
|
@ -32,5 +32,4 @@ class VDM(object):
|
||||
|
||||
class errorVDM(Exception):
|
||||
def __init__(self, message):
|
||||
super(errorVDM, self).__init__(message)
|
||||
|
||||
super(errorVDM, self).__init__(message)
|
@ -1 +1 @@
|
||||
version = '1.1'
|
||||
version = '1.2'
|
||||
|
BIN
dist/VDM API-1.1.tar.gz
vendored
BIN
dist/VDM API-1.1.tar.gz
vendored
Binary file not shown.
BIN
dist/VDM_API-1.1-py3-none-any.whl
vendored
BIN
dist/VDM_API-1.1-py3-none-any.whl
vendored
Binary file not shown.
@ -2,6 +2,6 @@
|
||||
|
||||
pip uninstall VDM-API
|
||||
pip wheel --wheel-dir=dist ./
|
||||
pip install dist/VDM_API-1.1-py3-none-any.whl
|
||||
pip install dist/VDM_API-1.2-py3-none-any.whl
|
||||
|
||||
# python setup.py sdist  -> generates a tar.gz archive installable with pip
|
||||
|
6
setup.py
6
setup.py
@ -5,11 +5,11 @@ VDMAPI
|
||||
import os
|
||||
import sys
|
||||
from setuptools import find_packages, setup
|
||||
from VDMAPI.version import version
|
||||
|
||||
module_path = os.path.join(
|
||||
os.path.dirname(os.path.realpath(__file__)), 'VDMAPI')
|
||||
sys.path.insert(0, module_path)
|
||||
from version import version
|
||||
sys.path.remove(module_path)
|
||||
|
||||
|
||||
@ -28,8 +28,8 @@ setup(
|
||||
author_email='antoinroux@hotmail.fr',
|
||||
description='VDM API: an API for recover random VDM from vdm.fr website',
|
||||
long_description=read('README.rst'),
|
||||
url='git://176.189.130.29/python/vdmAPI.git',
|
||||
download_url='git://176.189.130.29/python/vdmAPI.git/tags',
|
||||
url='git://antoine-roux.fr.to/python/vdmAPI.git',
|
||||
download_url='git://antoine-roux.fr.to/python/vdmAPI.git/tags',
|
||||
license='Beerware',
|
||||
platforms='any',
|
||||
packages=find_packages(),
|
||||
|
Loading…
Reference in New Issue
Block a user