correction du parseur HTML après changement sur le site
This commit is contained in:
parent
51d120df26
commit
0c56442ad0
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
|||||||
.idea/
|
.idea/
|
||||||
|
.vscode/
|
||||||
*__pycache__/
|
*__pycache__/
|
||||||
|
dist/
|
||||||
|
@ -5,25 +5,82 @@ class MyHTMLParser(HTMLParser):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(MyHTMLParser, self).__init__()
|
super(MyHTMLParser, self).__init__()
|
||||||
self.vdm = []
|
self.vdm = []
|
||||||
self.save = 0
|
|
||||||
|
|
||||||
def _searchClass(self, listTuple, className="post article"):
|
# self.article = False
|
||||||
for v in listTuple:
|
# self.panel = False
|
||||||
if v[0] == 'class' and v[1] == className:
|
# self.panelBody = False
|
||||||
return True
|
# self.panelContent = False
|
||||||
return False
|
# self.p = False
|
||||||
|
# self.a = False
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
# self.skip = []
|
||||||
if tag == "p" and self._searchClass(attrs, className="block"):
|
|
||||||
self.save = 1
|
# @classmethod
|
||||||
|
# def _hasClass(cls, list_tuple, class_name):
|
||||||
|
# for name_value in list_tuple:
|
||||||
|
# array_class = name_value[1].strip().split(" ")
|
||||||
|
# array_class = [ x.strip() for x in array_class ]
|
||||||
|
# contain_all = set(array_class).issubset(class_name)
|
||||||
|
|
||||||
|
# if name_value[0] == 'class' and contain_all:
|
||||||
|
# return True
|
||||||
|
# return False
|
||||||
|
|
||||||
|
# def handle_starttag(self, tag, attrs):
|
||||||
|
# # print(self.get_starttag_text())
|
||||||
|
# if tag == "article" and self._hasClass(attrs, ["art-panel", "col-xs-12"]):
|
||||||
|
# self.article = True
|
||||||
|
# self.skip = []
|
||||||
|
# else:
|
||||||
|
# if tag == "div" and self._hasClass(attrs, ["panel", "panel-default"]) and self.article:
|
||||||
|
# self.panel = True
|
||||||
|
# else:
|
||||||
|
# if tag == "div" and self._hasClass(attrs, ["panel-body"]) and self.article and self.panel:
|
||||||
|
# self.panelBody = True
|
||||||
|
# else:
|
||||||
|
# if tag == "div" and self._hasClass(attrs, ["panel-content"]) and self.article and self.panel and self.panelBody:
|
||||||
|
# self.panelContent = True
|
||||||
|
# else:
|
||||||
|
# if tag == "p" and self._hasClass(attrs, ["block", "hidden-xs"]) and self.article and self.panel and self.panelBody and self.panelContent:
|
||||||
|
# self.p = True
|
||||||
|
# else:
|
||||||
|
# if tag == "a" and self.article and self.panel and self.panelBody and self.panelContent and self.p:
|
||||||
|
# self.a = True
|
||||||
|
# else:
|
||||||
|
# self.skip.append(self.get_starttag_text())
|
||||||
|
|
||||||
|
# def handle_endtag(self, tag):
|
||||||
|
# if len(self.skip) == 0:
|
||||||
|
# if tag == "a" and self.a:
|
||||||
|
# self.a = False
|
||||||
|
# else:
|
||||||
|
# if tag == "p" and self.p:
|
||||||
|
# self.p = False
|
||||||
|
# else:
|
||||||
|
# if tag == "div" and self.panelContent:
|
||||||
|
# self.panelContent = False
|
||||||
|
# else:
|
||||||
|
# if tag == "div" and self.panelBody:
|
||||||
|
# self.panelBody = False
|
||||||
|
# else:
|
||||||
|
# if tag == "div" and self.panel:
|
||||||
|
# self.panel = False
|
||||||
|
# else:
|
||||||
|
# if tag == "article" and self.article:
|
||||||
|
# self.article = False
|
||||||
|
# else:
|
||||||
|
# self.skip.pop()
|
||||||
|
|
||||||
if tag == "a" and self.save == 1:
|
|
||||||
self.save = 2
|
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
if self.save == 2:
|
# print("#"+data+"#")
|
||||||
|
|
||||||
|
# if self.article and self.panel and self.panelBody and self.panelContent and self.p and self.a:
|
||||||
|
# self.vdm.append(data.strip())
|
||||||
|
|
||||||
|
data = data.strip()
|
||||||
|
if data.startswith("Aujourd'hui,") and data.endswith("VDM"):
|
||||||
self.vdm.append(data.strip())
|
self.vdm.append(data.strip())
|
||||||
self.save = 0
|
|
||||||
|
|
||||||
def getText(self):
|
def getText(self):
|
||||||
return self.vdm
|
return self.vdm
|
||||||
|
@ -32,5 +32,4 @@ class VDM(object):
|
|||||||
|
|
||||||
class errorVDM(Exception):
|
class errorVDM(Exception):
|
||||||
def __init__(self, message):
|
def __init__(self, message):
|
||||||
super(errorVDM, self).__init__(message)
|
super(errorVDM, self).__init__(message)
|
||||||
|
|
@ -1 +1 @@
|
|||||||
version = '1.1'
|
version = '1.2'
|
||||||
|
BIN
dist/VDM API-1.1.tar.gz
vendored
BIN
dist/VDM API-1.1.tar.gz
vendored
Binary file not shown.
BIN
dist/VDM_API-1.1-py3-none-any.whl
vendored
BIN
dist/VDM_API-1.1-py3-none-any.whl
vendored
Binary file not shown.
@ -2,6 +2,6 @@
|
|||||||
|
|
||||||
pip uninstall VDM-API
|
pip uninstall VDM-API
|
||||||
pip wheel --wheel-dir=dist ./
|
pip wheel --wheel-dir=dist ./
|
||||||
pip install dist/VDM_API-1.1-py3-none-any.whl
|
pip install dist/VDM_API-1.2-py3-none-any.whl
|
||||||
|
|
||||||
# python setup.py sdist generate tar.gz archive setupable with pip
|
# python setup.py sdist generate tar.gz archive setupable with pip
|
||||||
|
6
setup.py
6
setup.py
@ -5,11 +5,11 @@ VDMAPI
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from setuptools import find_packages, setup
|
from setuptools import find_packages, setup
|
||||||
|
from VDMAPI.version import version
|
||||||
|
|
||||||
module_path = os.path.join(
|
module_path = os.path.join(
|
||||||
os.path.dirname(os.path.realpath(__file__)), 'VDMAPI')
|
os.path.dirname(os.path.realpath(__file__)), 'VDMAPI')
|
||||||
sys.path.insert(0, module_path)
|
sys.path.insert(0, module_path)
|
||||||
from version import version
|
|
||||||
sys.path.remove(module_path)
|
sys.path.remove(module_path)
|
||||||
|
|
||||||
|
|
||||||
@ -28,8 +28,8 @@ setup(
|
|||||||
author_email='antoinroux@hotmail.fr',
|
author_email='antoinroux@hotmail.fr',
|
||||||
description='VDM API: an API for recover random VDM from vdm.fr website',
|
description='VDM API: an API for recover random VDM from vdm.fr website',
|
||||||
long_description=read('README.rst'),
|
long_description=read('README.rst'),
|
||||||
url='git://176.189.130.29/python/vdmAPI.git',
|
url='git://antoine-roux.fr.to/python/vdmAPI.git',
|
||||||
download_url='git://176.189.130.29/python/vdmAPI.git/tags',
|
download_url='git://antoine-roux.fr.to/python/vdmAPI.git/tags',
|
||||||
license='Beerware',
|
license='Beerware',
|
||||||
platforms='any',
|
platforms='any',
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
|
Loading…
Reference in New Issue
Block a user