Browse Source

correction du parseur HTML après changement sur le site

master
antoine 3 years ago
parent
commit
0c56442ad0
8 changed files with 80 additions and 22 deletions
  1. +2
    -0
      .gitignore
  2. +72
    -15
      VDMAPI/HtmlParser.py
  3. +1
    -2
      VDMAPI/VDM.py
  4. +1
    -1
      VDMAPI/version.py
  5. BIN
      dist/VDM API-1.1.tar.gz
  6. BIN
      dist/VDM_API-1.1-py3-none-any.whl
  7. +1
    -1
      generateWheel.md
  8. +3
    -3
      setup.py

+ 2
- 0
.gitignore View File

@@ -1,2 +1,4 @@
.idea/
.vscode/
*__pycache__/
dist/

+ 72
- 15
VDMAPI/HtmlParser.py View File

@@ -5,25 +5,82 @@ class MyHTMLParser(HTMLParser):
def __init__(self):
super(MyHTMLParser, self).__init__()
self.vdm = []
self.save = 0

def _searchClass(self, listTuple, className="post article"):
for v in listTuple:
if v[0] == 'class' and v[1] == className:
return True
return False
# self.article = False
# self.panel = False
# self.panelBody = False
# self.panelContent = False
# self.p = False
# self.a = False

def handle_starttag(self, tag, attrs):
if tag == "p" and self._searchClass(attrs, className="block"):
self.save = 1
# self.skip = []

# @classmethod
# def _hasClass(cls, list_tuple, class_name):
# for name_value in list_tuple:
# array_class = name_value[1].strip().split(" ")
# array_class = [ x.strip() for x in array_class ]
# contain_all = set(array_class).issubset(class_name)

# if name_value[0] == 'class' and contain_all:
# return True
# return False

# def handle_starttag(self, tag, attrs):
# # print(self.get_starttag_text())
# if tag == "article" and self._hasClass(attrs, ["art-panel", "col-xs-12"]):
# self.article = True
# self.skip = []
# else:
# if tag == "div" and self._hasClass(attrs, ["panel", "panel-default"]) and self.article:
# self.panel = True
# else:
# if tag == "div" and self._hasClass(attrs, ["panel-body"]) and self.article and self.panel:
# self.panelBody = True
# else:
# if tag == "div" and self._hasClass(attrs, ["panel-content"]) and self.article and self.panel and self.panelBody:
# self.panelContent = True
# else:
# if tag == "p" and self._hasClass(attrs, ["block", "hidden-xs"]) and self.article and self.panel and self.panelBody and self.panelContent:
# self.p = True
# else:
# if tag == "a" and self.article and self.panel and self.panelBody and self.panelContent and self.p:
# self.a = True
# else:
# self.skip.append(self.get_starttag_text())

# def handle_endtag(self, tag):
# if len(self.skip) == 0:
# if tag == "a" and self.a:
# self.a = False
# else:
# if tag == "p" and self.p:
# self.p = False
# else:
# if tag == "div" and self.panelContent:
# self.panelContent = False
# else:
# if tag == "div" and self.panelBody:
# self.panelBody = False
# else:
# if tag == "div" and self.panel:
# self.panel = False
# else:
# if tag == "article" and self.article:
# self.article = False
# else:
# self.skip.pop()

if tag == "a" and self.save == 1:
self.save = 2
def handle_data(self, data):
if self.save == 2:
# print("#"+data+"#")

# if self.article and self.panel and self.panelBody and self.panelContent and self.p and self.a:
# self.vdm.append(data.strip())

data = data.strip()
if data.startswith("Aujourd'hui,") and data.endswith("VDM"):
self.vdm.append(data.strip())
self.save = 0

def getText(self):
return self.vdm
return self.vdm

+ 1
- 2
VDMAPI/VDM.py View File

@@ -32,5 +32,4 @@ class VDM(object):

class errorVDM(Exception):
def __init__(self, message):
super(errorVDM, self).__init__(message)
super(errorVDM, self).__init__(message)

+ 1
- 1
VDMAPI/version.py View File

@@ -1 +1 @@
version = '1.1'
version = '1.2'

BIN
dist/VDM API-1.1.tar.gz View File


BIN
dist/VDM_API-1.1-py3-none-any.whl View File


+ 1
- 1
generateWheel.md View File

@@ -2,6 +2,6 @@

pip uninstall VDM-API
pip wheel --wheel-dir=dist ./
pip install dist/VDM_API-1.1-py3-none-any.whl
pip install dist/VDM_API-1.2-py3-none-any.whl

# python setup.py sdist generates a tar.gz archive installable with pip

+ 3
- 3
setup.py View File

@@ -5,11 +5,11 @@ VDMAPI
import os
import sys
from setuptools import find_packages, setup
from VDMAPI.version import version

module_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)), 'VDMAPI')
sys.path.insert(0, module_path)
from version import version
sys.path.remove(module_path)


@@ -28,8 +28,8 @@ setup(
author_email='antoinroux@hotmail.fr',
description='VDM API: an API for recover random VDM from vdm.fr website',
long_description=read('README.rst'),
url='git://176.189.130.29/python/vdmAPI.git',
download_url='git://176.189.130.29/python/vdmAPI.git/tags',
url='git://antoine-roux.fr.to/python/vdmAPI.git',
download_url='git://antoine-roux.fr.to/python/vdmAPI.git/tags',
license='Beerware',
platforms='any',
packages=find_packages(),


Loading…
Cancel
Save