扫码关注官方订阅号
参考代码如下,BeautifulSoup 的用法可以阅读官方文档。
# Build a {name: value} mapping from every matching <input> element,
# using BeautifulSoup with the lxml parser.
from bs4 import BeautifulSoup

# Sample markup: three read-only text inputs placed back to back.
html = (
    '<input class="input-xlarge focused" id="listCode" name="listCode" '
    'readonly="true" type="text" value="001"></input>'
    '<input class="input-xlarge focused" id="type" name="type" '
    'readonly="true" type="text" value="002"></input>'
    '<input class="input-xlarge focused" id="yyc" name="yyc" '
    'readonly="true" type="text" value="yyzz"></input>'
)

soup = BeautifulSoup(html, "lxml")

# Select the <input> tags carrying the "input-xlarge focused" class string.
datas = soup.find_all("input", class_="input-xlarge focused")

# Subscripting a tag reads the attribute; a missing one raises KeyError.
content = {tag["name"]: tag["value"] for tag in datas}
print(content)
# Regex-only variant: find each <input>...</input> span, then read its
# name and value attributes out of the matched text.
import re

# Placeholder document; substitute the real HTML text here.
txt = "内容"

# re.S lets "." cross newlines so an element may span several lines.
inputTxt = re.compile(r'<input.*?</input>', re.S)
nameTxt = re.compile(r'name="(.*?)"')
valueTxt = re.compile(r'value="(.*?)"')

content = {}
for element in inputTxt.findall(txt):
    # Take the first name/value occurrence inside this element.
    key = nameTxt.findall(element)[0]
    content[key] = valueTxt.findall(element)[0]

print(content)
同样功能用HTMLParser实现了一下:
# Collect the name -> value pairs of <input> tags with the standard-library
# HTML parser (no third-party dependency).
try:  # Python 3 module names
    from html.parser import HTMLParser
    from html.entities import name2codepoint  # noqa: F401 (kept from original)
except ImportError:  # Python 2 module names
    from HTMLParser import HTMLParser
    from htmlentitydefs import name2codepoint  # noqa: F401 (kept from original)


class MyHTMLParser(HTMLParser):
    """HTML parser that records the ``name``/``value`` of every ``<input>`` tag.

    After ``feed()``, ``input_tag_d`` maps each input's ``name`` attribute to
    its ``value`` attribute, or to ``''`` when the tag has no ``value``.
    Inputs without a ``name`` attribute are ignored.
    """

    def __init__(self):
        self.input_tag_d = {}
        # Explicit base-class call instead of super(): on Python 2 the
        # HTMLParser base is an old-style class, which super() rejects.
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Store the name/value pair of an ``<input>`` start tag.

        Args:
            tag: Tag name as reported by the parser.
            attrs: List of ``(attribute, value)`` pairs for the tag.
        """
        if tag != 'input':
            return
        # Look attributes up by key so the result does not depend on the
        # order in which "name" and "value" appear inside the tag.
        attr_map = dict(attrs)
        if 'name' in attr_map:
            self.input_tag_d[attr_map['name']] = attr_map.get('value', '')


parser = MyHTMLParser()
html_str = (
    '<input class="input-xlarge focused" id="listCode" name="listCode" '
    'readonly="true" type="text" value="001"> </input> '
    '<input class="input-xlarge focused" id="type" name="type" '
    'readonly="true" type="text" value="002"> </input> '
    '<input class="input-xlarge focused" id="yyc" name="yyc" '
    'readonly="true" type="text" value="yyzz"> </input>'
)
parser.feed(html_str)
print(parser.input_tag_d)
# Expected output (key order may differ on interpreters without ordered dicts):
# {'type': '002', 'yyc': 'yyzz', 'listCode': '001'}
只用自带re模块不是更容易实现?
# Extract name -> value pairs from <input> tags with one regular expression.
import re

s = (
    '<input class="input-xlarge focused" id="listCode" name="listCode" '
    'readonly="true" type="text" value="001"> </input> '
    '<input class="input-xlarge focused" id="type" name="type" '
    'readonly="true" type="text" value="002"> </input> '
    '<input class="input-xlarge focused" id="yyc" name="yyc" '
    'readonly="true" type="text" value="yyzz"> </input>'
)

# "[^>]*?" keeps the gap between name="..." and value="..." inside a single
# tag. A greedy ".*" would run from the first name to the last value whenever
# several tags share a line, collapsing everything into one wrong pair.
# Named "pattern" rather than "compile" to avoid shadowing the builtin.
pattern = r'name="(\S+)"[^>]*?value="(\S+)"'

matches = re.finditer(pattern, s)
result = {found.group(1): found.group(2) for found in matches}
print(result)
# Expected output: {'listCode': '001', 'type': '002', 'yyc': 'yyzz'}
微信扫码关注PHP中文网服务号
QQ扫码加入技术交流群
Copyright 2014-2025 https://www.php.cn/ All Rights Reserved | php.cn | 湘ICP备2023035733号
PHP学习
技术支持
返回顶部
参考代码如下,BeautifulSoup 的用法可以阅读官方文档。
re方法
同样功能用HTMLParser实现了一下:
只用自带re模块不是更容易实现?