艺海无涯,学无止境。欢迎一起探讨技术和人生,了解我。

Python 爬取网页,用正则解析链接地址并写入 MongoDB

from pymongo import MongoClient
from urllib import request
import re

# MongoDB handles used by the rest of the script: a client for the server at
# localhost:27017, the "NLog" database, and its "Test" collection.
client = MongoClient(host="localhost", port=27017)
db = client["NLog"]
collection = db["Test"]
def extract_anchor_tags(html):
    """Return every ``<a ... href=... </a>`` substring found in *html*.

    Matching is case-insensitive and non-greedy. ``.`` does not match a
    newline here, so an anchor that spans several lines is not returned.

    Args:
        html: Decoded page source.

    Returns:
        A list of the matched anchor-tag strings, in document order.
    """
    # NOTE(review): as written this swaps a space for a space, i.e. it is a
    # no-op. The original first argument was probably lost when the snippet
    # was published (perhaps a newline or a non-breaking space) -- confirm
    # the intended character before relying on this normalisation.
    html = html.replace(" ", " ")
    return re.findall(r"<a.*?href=.*?<\/a>", html, re.I)


def main():
    """Fetch https://mayb.cn and store each anchor tag it contains in MongoDB.

    One document of the form ``{"domain": "mayb.cn", "msg": <anchor tag>}``
    is written to the module-level ``collection`` per anchor tag found.
    """
    # The context manager closes the HTTP response even if reading fails.
    with request.urlopen("https://mayb.cn") as response:
        html = response.read().decode("utf-8")

    documents = [
        {"domain": "mayb.cn", "msg": tag}
        for tag in extract_anchor_tags(html)
    ]
    # Collection.insert() was removed in PyMongo 4; insert_many() is its
    # replacement but rejects an empty list, so skip the call when the page
    # yielded no links.
    if documents:
        collection.insert_many(documents)


if __name__ == "__main__":
    main()