# Python: 每日抓取电影天堂首页最新电影推送到微信 (daily scrape of the newest movies on the 电影天堂 home page, pushed to WeChat)
import requests
from lxml import etree
class Movie(object):
    """Scraper for the newest-movie listing page of dytt8.net.

    Instance fields:
      url      -- listing page that gets fetched.
      headers  -- HTTP headers sent with the request (a desktop Chrome
                  User-Agent string).
      timeout  -- seconds to wait on the server before the request fails.
      movies   -- one "title, newline, link, newline" string per movie found
                  by the most recent get_all() call.
      html     -- all entries of ``movies`` joined into a single string
                  (plain text, despite the attribute name).
    """

    def __init__(self, timeout=10):
        """Set up the request parameters; no network access happens here.

        Args:
            timeout: Seconds passed to ``requests.get`` as its timeout.
        """
        self.url = 'https://www.dytt8.net/html/gndy/dyzz/index.html'
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"}
        self.timeout = timeout
        self.movies = []
        self.html = ''

    def get_all(self):
        """Fetch the listing page and return every movie as "title\\nlink\\n" text.

        The result is also stored on the instance: ``self.movies`` holds the
        individual entries and ``self.html`` the joined string.  Both are
        rebuilt from scratch on every call, so calling this method repeatedly
        does not duplicate entries.

        Returns:
            str: The concatenated entries, or '' when no movie table matched.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                does not complete within ``self.timeout`` seconds.
        """
        # Without a timeout a stalled server would block the job indefinitely.
        response = requests.get(url=self.url, headers=self.headers,
                                timeout=self.timeout)
        # Decode as gb2312 instead of whatever requests guesses
        # (presumably the charset the site serves -- confirm if text is garbled).
        response.encoding = 'gb2312'
        tree = etree.HTML(response.text)
        tables = tree.xpath('//div[@class="co_content8"]/ul/td/table')

        entries = []
        for table in tables:
            titles = table.xpath('./tr[2]/td[2]/b/a/text()')
            hrefs = table.xpath('./tr[2]/td[2]/b/a/@href')
            # Skip tables that lack the title link instead of raising
            # IndexError and losing every other movie on the page.
            if not titles or not hrefs:
                continue
            # hrefs are concatenated onto the site root, as the original did.
            link = 'https://www.dytt8.net' + hrefs[0]
            entries.append(titles[0] + '\n' + link + '\n')

        self.movies = entries
        self.html = "".join(entries)
        return self.html
dy = Movie()
# Run one scrape at import/run time. The returned text is discarded here;
# get_all() also stores it on dy.html. Wrap the call in print() to inspect it.
dy.get_all()