# Browser-like User-Agent header sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
}

# Paginated crawl: for pages 1 through 5, fetch the page, save its raw HTML,
# then dump the text of every table row as one '|'-delimited line.
for i in range(1, 6):
    url = base_url.format(i)
    # A timeout keeps a stalled connection from hanging the crawl forever.
    html = requests.get(url=url, headers=headers, timeout=10).text

    # Keep a copy of the raw page next to the extracted data.
    with open('./课程/04course/04_%d.html' % i, 'w', encoding='utf-8') as f:
        f.write(html)

    root = etree.HTML(html)
    trs = root.xpath('//tr')

    # `with` guarantees the data file is flushed and closed for every page,
    # even if extracting a row raises.
    with open('./课程/04course/data04_%d.txt' % i, 'w', encoding='utf-8') as f:
        for tr in trs:
            tds = tr.xpath('./td')
            # Each cell's full text followed by '|' (trailing separator kept).
            s = ''.join(str(td.xpath('string(.)')) + '|' for td in tds)
            print(s)
            # Rows without any <td> (e.g. header rows using <th>) yield an
            # empty string and are not written to the data file.
            if s != '':
                f.write(s + '\n')