1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
| __author__ = 'yangpeiwen'
from urllib import * from bs4 import BeautifulSoup import socket import os
# Download a serialized novel from wap.jdxs.net one chapter page at a time:
# save each page's chapter text to xs/xs.txt, then follow the page's
# "next chapter" link until there is none (or the site stops responding).
url = "http://wap.jdxs.net/index.php/book/chapter/bid=100722/cid=16819732/"
path = "xs"

# Abort a stalled connection after 3 seconds instead of blocking forever.
socket.setdefaulttimeout(3)

if not os.path.exists(path):
    os.makedirs(path)

# Consecutive failed fetches of the current page. This has to be initialised
# outside the loop: resetting it on every pass makes the give-up threshold
# below unreachable, so a dead URL would be retried forever.
ci = 0

# The with-block guarantees the output file is closed even when parsing or
# writing raises part-way through the download.
with open(path + "/xs.txt", "w") as f:
    while True:
        # Only the network fetch is retried. Errors from parsing or from
        # writing the file are real failures and are allowed to propagate
        # rather than causing the same chapter to be fetched and written again.
        try:
            html = urlopen(url).read()
        except IOError:
            ci += 1
            if ci > 10:
                break
            continue
        ci = 0  # fetch succeeded; start counting afresh for the next page

        soup = BeautifulSoup(html)

        chapter = soup.find(attrs={'class': 'chapter'})
        if chapter is None:
            # The page has no chapter element, so there is nothing to save
            # and no reason to believe the chain of chapters continues.
            break
        text = chapter.text
        print(text)
        f.write(text.encode("utf-8"))

        next_link = soup.find(id='btnNext')
        if next_link is None or not next_link.get('href'):
            # No usable "next" button: treat this as the end of the book.
            break
        # The button is followed only when its label starts with
        # '下一章' ("next chapter"). startswith() simply answers False for
        # any other label, whereas index() raised ValueError when the label
        # was absent and crashed the script instead of ending the loop.
        if not next_link.text.encode('utf-8').startswith('下一章'):
            break
        url = "http://wap.jdxs.net/" + next_link['href']
        print(url)
|