Create your Gitee Account
Explore and code with more than 6 million developers. Free private repositories! :)
Sign up
Clone or download
picpicker_mt.py 14.18 KB
Copy Edit Web IDE Raw Blame History
kzeng_jack authored 2017-03-15 19:56 . a
import threading
from time import ctime,sleep
import os
from urllib import request
from bs4 import BeautifulSoup
def down_image(url, file_name):
    """Download the resource at *url* and write its bytes to *file_name*.

    The request carries the module-level ``user_agent_str`` as its
    ``User-Agent`` header. An existing file at *file_name* is overwritten.

    Args:
        url: Address of the resource to fetch.
        file_name: Path of the file to write the downloaded bytes to.

    Raises:
        Any exception raised by ``urllib.request`` or by opening/writing the
        file propagates to the caller.
    """
    req = request.Request(url=url)
    req.add_header('User-Agent', user_agent_str)
    # Context managers guarantee the HTTP response and the output file are
    # closed even when reading or writing fails part-way through.
    with request.urlopen(req) as response:
        binary_data = response.read()
    with open(file_name, 'wb') as image_file:
        image_file.write(binary_data)
def _pick_covers(worker_no, start, stop):
    """Fetch cover images for the ISBNs on lines ``[start:stop]`` of isbn.txt.

    For each ISBN the search.jd.com result page is requested, the first
    ``<img>`` inside the first ``<div class="p-img">`` is located, and its
    ``src`` (prefixed with ``http:``) is downloaded via ``down_image`` to
    ``<img_dir>/<isbn>.jpg``. Request, parse and download errors are printed
    and the loop moves on to the next ISBN.

    Args:
        worker_no: Number shown in the start-up message of this worker.
        start: Index of the first line of isbn.txt to process.
        stop: Index one past the last line of isbn.txt to process.
    """
    print('=====>starting picpicer#' + str(worker_no) + ' at:', ctime())
    # Read the slice up front so the file handle is closed before any
    # network work starts (the original never closed it).
    with open('isbn.txt', 'r', -1, 'utf-8') as isbn_file:
        isbn_lines = isbn_file.readlines()[start:stop]
    for line in isbn_lines:
        isbn = line.strip()
        if not isbn:
            # A blank line would trigger an empty-keyword search and save
            # an unrelated image as ".jpg"; skip it.
            continue
        search_url = 'http://search.jd.com/Search?keyword=' + isbn
        print('----->' + search_url)
        req = request.Request(search_url)
        req.add_header('User-Agent', user_agent_str)
        try:
            with request.urlopen(req) as response:
                data = response.read()
            soup = BeautifulSoup(data.decode('utf-8', 'ignore'), "html.parser")
            for cover_div in soup.find_all('div', attrs={'class': 'p-img'}, limit=1):
                for img in cover_div.find_all('img', limit=1):
                    try:
                        src = img.get('src')
                        if src is None:
                            print(src)
                        else:
                            # The src is printed and fetched with an explicit
                            # "http:" scheme prepended.
                            url = 'http:' + src
                            print(url)
                            # os.path.join yields the same path as the former
                            # hard-coded '\\' on Windows and stays valid on
                            # other platforms.
                            down_image(url, os.path.join(img_dir, isbn + '.jpg'))
                    except Exception as e:
                        # Best effort: a failed download must not stop the worker.
                        print('=====>Error:', e)
        except Exception as e:
            # Best effort: a failed search/parse must not stop the worker.
            print('=-===>Error:', e)


# The ten workers differ only in their number and in the module-level slice
# bounds they read; the bounds are looked up at call time.

def loop1():
    """Worker #1: process isbn.txt lines ``[0:s12]``."""
    _pick_covers(1, 0, s12)


def loop2():
    """Worker #2: process isbn.txt lines ``[s21:s22]``."""
    _pick_covers(2, s21, s22)


def loop3():
    """Worker #3: process isbn.txt lines ``[s31:s32]``."""
    _pick_covers(3, s31, s32)


def loop4():
    """Worker #4: process isbn.txt lines ``[s41:s42]``."""
    _pick_covers(4, s41, s42)


def loop5():
    """Worker #5: process isbn.txt lines ``[s51:s52]``."""
    _pick_covers(5, s51, s52)


def loop6():
    """Worker #6: process isbn.txt lines ``[s61:s62]``."""
    _pick_covers(6, s61, s62)


def loop7():
    """Worker #7: process isbn.txt lines ``[s71:s72]``."""
    _pick_covers(7, s71, s72)


def loop8():
    """Worker #8: process isbn.txt lines ``[s81:s82]``."""
    _pick_covers(8, s81, s82)


def loop9():
    """Worker #9: process isbn.txt lines ``[s91:s92]``."""
    _pick_covers(9, s91, s92)


def loop10():
    """Worker #10: process isbn.txt lines ``[s101:s102]``."""
    _pick_covers(10, s101, s102)
# User-Agent header value sent with every HTTP request made by this script.
user_agent_str = ('Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) '
                  'AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25')

# Count the ISBN lines once so the work can be split between ten workers.
with open('isbn.txt', 'r', -1, 'utf-8') as isbn_file:
    lines = isbn_file.readlines()
lines_cnt = len(lines)

# Slice bounds for the ten workers. They are used as half-open ranges
# ``readlines()[sN1:sN2]``, so each range must start exactly where the
# previous one stops and the last one must stop at ``lines_cnt``. The former
# "+ 1" starts and the "lines_cnt - 1" end silently skipped one ISBN at
# every boundary and the final line of the file.
s11 = 0
s12 = lines_cnt // 10
s21 = s12
s22 = lines_cnt * 2 // 10
s31 = s22
s32 = lines_cnt * 3 // 10
s41 = s32
s42 = lines_cnt * 4 // 10
s51 = s42
s52 = lines_cnt * 5 // 10
s61 = s52
s62 = lines_cnt * 6 // 10
s71 = s62
s72 = lines_cnt * 7 // 10
s81 = s72
s82 = lines_cnt * 8 // 10
s91 = s82
s92 = lines_cnt * 9 // 10
s101 = s92
s102 = lines_cnt

# One thread per worker function, in worker order.
threads = [
    threading.Thread(target=worker)
    for worker in (loop1, loop2, loop3, loop4, loop5,
                   loop6, loop7, loop8, loop9, loop10)
]
# Individual names kept for backward compatibility with the original script.
t1, t2, t3, t4, t5, t6, t7, t8, t9, t10 = threads

if __name__ == '__main__':
    # Raw string: "\c" is not a valid escape sequence in a normal literal.
    img_dir = r".\cover"
    if not os.path.isdir(img_dir):
        os.mkdir(img_dir)
    print('----->starting picpicker at:', ctime())
    for t in threads:
        # The attribute replaces the deprecated setDaemon() call.
        t.daemon = False
        t.start()
    # Wait for every worker so the closing message is printed only after
    # all downloads have finished.
    for t in threads:
        t.join()
    print("All over", ctime())

Comment ( 0 )

Sign in to post a comment