去评论
就爱江湖 Www.92Jh.Cn

猫眼电影top100海报爬取和生成照片墙

q3196355
2019/08/22 10:39:35
话不多说，先上结果。



最后生成的照片墙




以下是源码


01

02

03

04

05

06

07

08

09

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

import requests
from bs4 import BeautifulSoup
import re
import time
import os
import math
import pickle
from PIL import Image

# Directory in which the downloaded posters are stored.
path = 'result/'

# Browser-like headers passed to the requests.get calls in this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/76.0.3809.100 Safari/537.36'
    ),
}

# Request a page and return its HTML source.
def get_html(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server. Without a timeout a
            stalled connection would block the script indefinitely;
            with one, ``requests`` raises a timeout error instead.

    Returns:
        The response text when the status code is 200; otherwise ``None``
        after printing a failure message.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    print('请求网页失败')
    # Explicit so callers can see that a failed request yields None.
    return None

# Parse the page source to get each film's title and poster src.
def parse(html):
    """Yield one record per ``<dd>`` entry found in the board page.

    Args:
        html: HTML source of a board page. ``None`` or an empty string
            (``get_html`` returns ``None`` when the request fails) yields
            nothing instead of raising.

    Yields:
        dict with the keys ``'title'`` (the ``title`` attribute of the
        entry's first link), ``'star'`` (stripped text of its ``.star``
        element) and ``'img'`` (the ``data-src`` attribute of its
        ``img.board-img`` element).
    """
    if not html:
        return
    soup = BeautifulSoup(html, 'lxml')
    containers = soup.select('#app > div > div > div.main > dl')
    if not containers:
        # The expected list container is absent (e.g. a different page was
        # served), so there is nothing to extract.
        return
    for dd in containers[0]('dd'):
        yield {
            'title': dd.find('a')['title'],
            'star': dd.select('.star')[0].string.strip(),
            'img': dd.select('img.board-img')[0]['data-src'],
        }

# Make sure the target directory exists.
def make_dir(path):
    """Create the directory *path* (and missing parents) if it is absent.

    Args:
        path: Directory to create. Nothing happens when it already
            exists as a directory.

    Raises:
        FileExistsError: If *path* exists but is not a directory.
    """
    # exist_ok avoids the race between a separate existence check and the
    # creation call.
    os.makedirs(path, exist_ok=True)

# Download a film poster into the result folder.
def downlord_img(title, img_src, timeout=10):
    """Download one poster and save it as ``<title>.jpg`` under ``path``.

    Args:
        title: Film title, used as the file name.
        img_src: Raw image address; the part from ``https`` up to the
            first ``jpg`` is what gets requested.
        timeout: Seconds to wait for the image server before ``requests``
            raises a timeout error.

    Returns:
        None. Nothing is written when *img_src* contains no matching URL
        or the response status is not 200.
    """
    make_dir(path)
    match = re.search(r'(https.*?jpg)', img_src, re.S)
    if not match:
        return
    response = requests.get(match.group(1), headers=headers, timeout=timeout)
    if response.status_code != 200:
        return
    # A path separator inside the title would otherwise be interpreted as
    # a sub-directory of the target folder.
    filename = re.sub(r'[\\/]', '_', title) + '.jpg'
    # 'wb', not 'ab': a second run must overwrite the poster rather than
    # append another copy of the image bytes to the existing file.
    with open(os.path.join(path, filename), 'wb') as f:
        f.write(response.content)

# Credit: this function was originally written by @Charles.
# Build a picture wall from the images in the given folder.
def makePicturesWall(picdir):
    """Tile every image in *picdir* into a grid saved as ``picwall.png``.

    Each image is resized to 128x128. The grid has ``int(sqrt(n))``
    columns and as many rows as are needed to hold all ``n`` images, so
    no image is dropped when ``n`` is not a perfect square.

    Args:
        picdir: Directory whose entries are all opened as images.

    Returns:
        None. The wall is written to ``picwall.png`` in the current
        working directory; nothing is written when *picdir* is empty.
    """
    # Sorted so the tile order does not depend on the listing order.
    picslist = sorted(os.listdir(picdir))
    num_pics = len(picslist)
    if num_pics == 0:
        print('文件夹内没有图片，无法制作图片墙')
        return
    size = 128
    cols = int(math.sqrt(num_pics))
    rows = math.ceil(num_pics / cols)
    picwall = Image.new('RGBA', (cols * size, rows * size))
    for index, pic in enumerate(picslist):
        row, col = divmod(index, cols)
        with Image.open(os.path.join(picdir, pic)) as img:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
            # same filter under its current name.
            tile = img.resize((size, size), Image.LANCZOS)
        picwall.paste(tile, (col * size, row * size))
    print('图片墙制作成功!')
    picwall.save("picwall.png")

def main():
    """Download the posters of all ten board pages, then build the wall.

    Pages are requested with offsets 0, 10, ..., 90 and the script sleeps
    three seconds after each page. A page whose request returned no HTML
    is skipped instead of being handed to the parser.
    """
    for page in range(10):
        print('准备下载第{}页'.format(page + 1))
        url = 'https://maoyan.com/board/4?offset={}'.format(page * 10)
        html = get_html(url)
        if html:
            for each in parse(html):
                downlord_img(each['title'], each['img'])
                print('下载{}海报成功'.format(each['title']))
        print('准备睡眠3s')
        time.sleep(3)
    makePicturesWall(path)


if __name__ == '__main__':
    main()






其他想说的

1. 新手上路，请多指教。

2. 使用方法：用 pip3 安装相关库（requests、beautifulsoup4、lxml、Pillow）后，直接运行脚本即可。


下载的100张海报图片