Quantcast
Channel: CNode:Node.js专业中文社区
Viewing all articles
Browse latest Browse all 14821

使用koa写一个漫画下载的爬虫

$
0
0

项目说明

  • 使用 koa 2.x,要求 nodejs >= 7.6,
  • 使用async await解决异步,
  • 使用request-promise配合async await解决请求异步
  • 使用cheerio处理选择img标签

核心代码,spider.js

const fs = require('fs');
const request = require("request-promise");
const cheerio = require("cheerio");
const mkdirp = require('mkdirp');
const config = require('../config');

exports.download = async function(ctx, next) {
    const dir = 'images';
    // 图片链接地址
    let links = [];
    // 创建目录
    mkdirp(dir);
    var urls = [];
    let tasks = [];
    let downloadTask = [];
    let url = config.url;
    for (var i = 1; i <= config.size; i++) {
        let link = url + '_' + i + '.html';
        if (i == 1) {
            link = url + '.html';
        }
        tasks.push(getResLink(i, link))
    }
    links = await Promise.all(tasks)
    console.log('links==========', links.length);

    for (var i = 0; i < links.length; i++) {
        let item = links[i];
        let index = item.split('___')[0];
        let src = item.split('___')[1];
        downloadTask.push(downloadImg(src, dir, index + links[i].substr(-4, 4)));
    }
    await Promise.all(downloadTask);
}
/**
 * Streams the resource at `url` into the file `<dir>/<filename>`.
 *
 * The returned promise settles only after the write stream has closed, so
 * callers that await it (or pass it to Promise.all) really wait for the file
 * to be written.
 *
 * @param {string} url - Address of the image to fetch.
 * @param {string} dir - Existing directory to write into.
 * @param {string} filename - Name of the file to create inside `dir`.
 * @returns {Promise<void>} Resolves when the file has been written and closed;
 *   rejects if the request stream or the write stream emits an error.
 */
async function downloadImg(url, dir, filename) {
    console.log('download begin---', url);
    return new Promise((resolve, reject) => {
        const target = fs.createWriteStream(dir + "/" + filename);
        const source = request.get(url);
        let failed = false;

        const fail = (err) => {
            failed = true;
            // Release the file handle; pipe() does not close the destination
            // when the source errors.
            target.destroy();
            reject(err);
        };

        source.on('error', fail);
        target.on('error', fail);
        target.on('close', function() {
            // 'close' also fires after destroy(); only report real successes.
            if (failed) {
                return;
            }
            console.log('download success', url);
            resolve();
        });

        source.pipe(target);
    });
}
/**
 * Fetches one page and returns its index joined with the `src` of the first
 * element matching `config.rule`.
 *
 * @param {number} index - Page number, echoed back in the result.
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<string>} "<index>___<src>"; when nothing matches (or the
 *   first match has no `src`) the src part is the literal text "undefined".
 */
async function getResLink(index, url) {
    const html = await request(url);
    const $ = cheerio.load(html);
    // Only the first match is ever used, so read it directly.
    const firstSrc = $(config.rule).first().attr('src');
    return index + '___' + firstSrc;
}

代码地址,有需要可以看看

koa-spider代码地址

代码运行效果

GQ)PW}B)7UYH6LZ_SFE~AG7.png

下载美女图片的效果

test.png


Viewing all articles
Browse latest Browse all 14821

Trending Articles