用 node 爬取之后,写入文件不是按顺序的。问题:怎么保证同步呢?
var cheerio = require("cheerio");
var fs = require('fs');
var superagent = require("superagent");
const charset = require('superagent-charset');
charset(superagent);
// Entry point: download the GBK-encoded index page, collect the href of every
// <a> element on it, let cutItem narrow the list, and pass each remaining
// link to reqxydh as an absolute URL.
superagent
  .get('http://www.kanunu8.com/files/chinese/201103/1976.html')
  .charset('gbk')
  .end((err, sres) => {
    if (err) {
      // This is a plain script, not an Express handler: there is no `next`
      // to delegate to, so report the failure and stop.
      console.error('Failed to fetch the index page:', err);
      return;
    }

    const $ = cheerio.load(sres.text);
    const items = [];
    $('a').each((idx, element) => {
      items.push($(element).attr('href'));
    });

    // cutItem edits `items` in place; its return value is not needed here.
    cutItem(items);

    // Note: setTimeout in Node.js is asynchronous as well, so deferring each
    // call with setTimeout would not make the pages get processed in order.
    for (const href of items) {
      // Skip entries cutItem removed (read back as undefined) and empty links.
      if (href == null || href === '') {
        continue;
      }
      reqxydh(`http://www.kanunu8.com/files/chinese/201103/${href}`);
    }
  });
// Tail of the chain of pending appends made by reqxydh. Every call links its
// own append behind this promise, so appends run in call order.
let xydhWriteQueue = Promise.resolve();

/**
 * Keeps only the links whose text contains '1976', editing `items` in place.
 *
 * Missing entries (undefined / null / array holes) are dropped; any other
 * value is converted to a string before it is tested. The surviving entries
 * keep their relative order and are packed to the front, so the array has no
 * gaps afterwards.
 *
 * @param {Array<*>} items - href values collected from the page.
 * @returns {Array<string>} The same array instance, filtered.
 */
function cutItem(items) {
  let kept = 0;
  for (let i = 0; i < items.length; i++) {
    if (items[i] == null) {
      continue;
    }
    const href = String(items[i]);
    if (href.includes('1976')) {
      items[kept] = href;
      kept += 1;
    }
  }
  // Truncate the leftovers instead of using `delete`, which leaves holes.
  items.length = kept;
  return items;
}

/**
 * Downloads `url` (decoded as GBK) and appends the text of its <h2> and <p>
 * elements to xydh.txt.
 *
 * The request is started immediately, so several calls download
 * concurrently, but each append waits for the appends of all earlier calls.
 * The file therefore receives pages in the order reqxydh was called, not in
 * the order the responses happened to arrive.
 *
 * A failed request or a failed write is logged and skipped so that one bad
 * page does not block the pages queued behind it.
 *
 * @param {string} url - Absolute URL of the page to download.
 * @returns {Promise<void>} Settles once this page has been appended or skipped.
 */
function reqxydh(url) {
  // Always resolve (never reject) so a failure that arrives before its turn
  // in the queue cannot surface as an unhandled rejection.
  const fetched = new Promise((resolve) => {
    superagent
      .get(url)
      .charset('gbk')
      .end((err, sres) => resolve({ err, sres }));
  });

  xydhWriteQueue = xydhWriteQueue
    .then(() => fetched)
    .then(({ err, sres }) => {
      if (err || !sres) {
        console.error(`Failed to fetch ${url}:`, err);
        return;
      }
      const $ = cheerio.load(sres.text);
      fs.appendFileSync('xydh.txt', $('h2').text() + $('p').text(), { flag: 'a' });
    })
    .catch((writeErr) => {
      console.error(`Failed to save ${url}:`, writeErr);
    });

  return xydhWriteQueue;
}