I've got the following code routine that works great. The only problem is that I need the results to come back in the same order as the links array. For example, I need the results of the google.com link to come back first, then yahoo, etc. The code currently returns them in a "random" order.
var Nightmare = require('nightmare');
var async = require('async');
// URLs of the pages to scrape.
var links = [
"http://www.google.com",
"http://www.yahoo.com",
"http://www.bing.com",
"http://www.aol.com",
"http://duckduckgo.com",
"http://www.ask.com"
];
// Loads `url` in a fresh Nightmare instance, logs the page's body text, and
// signals completion through `callback`.
// NOTE(review): `callback` is invoked with no arguments, so the scraped text is
// only printed here and is never handed back to the caller.
var scrape = function(url, callback) {
var nightmare = new Nightmare();
nightmare.goto(url);
nightmare.wait('body');
nightmare.evaluate(function () {
// Return the `innerText` of the page's <body> element.
return document.querySelector('body').innerText;
})
.then(function (result) {
// Printed from inside this scrape's own handler.
console.log(url, result);
})
nightmare.end(function() {
// Neither an error nor a result is passed along.
callback();
});
}
// Run `scrape` for every entry in `links`. No final callback is supplied here.
async.map(links, scrape);
UPDATE: Thanks @christophetd. Here is my revised working code:
var Nightmare = require('nightmare');
var async = require('async');
// URLs of the pages to scrape.
var links = [
"http://www.google.com",
"http://www.yahoo.com",
"http://www.bing.com",
"http://www.aol.com",
"http://duckduckgo.com",
"http://www.ask.com"
];
/**
 * Scrape the body text of one page and report it through a Node-style callback.
 *
 * @param {string} url - Address of the page to load.
 * @param {function(?Error, string=)} callback - Called with
 *   `(null, url + bodyText)` on success, or with `(err)` if the scrape fails.
 */
var scrape = function (url, callback) {
  var nightmare = new Nightmare();
  nightmare
    .goto(url)
    .wait('body')
    .evaluate(function () {
      // Return the `innerText` of the page's <body> element.
      return document.querySelector('body').innerText;
    })
    // Queue the shutdown before `.then()` starts the run, as Nightmare documents.
    .end()
    .then(
      function (result) {
        callback(null, url + result);
      },
      // Forward failures too; otherwise `callback` is never invoked and
      // whoever is waiting on it never finishes.
      function (err) {
        callback(err);
      }
    );
};
// Run `scrape` over `links`, then print either the error or the collected results.
async.map(links, scrape, function (err, results) {
  if (err) {
    console.log(err);
    return;
  }
  console.log(results);
});