< Back to Blog Overview

Top 5 JavaScript Web Scraping libraries

2020-06-02
Top 5 JavaScript Web Scraping libraries

List of Web Scraping Libraries we will go through

Request-Promise-Native

// Standalone example: download a page's raw HTML with request-promise-native.
const request = require('request-promise-native');

/**
 * Fetches the books.toscrape.com landing page.
 *
 * @returns {Promise<string>} Resolves with the response body returned by `request`.
 */
const scrape = async () => {
  const response = await request('http://books.toscrape.com/');
  return response;
};

scrape()
  .then((value) => {
    console.log(value); // HTML code of the website
  })
  .catch((err) => {
    // Surface network/HTTP failures instead of leaving the rejection unhandled.
    console.error('Scrape failed:', err);
  });

Unirest

// Standalone example: issue a GET request with unirest.
const unirest = require('unirest');

/**
 * Sends a GET request to books.toscrape.com.
 *
 * @returns {Promise<*>} Resolves with the `body` property of the unirest response.
 */
const scrape = async () => {
  const response = await unirest.get('http://books.toscrape.com/');
  return response.body;
};

scrape()
  .then((value) => {
    console.log(value); // Success!
  })
  .catch((err) => {
    // Surface failures instead of leaving the rejection unhandled.
    console.error('Scrape failed:', err);
  });
// Standalone example: issue a POST request with a custom header using unirest.
const unirest = require('unirest');

/**
 * Sends a POST request carrying an `X-header` header to httpbin.org/anything.
 *
 * @returns {Promise<*>} Resolves with the `body` property of the unirest response.
 */
const scrape = async () => {
  const response = await unirest
    .post('http://httpbin.org/anything')
    .headers({ 'X-header': '123' });
  return response.body;
};

scrape()
  .then((value) => {
    console.log(value); // Success!
  })
  .catch((err) => {
    // Surface failures instead of leaving the rejection unhandled.
    console.error('Scrape failed:', err);
  });
{
 args: {},
 data: '',
 files: {},
 form: {},
 headers: {
   'Content-Length': '0',
   Host: 'httpbin.org',
   'X-Amzn-Trace-Id': 'Root=1-5ed62f2e-554cdc40bbc0b226c749b072',
   'X-Header': '123'
 },
 json: null,
 method: 'POST',
 origin: '23.238.134.113',
 url: 'http://httpbin.org/anything'
}
// Standalone example: issue a PUT request with a custom header using unirest.
const unirest = require('unirest');

/**
 * Sends a PUT request carrying an `X-header` header to httpbin.org/anything.
 *
 * @returns {Promise<*>} Resolves with the `body` property of the unirest response.
 */
const scrape = async () => {
  const response = await unirest
    .put('http://httpbin.org/anything')
    .headers({ 'X-header': '123' });
  return response.body;
};

scrape()
  .then((value) => {
    console.log(value); // Success!
  })
  .catch((err) => {
    // Surface failures instead of leaving the rejection unhandled.
    console.error('Scrape failed:', err);
  });
{
 args: {},
 data: '',
 files: {},
 form: {},
 headers: {
   'Content-Length': '0',
   Host: 'httpbin.org',
   'X-Amzn-Trace-Id': 'Root=1-5ed62f91-bb2b684e39bbfbb3f36d4b6e',
   'X-Header': '123'
 },
 json: null,
 method: 'PUT',
 origin: '23.63.69.65',
 url: 'http://httpbin.org/anything'
}

Cheerio

// Standalone example: download a page and query its HTML with cheerio.
const cheerio = require('cheerio');
// The snippet calls `request`, so it must be required here for the example to run on its own.
const request = require('request-promise-native');

/**
 * Fetches the books.toscrape.com landing page.
 *
 * @returns {Promise<string>} Resolves with the response body returned by `request`.
 */
const scrape = async () => {
  const response = await request('http://books.toscrape.com/');
  return response;
};

scrape()
  .then((value) => {
    const $ = cheerio.load(value);
    // Count the <li> elements nested under the <ol class="row"> list.
    const numberOfBooks = $('ol[class="row"]').find('li').length;
    console.log(numberOfBooks); // 20!
  })
  .catch((err) => {
    // Surface failures instead of leaving the rejection unhandled.
    console.error('Scrape failed:', err);
  });

Puppeteer

// Standalone example: render a page in headless Chrome and read back its HTML.
const puppeteer = require('puppeteer');

/**
 * Opens books.toscrape.com in a headless browser, pauses for one second,
 * and returns the page's serialized HTML.
 *
 * @returns {Promise<string>} Resolves with the result of `page.content()`.
 */
const scrape = async () => {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto('http://books.toscrape.com/');
    // Fixed one-second pause, as in the original example. A plain timer is used
    // because `page.waitFor` is deprecated and absent from newer Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, 1000));
    return await page.content();
  } finally {
    // Always shut the browser down, even when navigation fails, so no
    // Chromium process is left running.
    await browser.close();
  }
};

scrape()
  .then((value) => {
    console.log(value); // complete HTML code of the target url!
  })
  .catch((err) => {
    // Surface failures instead of leaving the rejection unhandled.
    console.error('Scrape failed:', err);
  });

Osmosis

// Standalone example: extract the <h1> and <title> text from a Wikipedia page.
const osmosis = require('osmosis');

osmosis('https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population')
  // Map output keys to the selectors whose content should be captured.
  .set({ heading: 'h1', title: 'title' })
  .data((item) => console.log(item))
  // Report request/parse errors rather than failing silently.
  .error((err) => console.error(err));
{ heading: 'List of U.S. states and territories by population', title: 'List of U.S. states and territories by population - Wikipedia' }

Conclusion

Additional Resources

Scrapingdog Logo

Try Scrapingdog for Free!

1000 free API calls for testing.

No credit card required!