scraper/src/index.js

54 lines
1.5 KiB
JavaScript

const rq = require('request-promise')
const cheerio = require('cheerio')
// Filter modules, loaded in a fixed order and then gathered into a single
// lookup object. `scraper` resolves each key of `filtersToApply` against this
// object and calls the match with (value, option).
const breaks = require('./filter/breaks')
const spaces = require('./filter/spaces')
const tags = require('./filter/tags')
const trim = require('./filter/trim')
const filters = { breaks, spaces, tags, trim }
/**
 * Runs every requested filter, in key order, over a scraped value.
 *
 * @param {*} value - Value produced by cheerio's `.html()`, or an array of
 *   such values when `isMultiple` is truthy.
 * @param {Object} filtersToApply - Maps a filter name (an own key of
 *   `filters`) to the option handed to that filter as its second argument.
 * @param {boolean} isMultiple - When truthy, each filter is applied to every
 *   element of `value` instead of to `value` itself.
 * @returns {*} The filtered value (a new array when `isMultiple` is truthy).
 * @throws {Error} When a requested name is not a registered filter function.
 */
const applyFilters = function(value, filtersToApply, isMultiple) {
  return Object.keys(filtersToApply).reduce(function(current, name) {
    // Own-property check: names such as "toString" or "constructor" must not
    // resolve to methods inherited from Object.prototype, which would
    // silently replace the scraped value with garbage.
    const isRegistered = Object.prototype.hasOwnProperty.call(filters, name)
    if (!isRegistered || typeof filters[name] !== 'function') {
      throw new Error(`Unknown filter: ${name}`)
    }
    const filterOption = filtersToApply[name]
    if (isMultiple) {
      return current.map(function(item) {
        return filters[name](item, filterOption)
      })
    }
    return filters[name](current, filterOption)
  }, value)
}

/**
 * Requests a page, extracts the inner HTML of the elements matching
 * `selector`, runs the requested filters over it and reports the result
 * through callbacks.
 *
 * @param {Object} options - Request settings.
 * @param {string} options.url - Passed to request-promise as `uri`.
 * @param {string} [options.method='GET'] - HTTP method; any falsy value falls
 *   back to 'GET'.
 * @param {boolean} [options.acceptAllStatus] - When truthy, request-promise's
 *   `simple` option is set to false (per its documentation, non-2xx responses
 *   then no longer reject).
 * @param {*} selector - Handed unchanged to the cheerio `$()` function.
 * @param {Object} [filtersToApply] - Maps filter names to the option passed
 *   to each filter; a falsy value means "no filters".
 * @param {Function} [callbackSuccess] - Called with the filtered result: one
 *   value in single mode, an array in multiple mode.
 * @param {Function} [callbackError] - Called with any error raised by the
 *   request, by parsing/filtering, or by `callbackSuccess` itself (the
 *   `.catch` is chained after the `.then`). When omitted, such errors are
 *   dropped.
 * @param {boolean} [isMultiple] - When truthy, collect the HTML of every
 *   matching element; otherwise only what `$(selector).html()` returns.
 * @returns {undefined} Results are delivered only through the callbacks.
 */
const scraper = function(options, selector, filtersToApply, callbackSuccess, callbackError, isMultiple) {
  const requestedFilters = filtersToApply || {}
  rq({
    uri: options.url,
    method: options.method || 'GET',
    simple: !options.acceptAllStatus
  })
    .then(function(body) {
      const $ = cheerio.load(body)
      let scraped
      if (isMultiple) {
        scraped = []
        $(selector).each(function(i, node) {
          scraped.push($(node).html())
        })
      } else {
        // NOTE(review): cheerio documents `.html()` as returning null when
        // nothing matches, so filters may receive null here - confirm that
        // the filter modules tolerate it.
        scraped = $(selector).html()
      }
      const value = applyFilters(scraped, requestedFilters, isMultiple)
      if (callbackSuccess) {
        callbackSuccess(value)
      }
    })
    .catch(function(error) {
      if (callbackError) {
        callbackError(error)
      }
    })
}
// The scraper function is this module's sole export.
module.exports = scraper