2020-04-11 22:18:23 +02:00
|
|
|
const rq = require('request-promise')
|
|
|
|
const cheerio = require('cheerio')
|
|
|
|
// Each filter lives in its own module under ./filter; they are loaded once,
// in the order listed, when this file is first required.
const breaksFilter = require('./filter/breaks')
const spacesFilter = require('./filter/spaces')
const tagsFilter = require('./filter/tags')
const trimFilter = require('./filter/trim')

// Registry of the available filters, keyed by the name callers use in the
// `filtersToApply` argument of the scraper.
const filters = {
    breaks: breaksFilter,
    spaces: spacesFilter,
    tags: tagsFilter,
    trim: trimFilter,
}
|
|
|
|
|
2020-11-10 13:30:38 +01:00
|
|
|
/**
 * Fetches a page and extracts the inner HTML of the element(s) matching a
 * selector, running the requested filters over the extracted HTML.
 *
 * The request is issued through `rq`, the response body is parsed with
 * `cheerio.load`, and the outcome is reported through the two callbacks; the
 * function itself returns nothing.
 *
 * @param {Object} options - Request settings.
 * @param {string} options.url - Forwarded to the request as `uri`.
 * @param {string} [options.method='GET'] - HTTP method; any falsy value falls back to 'GET'.
 * @param {boolean} [options.acceptAllStatus=false] - When truthy the request is
 *     sent with `simple: false` (see the request-promise documentation for the
 *     exact effect on non-2xx responses).
 * @param {string} selector - Selector evaluated against the parsed document.
 * @param {Object} [filtersToApply] - Maps a filter name (an own key of
 *     `filters`) to the value passed as that filter's second argument. Filters
 *     run in the order of the object's own keys. A falsy value means no filters.
 * @param {Function} [callbackSuccess] - Receives the filtered result: an array
 *     with one entry per matched node when `isMultiple` is truthy, otherwise
 *     the `.html()` result of the selection.
 * @param {Function} [callbackError] - Receives the error when the request, the
 *     extraction or a filter fails. Because the success callback runs inside
 *     the same promise chain, an exception thrown by `callbackSuccess` is
 *     delivered here as well. Without this callback errors are dropped.
 * @param {boolean} [isMultiple] - Truthy to collect every match instead of
 *     only the `.html()` of the selection.
 * @returns {void}
 */
const scraper = function(options, selector, filtersToApply, callbackSuccess, callbackError, isMultiple) {
    // Work on a local instead of reassigning the parameter; `||` (rather than a
    // default parameter) keeps accepting `null` as "no filters".
    const requestedFilters = filtersToApply || {}

    // Only functions registered directly on `filters` count as filters.
    // Without the own-property check a name such as "toString" would resolve
    // to an inherited Object.prototype method and be run silently.
    const assertKnownFilter = function(name) {
        const isRegistered = Object.prototype.hasOwnProperty.call(filters, name)
        if (!isRegistered || typeof filters[name] !== 'function') {
            throw new TypeError(`Unknown filter: ${name}`)
        }
    }

    rq({
        uri: options.url,
        method: options.method || 'GET',
        simple: !options.acceptAllStatus
    })
    .then(function(body) {
        const $ = cheerio.load(body)
        let value

        if (isMultiple) {
            value = []
            $(selector).each(function(index, node) {
                value.push($(node).html())
            })
        } else {
            value = $(selector).html()
        }

        // Object.keys limits the walk to the caller's own entries, so
        // enumerable properties inherited from a prototype are not treated
        // as filter requests.
        Object.keys(requestedFilters).forEach(function(name) {
            assertKnownFilter(name)
            const filterArgument = requestedFilters[name]

            if (isMultiple) {
                // map() visits array elements only; the previous for...in
                // walk also picked up enumerable properties inherited from
                // Array.prototype and stored them on the result.
                value = value.map(function(html) {
                    return filters[name](html, filterArgument)
                })
            } else {
                value = filters[name](value, filterArgument)
            }
        })

        if (callbackSuccess) {
            callbackSuccess(value)
        }
    })
    .catch(function(error) {
        if (callbackError) {
            callbackError(error)
        }
    })
}
|
2020-04-11 22:18:23 +02:00
|
|
|
|
2020-04-14 18:16:19 +02:00
|
|
|
// The scraper function is this module's only export.
module.exports = scraper
|