add option to get multiple results
Gitnet/scraper/pipeline/head This commit looks good Details

This commit is contained in:
Simon Vieille 2020-11-10 13:30:38 +01:00
parent bdd90b0e09
commit b680003549
Signed by: deblan
GPG Key ID: 03383D15A1D31745
3 changed files with 34 additions and 7 deletions

View File

@ -38,6 +38,8 @@ const options = {
method: 'GET', // Optional, default is `GET` method: 'GET', // Optional, default is `GET`
} }
const isMultiple = false // `false` returns only the first result; `true` returns an array of all results
const selector = '.repository-content .numbers-summary li:nth-child(4) a' const selector = '.repository-content .numbers-summary li:nth-child(4) a'
const filters = { const filters = {
@ -56,7 +58,8 @@ scraper(
}, },
function(error) { function(error) {
console.log(error) console.log(error)
} },
isMultiple
) )
``` ```

View File

@ -34,7 +34,8 @@ Optional parameters
--method [METHOD] HTTP Method --method [METHOD] HTTP Method
--accept-http-error Accepts all status codes (like 404) --accept-http-error Accepts all status codes (like 404)
--verbose, -v Show message of error --verbose, -v Show message of error
--multiple, -m The output must contain all the selector targets
` `
if (input.has('help')) { if (input.has('help')) {
@ -62,6 +63,7 @@ const selector = input.get('selector')
const method = input.has('method') ? input.get('method') : 'GET' const method = input.has('method') ? input.get('method') : 'GET'
const acceptAllStatus = input.has('accept-http-error') const acceptAllStatus = input.has('accept-http-error')
const verbose = input.has('verbose') || input.has('v') const verbose = input.has('verbose') || input.has('v')
const isMultiple = input.has('multiple') || input.has('m')
let filtersToApply = {} let filtersToApply = {}
@ -86,7 +88,13 @@ const options = {
} }
const onSuccess = function(value) { const onSuccess = function(value) {
output.write(value) if (isMultiple && value instanceof Array) {
for (let item of value) {
output.write(item)
}
} else {
output.write(value)
}
} }
const onError = function(error) { const onError = function(error) {
@ -99,4 +107,4 @@ const onError = function(error) {
process.exit(1) process.exit(1)
} }
scraper(options, selector, filtersToApply, onSuccess, onError) scraper(options, selector, filtersToApply, onSuccess, onError, isMultiple)

View File

@ -7,7 +7,7 @@ const filters = {
trim: require('./filter/trim'), trim: require('./filter/trim'),
} }
const scraper = function(options, selector, filtersToApply, callbackSuccess, callbackError) { const scraper = function(options, selector, filtersToApply, callbackSuccess, callbackError, isMultiple) {
filtersToApply = filtersToApply || {} filtersToApply = filtersToApply || {}
rq({ rq({
@ -17,10 +17,26 @@ const scraper = function(options, selector, filtersToApply, callbackSuccess, cal
}) })
.then(function(body) { .then(function(body) {
const $ = cheerio.load(body) const $ = cheerio.load(body)
let value = $(selector).html() let value = []
if (isMultiple) {
let nodes = $(selector)
nodes.each(function(i, node) {
value.push($(node).html())
})
} else {
value = $(selector).html()
}
for (let filter in filtersToApply) { for (let filter in filtersToApply) {
value = filters[filter](value, filtersToApply[filter]) if (isMultiple) {
for (let i in value) {
value[i] = filters[filter](value[i], filtersToApply[filter])
}
} else {
value = filters[filter](value, filtersToApply[filter])
}
} }
if (callbackSuccess) { if (callbackSuccess) {