const Output = require('./console/output')
const Input = require('./console/input')
const scraper = require('./index')

// Writer used for every message this script prints.
const output = new Output()
// Command-line argument reader, constructed from the current process.
const input = new Input(process)

// Names of the command-line flags that are treated as output filters.
// Any other flag is ignored when the filter map is built.
const filters = [
  'breaks',
  'spaces',
  'tags',
  'trim',
]

// Help text written for --help and after a missing-parameter error.
// The first line interpolates input.node and input.script (presumably the
// node binary and the script name -- confirm in ./console/input).
// The doubled backslashes make "\n" and "\r" appear as literal text in the
// printed help instead of being interpreted as line breaks.
const usage = `
${input.node} ${input.script} [params]

Parameters

--url [URL] URL to scrap
--selector [Selector] CSS selector

Optional parameters

Filters

Order has a meaning.

--breaks Removes breaks (\\n, \\r)
--trim Strips whitespaces from the beginning and end of the value
--spaces Replaces 2 successive spaces by 1, except breaks
--tags, --tags [TAGS] Removes tags. You can specify the tags to remove (separated by comma)

HTTP

--method [METHOD] HTTP Method
--accept-http-error Accepts all status codes (like 404)

--verbose, -v Show message of error
--multiple, -m The output must contain all the selector targets
`

// --help: print the usage text and stop with a success exit code.
if (input.has('help')) {
  output.write(usage)
  process.exit(0)
}

// --url and --selector are mandatory. They are checked in that order; the
// first one missing is reported, followed by the usage text, and the process
// exits with status 1.
for (const required of ['url', 'selector']) {
  if (!input.has(required)) {
    output.write(`You must specify --${required}`)
    output.write(usage)
    process.exit(1)
  }
}

// Mandatory parameters (their presence was checked before this point).
const url = input.get('url')
const selector = input.get('selector')

// HTTP method defaults to GET when --method is not passed.
const method = input.has('method') ? input.get('method') : 'GET'

// Boolean switches: only the presence of the flag matters.
const acceptAllStatus = input.has('accept-http-error')
const verbose = input.has('verbose') || input.has('v')
const isMultiple = input.has('multiple') || input.has('m')

// Map of filter name -> filter arguments, built from the flags that were
// actually passed. Keys are inserted in the iteration order of input.args.
//   --tags a,b   => { tags: ['a', 'b'] }
//   --trim       => { trim: null }
const filtersToApply = {}

// Object.keys visits own properties only, so nothing inherited from a
// prototype can be mistaken for a command-line flag.
for (const param of Object.keys(input.args)) {
  if (!filters.includes(param)) {
    continue
  }

  const raw = input.args[param]

  // A bare flag is stored as `true` and carries no arguments. Only string
  // values are split on commas; any other value is treated like a bare flag
  // rather than crashing on a missing .split method.
  filtersToApply[param] = typeof raw === 'string' ? raw.split(',') : null
}

// Request settings passed to scraper as its first argument.
const options = {
  url,
  method,
  acceptAllStatus,
}

/**
 * Success callback handed to scraper.
 *
 * When --multiple was requested and the result is an array, every element is
 * written with its own output.write call. In every other case the value is
 * written as-is in a single call.
 *
 * @param {*} value Result delivered by scraper.
 */
const onSuccess = function(value) {
  // Array.isArray also recognises arrays created in another realm, which
  // `instanceof Array` does not.
  if (!(isMultiple && Array.isArray(value))) {
    output.write(value)
    return
  }

  for (const item of value) {
    output.write(item)
  }
}

/**
 * Failure callback handed to scraper.
 *
 * Always writes the error's name with the 'error' type; the message is added
 * only when --verbose / -v was passed. The process then exits with status 1.
 *
 * @param {Error} error Failure reported by scraper.
 */
const onError = function(error) {
  output.write(error.name, 'error')

  if (verbose) {
    output.write(error.message, 'error')
  }

  process.exit(1)
}

// Hand the request options, selector, filter map and both callbacks to the
// scraper exported by ./index. isMultiple is forwarded as the last argument.
scraper(options, selector, filtersToApply, onSuccess, onError, isMultiple)