// scraper/src/cli.js
// (file-viewer metadata: 103 lines, 2.2 KiB, JavaScript)

// Project-local modules: the console output/input classes and the scraper
// function exported by ./index (called at the bottom of this script).
const Output = require('./console/output')
const Input = require('./console/input')
const scraper = require('./index')
// Writer used for every message this script emits (via output.write).
const output = new Output()
// Built from the current process object; read below through has(), get(),
// args, node and script. Presumably parses process.argv — confirm in ./console/input.
const input = new Input(process)
// Names of the command-line parameters that are recognised as filters.
// Parameters not listed here are never copied into the filter set.
const filters = ['breaks', 'spaces', 'tags', 'trim']
// Help text printed for --help and after a missing required parameter.
// The first line is built from input.node and input.script.
// Every flag the script reads is listed, including --help and the GET
// default that is applied when --method is absent.
const usage = `
${input.node} ${input.script} [params]
Parameters
--url [URL] URL to scrape
--selector [Selector] CSS selector
Optional parameters
Filters
Order has a meaning.
--breaks Removes breaks (\\n, \\r)
--trim Strips whitespaces from the beginning and end of the value
--spaces Replaces 2 successive spaces by 1, except breaks
--tags, --tags [TAGS] Removes tags. You can specify the tags to remove (separated by comma)
HTTP
--method [METHOD] HTTP Method (default: GET)
--accept-http-error Accepts all status codes (like 404)
--verbose, -v Show message of error
Misc
--help Shows this help
`
// --help short-circuits everything else: print the usage and exit with status 0.
if (input.has('help')) {
  output.write(usage)
  process.exit(0)
}

// --url and --selector are mandatory and are checked in that order.
// A missing one is reported together with the usage text, then the
// process exits with status 1.
for (const required of ['url', 'selector']) {
  if (input.has(required)) {
    continue
  }
  output.write(`You must specify --${required}`)
  output.write(usage)
  process.exit(1)
}
// Required parameters (their presence is checked earlier in this script).
const url = input.get('url')
const selector = input.get('selector')
// HTTP method falls back to GET when --method is not given.
const method = input.has('method') ? input.get('method') : 'GET'
const acceptAllStatus = input.has('accept-http-error')
// Verbose mode can be requested with either --verbose or -v.
const verbose = input.has('verbose') || input.has('v')

// Collect the requested filters, keyed by filter name, in the order the
// parameters appear in input.args. Only own properties are visited, so
// inherited enumerable keys can never be mistaken for a filter.
// A bare flag (value === true) maps to null; a flag carrying a value maps
// to that value split on commas.
const filtersToApply = {}
for (const [param, rawValue] of Object.entries(input.args)) {
  if (!filters.includes(param)) {
    continue
  }
  filtersToApply[param] = rawValue === true ? null : rawValue.split(',')
}

// Request settings handed to the scraper.
const options = { url, method, acceptAllStatus }
// Success callback passed to the scraper: writes the received value to the output.
const onSuccess = (value) => {
  output.write(value)
}
// Failure callback passed to the scraper: always reports the error name,
// adds the error message when verbose mode is on, then exits with status 1.
const onError = (error) => {
  const report = (text) => output.write(text, 'error')
  report(error.name)
  if (verbose) {
    report(error.message)
  }
  process.exit(1)
}
// Run the scraper with the request options, the CSS selector and the
// collected filters. onSuccess and onError are passed as callbacks —
// presumably invoked with the scraped value or the failure respectively;
// confirm against ./index.
scraper(options, selector, filtersToApply, onSuccess, onError)