|
|
@ -0,0 +1,108 @@ |
|
|
|
const Output = require('./console/output') |
|
|
|
const Input = require('./console/input') |
|
|
|
const rq = require('request-promise') |
|
|
|
const cheerio = require('cheerio') |
|
|
|
|
|
|
|
// Registry of value filters, keyed by the CLI flag that enables each one
// (--breaks, --spaces, --tags, --trim). Each entry is the function exported
// by the matching module under ./filter.
const filters = {}
filters.breaks = require('./filter/breaks')
filters.spaces = require('./filter/spaces')
filters.tags = require('./filter/tags')
filters.trim = require('./filter/trim')
|
|
|
|
|
|
|
// Console helpers: `output` prints messages, `input` exposes the parsed
// command line (has/get/args) plus the node and script names used below.
const output = new Output()
const input = new Input(process)

// Help text printed for --help and whenever a required parameter is missing.
// NOTE(review): the wording is unchanged, but the indentation and column
// alignment were reconstructed after the literal was garbled with table
// residue — confirm against the intended layout.
const usage = `
  ${input.node} ${input.script} [params]

  Parameters

    --url [URL]                URL to scrap
    --selector [Selector]      CSS selector

  Optional parameters

    Filters

      Order has a meaning.

      --breaks                 Removes breaks (\\n, \\r)
      --trim                   Strips whitespaces from the beginning and end of the value
      --spaces                 Replaces 2 successive spaces by 1, except breaks
      --tags, --tags [TAGS]    Removes tags. You can specify the tags to remove (separated by comma)

    HTTP

      --method [METHOD]        HTTP Method
      --accept-http-error      Accepts all status code (like 404)

    --verbose, -v              Show message of error
`
|
|
|
|
|
|
|
|
// --help: print the usage text and stop with a success status.
if (input.has('help')) {
  output.write(usage)
  process.exit(0)
}

// --url and --selector are mandatory. On the first one that is missing,
// report it, show the usage text and stop with a failure status.
for (const required of ['url', 'selector']) {
  if (input.has(required)) {
    continue
  }

  output.write(`You must specify --${required}`)
  output.write(usage)
  process.exit(1)
}
|
|
|
|
|
|
|
// Target page and the CSS selector to extract from it.
const url = input.get('url')
const selector = input.get('selector')

// HTTP method: GET unless --method was given.
const method = !input.has('method') ? 'GET' : input.get('method')

// When set, HTTP error statuses are not treated as request failures.
const acceptAllStatus = input.has('accept-http-error')

// --verbose and -v are aliases; either one enables detailed error messages.
const verbose = ['verbose', 'v'].some((flag) => input.has(flag))
|
|
|
|
|
|
|
// Filters requested on the command line, in the order the arguments are
// enumerated. Each key is a filter name; each value is that filter's
// options: null for a bare flag, otherwise the argument split on commas.
let filtersToApply = {}

for (let name in input.args) {
  // Skip arguments that do not name a known filter.
  if (!filters.hasOwnProperty(name)) {
    continue
  }

  const raw = input.args[name]

  // A flag given without a value is stored as `true`.
  filtersToApply[name] = raw === true ? null : raw.split(',')
}
|
|
|
|
|
|
|
// Fetch the page, extract the inner HTML of the first element matching the
// selector, run it through the requested filters in order, and print it.
// Any failure is reported by error name (plus the message in verbose mode)
// and the process exits with status 1.
rq({
  uri: url,
  method,
  // With `simple` enabled, request-promise rejects on non-2xx responses;
  // --accept-http-error turns that off so such bodies are still processed.
  simple: !acceptAllStatus,
})
  .then((body) => {
    const $ = cheerio.load(body)
    let value = $(selector).html()

    // cheerio returns null when nothing matches. Fail explicitly instead of
    // handing null to the filters or printing it as the result.
    if (value === null) {
      const noMatch = new Error(`No element matches selector "${selector}"`)
      noMatch.name = 'NoMatchError'
      throw noMatch
    }

    // Apply the filters in the order they were collected.
    for (const filter in filtersToApply) {
      value = filters[filter](value, filtersToApply[filter])
    }

    output.write(value)
  })
  .catch((error) => {
    output.write(error.name, 'error')

    if (verbose) {
      output.write(error.message, 'error')
    }

    process.exit(1)
  })