diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..07e6e47
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/node_modules
diff --git a/src/console/input.js b/src/console/input.js
new file mode 100644
--- /dev/null
+++ b/src/console/input.js
@@ -0,0 +1,67 @@
+"use strict";
+
+const Minimist = require('minimist');
+
+class Input {
+    /**
+     * Constructor.
+     *
+     * Parses the arguments that follow the node binary and the script path.
+     *
+     * @param object process
+     */
+    constructor(process) {
+        this.args = Minimist(process.argv.slice(2));
+        this.node = process.argv[0];
+        this.script = process.argv[1];
+    }
+
+    /**
+     * Return the value of the given name argument.
+     *
+     * Falls back to defaultValue when the argument is missing, or to null
+     * when no default value was given.
+     *
+     * @param string name
+     * @param mixed defaultValue
+     *
+     * @return mixed
+     */
+    get(name, defaultValue) {
+        if (this.has(name)) {
+            return this.args[name];
+        }
+
+        if (defaultValue !== undefined) {
+            return defaultValue;
+        }
+
+        return null;
+    }
+
+    /**
+     * Check the given argument name exists.
+     *
+     * @param string name
+     *
+     * @return boolean
+     */
+    has(name) {
+        // Call through Object.prototype: an option named "hasOwnProperty"
+        // would otherwise shadow the method on the parsed arguments.
+        return Object.prototype.hasOwnProperty.call(this.args, name);
+    }
+
+    /**
+     * Return if args is empty.
+     *
+     * @return boolean
+     */
+    empty() {
+        // Exactly one key is treated as "empty"; presumably that key is the
+        // "_" positional list minimist always adds (to be confirmed).
+        return Object.keys(this.args).length === 1;
+    }
+}
+
+module.exports = Input;
diff --git a/src/console/output.js b/src/console/output.js
new file mode 100644
--- /dev/null
+++ b/src/console/output.js
@@ -0,0 +1,41 @@
+"use strict";
+
+class Output {
+    /**
+     * Convert and print data to json.
+     *
+     * Undefined values are serialized as null.
+     *
+     * @param mixed data
+     * @param boolean pretty Indent the output with 2 spaces when truthy
+     */
+    json(data, pretty) {
+        const json = JSON.stringify(
+            data,
+            function(key, value) {
+                if (value === undefined) {
+                    return null;
+                }
+
+                return value;
+            },
+            pretty ? 2 : null
+        );
+
+        return this.write(json);
+    }
+
+    /**
+     * Print data.
+     *
+     * @param mixed data
+     * @param string level Console method to print with, "log" when omitted
+     */
+    write(data, level) {
+        level = level || 'log';
+
+        console[level](data);
+    }
+}
+
+module.exports = Output;
diff --git a/src/filter/breaks.js b/src/filter/breaks.js
new file mode 100644
--- /dev/null
+++ b/src/filter/breaks.js
@@ -0,0 +1,8 @@
+/**
+ * Removes every line break character (\n and \r) from the value.
+ */
+const filter = function(value) {
+    return value.replace(/[\r\n]/g, '')
+}
+
+module.exports = filter
diff --git a/src/filter/spaces.js b/src/filter/spaces.js
new file mode 100644
--- /dev/null
+++ b/src/filter/spaces.js
@@ -0,0 +1,9 @@
+/**
+ * Collapses every run of 2 or more whitespace characters into one space.
+ * Line breaks are matched by \s, so they are collapsed as well.
+ */
+const filter = function(value) {
+    return value.replace(/\s{2,}/g, ' ')
+}
+
+module.exports = filter
diff --git a/src/filter/tags.js b/src/filter/tags.js
new file mode 100644
--- /dev/null
+++ b/src/filter/tags.js
@@ -0,0 +1,13 @@
+const striptags = require('striptags')
+
+/**
+ * Strips HTML tags from the value.
+ *
+ * `tags` is passed as the second argument of striptags, which is the list
+ * of tags to keep in the output.
+ */
+const filter = function(value, tags) {
+    return striptags(value, tags)
+}
+
+module.exports = filter
diff --git a/src/filter/trim.js b/src/filter/trim.js
new file mode 100644
--- /dev/null
+++ b/src/filter/trim.js
@@ -0,0 +1,10 @@
+const trim = require('trim')
+
+/**
+ * Trims the value by delegating to the trim package.
+ */
+const filter = function(value) {
+    return trim(value)
+}
+
+module.exports = filter
diff --git a/src/index.js b/src/index.js
new file mode 100644
--- /dev/null
+++ b/src/index.js
@@ -0,0 +1,121 @@
+const Output = require('./console/output')
+const Input = require('./console/input')
+const rq = require('request-promise')
+const cheerio = require('cheerio')
+
+// Filters that can be enabled from the command line, keyed by option name.
+const filters = {
+    breaks: require('./filter/breaks'),
+    spaces: require('./filter/spaces'),
+    tags: require('./filter/tags'),
+    trim: require('./filter/trim'),
+}
+
+const output = new Output()
+const input = new Input(process)
+
+const usage = `
+${input.node} ${input.script} [params]
+
+Parameters
+
+  --url [URL]             URL to scrap
+  --selector [Selector]   CSS selector
+
+Optional parameters
+
+  Filters
+
+  Order has a meaning.
+
+  --breaks                Removes breaks (\\n, \\r)
+  --trim                  Strips whitespaces from the beginning and end of the value
+  --spaces                Replaces 2 or more successive whitespaces (breaks included) by 1 space
+  --tags, --tags [TAGS]   Removes tags. You can specify the tags to keep (separated by comma)
+
+  HTTP
+
+  --method [METHOD]       HTTP Method
+  --accept-http-error     Accepts all status code (like 404)
+
+  --verbose, -v           Show message of error
+`
+
+if (input.has('help')) {
+    output.write(usage)
+
+    process.exit(0)
+}
+
+if (!input.has('url')) {
+    output.write('You must specify --url', 'error')
+    output.write(usage, 'error')
+
+    process.exit(1)
+}
+
+if (!input.has('selector')) {
+    output.write('You must specify --selector', 'error')
+    output.write(usage, 'error')
+
+    process.exit(1)
+}
+
+const url = input.get('url')
+const selector = input.get('selector')
+const method = input.get('method', 'GET')
+const acceptAllStatus = input.has('accept-http-error')
+const verbose = input.has('verbose') || input.has('v')
+
+// Filters run in the key order of input.args; the usage text states that
+// their order matters.
+const filtersToApply = {}
+
+for (const param in input.args) {
+    if (!filters.hasOwnProperty(param)) {
+        continue
+    }
+
+    const value = input.args[param]
+
+    // A negated flag (--no-tags) is parsed as false: leave the filter off.
+    if (value === false) {
+        continue
+    }
+
+    // A bare flag is parsed as true and carries no option. Anything else
+    // (string, number, repeated option) becomes a list of strings.
+    filtersToApply[param] = value === true ? null : String(value).split(',')
+}
+
+rq({
+    uri: url,
+    method: method,
+    simple: !acceptAllStatus
+})
+    .then(function(body) {
+        const $ = cheerio.load(body)
+        let value = $(selector).html()
+
+        // html() yields null when nothing matches; the filters expect a string.
+        if (value === null) {
+            output.write('No element matches the selector', 'error')
+
+            process.exit(1)
+        }
+
+        for (const filter in filtersToApply) {
+            value = filters[filter](value, filtersToApply[filter])
+        }
+
+        output.write(value)
+    })
+    .catch(function(error) {
+        output.write(error.name, 'error')
+
+        if (verbose) {
+            output.write(error.message, 'error')
+        }
+
+        process.exit(1)
+    })