diff --git a/lib/extract.js b/lib/extract.js
index 60e1f60..d2c3d62 100644
--- a/lib/extract.js
+++ b/lib/extract.js
@@ -3,6 +3,8 @@
 var cheerio = require('cheerio');
 var Po = require('pofile');
 var esprima = require('esprima');
+var langDetect = require('language-detect');
+var langMap = require('language-map');
 var _ = require('lodash');
 
 var escapeRegex = /[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g;
@@ -58,15 +60,7 @@ var Extractor = (function () {
             markerName: 'gettext',
             markerNames: [],
             lineNumbers: true,
-            extensions: {
-                htm: 'html',
-                html: 'html',
-                php: 'html',
-                phtml: 'html',
-                tml: 'html',
-                erb: 'html',
-                js: 'js'
-            },
+            extensions: {},
             postProcess: function (po) {}
         }, options);
         this.options.markerNames.unshift(this.options.markerName);
@@ -283,13 +277,35 @@ var Extractor = (function () {
     };
 
+    /**
+     * Extract strings from one file: the explicitly configured extension
+     * strategies are tried first, then language detection is used as a fallback.
+     *
+     * @param {string} filename Name of the file; its last dot-separated part is the extension.
+     * @param {string} content File contents (presumably text; handed to the extractors and the classifier).
+     */
     Extractor.prototype.parse = function (filename, content) {
+        // check list of supported extensions
         var extension = filename.split('.').pop();
-
         if (this.isSupportedByStrategy('html', extension)) {
-            this.extractHtml(filename, content);
+            return this.extractHtml(filename, content);
+        } else if (this.isSupportedByStrategy('js', extension)) {
+            return this.extractJs(filename, content);
         }
-        if (this.isSupportedByStrategy('js', extension)) {
+
+        // better language detection when all else fails
+        var lang = langDetect.filename(filename) || langDetect.classify(content);
+        if (lang === 'JavaScript') {
             this.extractJs(filename, content);
+        } else {
+            // langMap has no entry for an undetected or unlisted language;
+            // fall back to an empty record so the checks below cannot throw
+            var langAttrs = langMap[lang] || {};
+            if (langAttrs.type === 'markup' || // covers HTML, most templating langs, and XML
+                lang === 'PHP' ||
+                extension.indexOf('html') !== -1) /* .cshtml */ {
+
+                this.extractHtml(filename, content);
+            }
         }
     };
 
diff --git a/package.json b/package.json
index b3858c4..46ac9a8 100644
--- a/package.json
+++ b/package.json
@@ -45,6 +45,8 @@
   "dependencies": {
     "cheerio": "~0.18.0",
     "esprima": "~1.1.1",
+    "language-detect": "^1.0.0",
+    "language-map": "^1.0.0",
     "lodash": "~2.4.1",
     "pofile": "~0.2.8"
   }