forked from dariusk/pos-js
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPOSTagger.js
68 lines (59 loc) · 2.15 KB
/
POSTagger.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/*!
* jsPOS
*
* Copyright 2010, Percy Wegmann
* Licensed under the LGPLv3 license
* http://www.opensource.org/licenses/lgpl-3.0.html
*/
var TransformationRules = require("./BrillTransformationRules");
var transformationRules = new TransformationRules();
module.exports = POSTagger;
/**
 * Part-of-speech tagger backed by a word lexicon.
 *
 * @constructor
 * @param {Object} [lexicon] - Maps a word to an array of possible categories
 *   (tags); the first category is used as the word's initial tag. When
 *   omitted (or null), the tagger starts with an empty lexicon.
 */
function POSTagger(lexicon) {
  // Fall back to an empty lexicon: the methods index into this.lexicon
  // unconditionally and would otherwise throw when none was supplied.
  this.lexicon = lexicon == null ? {} : lexicon;
}
/**
 * Reports whether the lexicon has an entry for a word.
 *
 * The word is looked up as given first and then, if that fails, in lower
 * case. Only array entries count: the lexicon maps a word to an array of
 * categories, so anything else (for example members inherited from
 * Object.prototype, such as "constructor") is not a real entry. This is the
 * same check tag() applies before it uses an entry.
 *
 * @param {string} word - Word to look up.
 * @returns {boolean} true if the word, or its lower-cased form, has an entry.
 */
POSTagger.prototype.wordInLexicon = function (word) {
  var lexicon = this.lexicon;
  if (Array.isArray(lexicon[word])) return true;
  // 1/22/2002 mod (from Lisp code): if not in hash, try lower case:
  return Array.isArray(lexicon[word.toLowerCase()]);
};
/**
 * Tags each word of a sentence with a part-of-speech category.
 *
 * Every word first receives an initial tag from the lexicon (exact match,
 * then lower-cased match), defaulting to "NN" when there is no usable entry.
 * Every transformation rule is then called for every position, in order,
 * and may rewrite the tags in place.
 *
 * @param {string[]} words - The words of the sentence, in order.
 * @returns {Array[]} One [word, tag] pair per input word, in input order.
 */
POSTagger.prototype.tag = function (words) {
  var lexicon = this.lexicon;
  var taggedSentence = [];
  // Initialise taggedSentence with words and initial categories
  for (var i = 0, size = words.length; i < size; i++) {
    var word = words[i];
    // lexicon maps a word to an array of possible categories
    var categories = lexicon[word];
    // 1/22/2002 mod (from Lisp code): if not in hash, try lower case:
    if (!categories) categories = lexicon[word.toLowerCase()];
    // A word can collide with a member inherited from Object.prototype
    // ("constructor" resolves to a function), and an entry may be an empty
    // array; neither yields a usable category, so fall back to "NN".
    // Unknown single-character words get "NN" like any other unknown word.
    var hasCategory = Array.isArray(categories) && categories.length > 0;
    taggedSentence.push([word, hasCategory ? categories[0] : "NN"]);
  }
  // Apply transformation rules
  taggedSentence.forEach(function (taggedWord, index) {
    transformationRules.getRules().forEach(function (rule) {
      rule(taggedSentence, index);
    });
  });
  return taggedSentence;
};
/**
 * Writes each tagged word on its own line in the form `word(TAG)`.
 *
 * Output goes through the host's global `print` function when one exists,
 * and through console.log otherwise (Node.js defines no `print`).
 *
 * @param {Array[]} taggedWords - [word, tag] pairs, as returned by tag().
 */
POSTagger.prototype.prettyPrint = function (taggedWords) {
  // Nothing to print; also keeps null/undefined a silent no-op.
  if (taggedWords == null) return;
  var emit =
    typeof print === "function"
      ? print
      : function (line) {
          console.log(line);
        };
  // The index is declared locally: an undeclared loop variable leaks a global
  // and throws a ReferenceError in strict-mode / ES-module code.
  for (var i = 0; i < taggedWords.length; i++) {
    emit(taggedWords[i][0] + "(" + taggedWords[i][1] + ")");
  }
};
/**
 * Adds the entries of another lexicon to this tagger's lexicon.
 *
 * Words the tagger already knows keep their current entry; only missing
 * words are added. Entries are assigned by reference, not copied.
 *
 * @param {Object} lexicon - Additional word-to-categories entries.
 */
POSTagger.prototype.extendLexicon = function (lexicon) {
  // Borrow hasOwnProperty from Object.prototype so the check also works for
  // prototype-less lexicons (Object.create(null)) and cannot be shadowed by a
  // word that happens to be named "hasOwnProperty".
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var word in lexicon) {
    // Skip enumerable keys inherited by the incoming lexicon.
    if (!hasOwn.call(lexicon, word)) continue;
    if (!hasOwn.call(this.lexicon, word)) {
      this.lexicon[word] = lexicon[word];
    }
  }
};
// Example (the constructor takes a lexicon mapping each word to an array of tags):
// console.log(new POSTagger(lexicon).tag(["i", "went", "to", "the", "store", "to", "buy", "5.2", "gallons", "of", "milk"]));