diff --git a/bogo/accent.py b/bogo/accent.py index 832f0ba..f703e5b 100644 --- a/bogo/accent.py +++ b/bogo/accent.py @@ -3,7 +3,7 @@ # This file is part of ibus-bogo project. # # Copyright (C) 2012 Long T. Dam -# Copyright (C) 2012-2013 Trung Ngo +# Copyright (C) 2012-2014 Trung Ngo # Copyright (C) 2013 Duong H. Nguyen # # ibus-bogo is free software: you can redistribute it and/or modify @@ -21,7 +21,7 @@ # """ -Utility functions to deal with accents (should have been called tones), +Utility functions to deal with accents (also called tones), which are diacritical markings that changes the pitch of a character. E.g. the acute accent in á. """ @@ -31,9 +31,11 @@ from __future__ import unicode_literals from bogo import utils +from bogo.syllable import Syllable class Accent: + MAX_VALUE = 6 GRAVE = 5 ACUTE = 4 HOOK = 3 @@ -62,61 +64,65 @@ def get_accent_string(string): return accents[-1] if accents else Accent.NONE -def add_accent(components, accent): +def add_accent(syllable, accent): """ - Add accent to the given components. The parameter components is - the result of function separate() + Add accent to the given syllable. """ - vowel = components[1] - last_consonant = components[2] + vowel = syllable.vowel + + if not vowel: + return syllable + if accent == Accent.NONE: vowel = remove_accent_string(vowel) - return [components[0], vowel, last_consonant] + return Syllable(syllable.initial_consonant, vowel, syllable.final_consonant) - if vowel == "": - return components - #raw_string is a list, not a str object - raw_string = remove_accent_string(vowel).lower() - new_vowel = "" + vowel_wo_accent = remove_accent_string(vowel).lower() + new_vowel = '' + # Highest priority for ê and ơ - index = max(raw_string.find("ê"), raw_string.find("ơ")) - if index != -1: - new_vowel = vowel[:index] + add_accent_char(vowel[index], accent) + vowel[index+1:] - elif len(vowel) == 1 or (len(vowel) == 2 and last_consonant == ""): - new_vowel = add_accent_char(vowel[0], accent) + vowel[1:] + index = max(vowel_wo_accent.find("ê"), vowel_wo_accent.find("ơ")) + found_e_hat_or_o_horn = index != -1 + + if found_e_hat_or_o_horn: + # Add accent mark to the found ê or ơ + new_vowel = \ + vowel[:index] + \ + add_accent_char(vowel[index], accent) + \ + vowel[index + 1:] + elif len(vowel) == 1 or (len(vowel) == 2 and not syllable.final_consonant): + # cá + # cháo + first_vowel_char = vowel[0] + first_vowel_char_with_accent = add_accent_char(first_vowel_char, accent) + new_vowel = first_vowel_char_with_accent + vowel[1:] else: - new_vowel = vowel[:1] + add_accent_char(vowel[1], accent) + vowel[2:] - return [components[0], new_vowel, components[2]] + # biến + # khuỷu + second_vowel_char = vowel[1] + second_vowel_char_with_accent = add_accent_char(second_vowel_char, accent) + new_vowel = vowel[:1] + second_vowel_char_with_accent + vowel[2:] + return Syllable(syllable.initial_consonant, new_vowel, syllable.final_consonant) + +@utils.keep_case def add_accent_char(char, accent): """ - Add accent to a single char. Parameter accent is member of class - Accent + Add accent to a single char. + + Args: + accent: an Accent enum value """ - if char == "": - return "" - case = char.isupper() - char = char.lower() + if not (char and accent in range(0, Accent.MAX_VALUE + 1)): + return char + index = utils.VOWELS.find(char) if (index != -1): index = index - index % 6 + 5 char = utils.VOWELS[index - accent] - return utils.change_case(char, case) - -def add_accent_at(string, accent, index): - """ - Add mark to the index-th character of the given string. Return - the new string after applying change. - (unused) - """ - if index == -1: - return string - # Python can handle the case which index is out of range of given string - return string[:index] + \ - accent.accent.add_accent_char(string[index], accent) + \ - string[index+1:] + return char def remove_accent_char(char): diff --git a/bogo/syllable.py b/bogo/syllable.py new file mode 100644 index 0000000..b4a152f --- /dev/null +++ b/bogo/syllable.py @@ -0,0 +1,82 @@ +import collections +from bogo import utils + + +class Syllable(collections.namedtuple('Syllable', + ['initial_consonant', 'vowel', 'final_consonant'])): + + @staticmethod + def new_from_string(string): + """\ + Make a Syllable from a string. + + Args: + - string: the string to be parsed + + Returns: + a Syllable + + >>> parse_syllable('tuong') + ('t','uo','ng') + >>> parse_syllable('ohmyfkinggod') + ('ohmyfkingg','o','d') + """ + def atomic_separate(string, last_chars, last_is_vowel): + if string == "" or (last_is_vowel != utils.is_vowel(string[-1])): + return (string, last_chars) + else: + return atomic_separate(string[:-1], + string[-1] + last_chars, last_is_vowel) + + head, last_consonant = atomic_separate(string, "", False) + first_consonant, vowel = atomic_separate(head, "", True) + + if last_consonant and not (vowel + first_consonant): + first_consonant = last_consonant + last_consonant = '' + + # 'gi' and 'qu' are considered qualified consonants. + # We want something like this: + # ['g', 'ia', ''] -> ['gi', 'a', ''] + # ['q', 'ua', ''] -> ['qu', 'a', ''] + if len(vowel) > 1 and \ + (first_consonant + vowel[0]).lower() in ['gi', 'qu']: + first_consonant += vowel[0] + vowel = vowel[1:] + + return Syllable(first_consonant, vowel, last_consonant) + + + def append_char(self, char): + """ + Append a character to `comps` following this rule: a vowel is added + to the vowel part if there is no last consonant, else to the last + consonant part; a consonant is added to the first consonant part + if there is no vowel, and to the last consonant part if the + vowel part is not empty. + + >>> transform(['', '', '']) + ['c', '', ''] + >>> transform(['c', '', ''], '+o') + ['c', 'o', ''] + >>> transform(['c', 'o', ''], '+n') + ['c', 'o', 'n'] + >>> transform(['c', 'o', 'n'], '+o') + ['c', 'o', 'no'] + """ + initial_consonant = self.initial_consonant + vowel = self.vowel + final_consonant = self.final_consonant + + if utils.is_vowel(char): + if not self.final_consonant: + vowel = self.vowel + char + else: + final_consonant = self.final_consonant + char + else: + if not self.final_consonant and not self.vowel: + initial_consonant = self.initial_consonant + char + else: + final_consonant = self.final_consonant + char + + return Syllable(initial_consonant, vowel, final_consonant) diff --git a/bogo/test/test_accent.py b/bogo/test/test_accent.py index 4a574b3..f814a2c 100644 --- a/bogo/test/test_accent.py +++ b/bogo/test/test_accent.py @@ -1,3 +1,86 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +from nose.tools import eq_ +from bogo.accent import add_accent, add_accent_char, Accent +from bogo.syllable import Syllable + + +class TestAddAccentChar(): + + def test_empty_char(self): + result = add_accent_char('', Accent.GRAVE) + expected = '' + eq_(result, expected) + + def test_out_of_range_accent(self): + result = add_accent_char('a', 293432) + expected = 'a' + eq_(result, expected) + + def test_normal_accent(self): + result = add_accent_char('a', Accent.ACUTE) + expected = 'á' + eq_(result, expected) + + def test_upper_case(self): + eq_(add_accent_char('A', Accent.ACUTE), 'Á') + + +class TestAddAccent(): + def test_remove_accent(self): + s = Syllable('c', 'á', 'c') + + result = add_accent(s, Accent.NONE) + expected = Syllable('c', 'a', 'c') + + eq_(result, expected) + + def test_e_hat(self): + s = Syllable('ch', 'uyê', 'n') + + result = add_accent(s, Accent.HOOK) + expected = Syllable('ch', 'uyể', 'n') + + eq_(result, expected) + + def test_o_horn(self): + s = Syllable('ch', 'ươ', 'ng') + + result = add_accent(s, Accent.HOOK) + expected = Syllable('ch', 'ưở', 'ng') + + eq_(result, expected) + + def test_double_vowel_no_final_consonant(self): + s = Syllable('c', 'ua', '') + + result = add_accent(s, Accent.HOOK) + expected = Syllable('c', 'ủa', '') + + eq_(result, expected) + + def test_double_vowel_with_final_consonant(self): + s = Syllable('c', 'uô', 'ng') + + result = add_accent(s, Accent.GRAVE) + expected = Syllable('c', 'uồ', 'ng') + + eq_(result, expected) + + def test_single_vowel(self): + s = Syllable('c', 'a', '') + + result = add_accent(s, Accent.ACUTE) + expected = Syllable('c', 'á', '') + + eq_(result, expected) + + s = Syllable('c', 'a', 'n') + + result = add_accent(s, Accent.ACUTE) + expected = Syllable('c', 'á', 'n') + + eq_(result, expected) + + \ No newline at end of file diff --git a/bogo/test/test_syllable.py b/bogo/test/test_syllable.py new file mode 100644 index 0000000..15821c6 --- /dev/null +++ b/bogo/test/test_syllable.py @@ -0,0 +1,70 @@ +from nose.tools import eq_ +from bogo.syllable import Syllable + + +class TestSyllable(): + + def test_parse_simple_syllable(self): + parsed = Syllable.new_from_string('tuong') + + expected = Syllable('t', 'uo', 'ng') + eq_(parsed, expected) + + def test_parse_qua(self): + parsed = Syllable.new_from_string('qua') + + expected = Syllable('qu', 'a', '') + eq_(parsed, expected) + + def test_parse_gia(self): + parsed = Syllable.new_from_string('gia') + + expected = Syllable('gi', 'a', '') + eq_(parsed, expected) + + def test_parse_gi(self): + parsed = Syllable.new_from_string('gi') + + expected = Syllable('g', 'i', '') + eq_(parsed, expected) + + def test_parse_rubbish(self): + parsed = Syllable.new_from_string('ohmyfkinggod') + + expected = Syllable('ohmyfkingg', 'o', 'd') + eq_(parsed, expected) + + def test_append_initial_consonant(self): + s = Syllable('c', '', '') + s = s.append_char('c') + + expected = Syllable('cc', '', '') + eq_(s, expected) + + def test_append_initial_consonant_empty(self): + s = Syllable('', '', '') + s = s.append_char('c') + + expected = Syllable('c', '', '') + eq_(s, expected) + + def test_append_vowel(self): + s = Syllable('c', 'a', '') + s = s.append_char('a') + + expected = Syllable('c', 'aa', '') + eq_(s, expected) + + def test_append_vowel_empty(self): + s = Syllable('', '', '') + s = s.append_char('a') + + expected = Syllable('', 'a', '') + eq_(s, expected) + + def test_append_final_consonant(self): + s = Syllable('c', 'a', 'c') + s = s.append_char('c') + + expected = Syllable('c', 'a', 'cc') + eq_(s, expected) \ No newline at end of file diff --git a/bogo/test/test_utils.py b/bogo/test/test_utils.py index a96f6b6..ba5d76d 100644 --- a/bogo/test/test_utils.py +++ b/bogo/test/test_utils.py @@ -47,3 +47,62 @@ def test_separate(): eq_(separate('xẻng'), ['x', 'ẻ', 'ng']) eq_(separate('xoáy'), ['x', 'oáy', '']) eq_(separate('quây'), ['qu', 'ây', '']) + + +class TestKeepCase(): + + def test_keep_lower(self): + + @keep_case + def function(string): + return string.upper() + + eq_(function("abc"), "abc") + + def test_keep_title(self): + + @keep_case + def function(string): + return string.upper() + + eq_(function("Abc"), "Abc") + + def test_keep_upper(self): + + @keep_case + def function(string): + return string.title() + + eq_(function("ABC"), "ABC") + + def test_multiple_arguments(self): + + @keep_case + def function(string, arg1, arg2, kwarg1=True): + return "{} {} {} {}".format(string, arg1, arg2, kwarg1) + + result = function("abc", 1, 2, 3) + expected = "abc 1 2 3" + + eq_(result, expected) + + def test_normalize_case(self): + """ + Test that the string argument is always normalized to lower case. + """ + inner = [0] + + @keep_case + def function(string): + inner[0] = string + return string + + function("ABC") + eq_(inner[0], "abc") + + def test_unrecognized_case(self): + @keep_case + def function(string): + return string + + eq_(function("aBcD"), "abcd") diff --git a/bogo/utils.py b/bogo/utils.py index f3b84cb..d70e661 100644 --- a/bogo/utils.py +++ b/bogo/utils.py @@ -129,3 +129,29 @@ def atomic_separate(string, last_chars, last_is_vowel): comps[1] = comps[1][1:] return comps + + +def keep_case(function): + """ + Decorator to ensure that the letter case of the input and + output of a function stays the same. + + This function assumes that the decorated function takes + a string as the first argument and returns a modified + version of it. Also, the string argument will be normalized + to lower case before being passed to the decorated function. + """ + + def inner(string, *args, **kwargs): + restore_case = { + True: str.__str__, # fallback if the string is empty + string.isupper(): str.upper, + string.islower(): str.lower, + string.istitle(): str.title + }[True] + + modified_string = function(string.lower(), *args, **kwargs) + + return restore_case(modified_string) + + return inner