Replace the print function by a file output function. #72

Open · wants to merge 26 commits into base: develop

Commits
aecd00b
Allow % of GPU memory to be allocated
normanheckscher Jan 11, 2017
b79b863
refactor rnn_cell for TF1.0
normanheckscher Jan 11, 2017
63b4ad4
refactor seq2seq using legacy code for TF1.0
normanheckscher Jan 11, 2017
2db3491
Merge pull request #48 from hunkim/develop
normanheckscher Jan 12, 2017
4d86d2d
Remove TODO: from log_dir
normanheckscher Jan 12, 2017
65de2dd
Merge branch 'master' into master1.0
normanheckscher Jan 12, 2017
47ee9df
switch to tf.concat_v2()
normanheckscher Jan 12, 2017
8ba2b09
Remove redundant operation.
normanheckscher Jan 13, 2017
b1f68e4
make travis use TF1.0a
normanheckscher Jan 15, 2017
20db898
Refactoring for TF 1.0rc0
normanheckscher Jan 28, 2017
18a88a4
Updates the beam search algorithm to use RNN state.
Jan 31, 2017
14e5567
Adds tests for beam search.
Feb 2, 2017
b5bbae1
Updates the README and fixes bugs in beam.
Feb 3, 2017
4b57b08
Updates the README with beam search examples.
Feb 3, 2017
9e9ae2a
Merge branch 'master' into master1.0
normanheckscher Feb 3, 2017
188e6af
update help
normanheckscher Feb 3, 2017
6a04a0a
Adjust placement of lines
normanheckscher Feb 3, 2017
f864a4c
update travis for tensorflow-1.0.0rc1
normanheckscher Feb 3, 2017
065ba23
update requirements in readme
normanheckscher Feb 3, 2017
9eebc6f
Update for RNNCell
normanheckscher Apr 4, 2017
401cb04
update travis
normanheckscher Apr 4, 2017
e60d8b4
Update requirements in README
normanheckscher Apr 4, 2017
3cdd92f
Add an --input_encoding argument to train.py (#56)
luser May 5, 2017
5df11c5
Add some command-line options (#68)
scubbo Dec 21, 2017
0e2cc30
Changed the print function into a write function
Meowdoleon Mar 7, 2018
ece0d4a
Update sample.py
Meowdoleon Mar 21, 2018
6 changes: 3 additions & 3 deletions .travis.yml
@@ -9,11 +9,11 @@ python:
install:
# install TensorFlow from https://storage.googleapis.com/tensorflow/
   - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
-      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl;
+      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.1.0rc0-cp27-none-linux_x86_64.whl;
     elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
-      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl;
+      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.1.0rc0-cp34-cp34m-linux_x86_64.whl;
     elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then
-      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp35-cp35m-linux_x86_64.whl;
+      pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.1.0rc0-cp35-cp35m-linux_x86_64.whl;
     fi

# command to run tests
78 changes: 72 additions & 6 deletions README.md
@@ -6,22 +6,29 @@ Multi-layer Recurrent Neural Networks (LSTM, RNN) for word-level language models
Mostly reused code from https://github.com/sherjilozair/char-rnn-tensorflow which was inspired from Andrej Karpathy's [char-rnn](https://github.com/karpathy/char-rnn).

# Requirements
-- [Tensorflow](http://www.tensorflow.org)
+- [Tensorflow 1.1.0rc0](http://www.tensorflow.org)

# Basic Usage
To train with default parameters on the tinyshakespeare corpus, run:
```bash
python train.py
```

-To sample from a trained model"
+To sample from a trained model, run:
```bash
python sample.py
```

To pick using beam search, use the `--pick` parameter. Beam search can be
further customized using the `--width` parameter, which sets the number of beams
to search with. For example:
```bash
python sample.py --pick 2 --width 4
```

# Sample output

-## Word-RNN
+### Word-RNN
```
LEONTES:
Why, my Irish time?
@@ -47,8 +54,7 @@ That He being and
full of toad, they knew me to joy.
```

-## Char-RNN
-
+### Char-RNN
```
ESCALUS:
What is our honours, such a Richard story
@@ -69,11 +75,71 @@ And six nor's mighty wind, I fairs, if?
Messenger:
My lank, nobles arms;
```

## Beam search

Beam search differs from the other `--pick` options in that it does not greedily
pick single words; rather, it expands the most promising nodes and keeps a
running score for each beam.
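The scoring rule behind this can be sketched in isolation. The following toy step is not the project's code — `tiny_beam_step`, the three-word vocabulary, and the probabilities are all made up for illustration — but it shows how candidates are ranked by accumulated negative log-likelihood, assuming NumPy:

```python
import numpy as np

def tiny_beam_step(live_samples, live_scores, probs, width):
    """One beam-search step: extend every live sample by every word,
    score each candidate by accumulated negative log-likelihood (NLL),
    and keep the `width` best."""
    # Candidate score = running NLL of the beam + NLL of the new word.
    cand_scores = np.array(live_scores)[:, None] - np.log(probs)
    cand_flat = cand_scores.flatten()
    ranks = cand_flat.argsort()[:width]  # lowest NLL = most probable
    voc_size = probs.shape[1]
    new_samples = [live_samples[r // voc_size] + [r % voc_size] for r in ranks]
    new_scores = cand_flat[ranks].tolist()
    return new_samples, new_scores

# Two live beams over a 3-word vocabulary; one row of next-word
# probabilities per beam.
probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.1, 0.8]])
samples, scores = tiny_beam_step([[0], [2]], [0.5, 0.4], probs, width=2)
print(samples)  # the surviving beams, best first
```

Note that the best candidate can come from any live beam, which is how beam search avoids the greedy trap of committing to a single word at each step.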

### Word-RNN (with beam search)
```
# python sample.py --prime "KING RICHARD III:" -n 100 --pick 2 --width 4

KING RICHARD III:
you, and and and and have been to be hanged, I am not to be touched?

Provost:
A Bohemian born, for tying his own train,
Forthwith by all that converses more with a crow-keeper;
I have drunk, Broach'd with the acorn cradled. Follow.

FERDINAND:
Who would not be conducted.

BISHOP OF ELY:
If you have been a-bed an acre of barren ground, hath holy;
I warrant, my lord restored of noon.

ISABELLA:
'Save my master and his shortness whisper me to the pedlar;
Money's a medler.
That I will pamper it to complain.

VOLUMNIA:
Indeed, I am
```

### Word-RNN (without beam search)
```
# python sample.py --prime "KING RICHARD III:" -n 100

KING RICHARD III:
marry, so and unto the wind have yours;
And thou Juliet, sir?

JULIET:
Well, wherefore speak your disposition cousin;
May thee flatter.
My hand will answer him;
e not to your Mariana Below these those and take this life,
That stir not light of reason.
The time Lucentio keeps a root from you.
Cursed be his potency,
It was my neighbour till the birth and I drank stay.

MENENIUS:
Here's the matter,
I know take this sour place,
they know allegiance Had made you guilty.
You do her bear comfort him between him or our noble bosom he did Bolingbroke's
```

# Projects
If you have any project using this word-rnn, please let us know; I'll list your project here.

- http://bot.wpoem.com/ (Simple poem generator in Korean)


# Contribution
Your comments (issues) and PRs are always welcome.
80 changes: 66 additions & 14 deletions beam.py
@@ -3,27 +3,73 @@


 class BeamSearch():
-    def __init__(self, probs):
-        self.probs = probs
-
-    def beamsearch(self, oov, empty, eos, k=1, maxsample=4000, use_unk=False):
-        """return k samples (beams) and their NLL scores, each sample is a sequence of labels,
-        all samples start with an `empty` label and end with `eos` or truncated to length of `maxsample`.
-        You need to supply `predict` which returns the label probability of each sample.
-        `use_unk` allows usage of the `oov` (out-of-vocabulary) label in samples
-        """
+    def __init__(self, predict, initial_state, prime_labels):
+        """Initializes the beam search.
+
+        Args:
+            predict:
+                A function that takes a `sample` and a `state`. It then performs
+                the computation on the last word in `sample`.
+            initial_state:
+                The initial state of the RNN.
+            prime_labels:
+                A list of labels corresponding to the priming text. This must
+                not be empty.
+        """
+        if not prime_labels:
+            raise ValueError('prime_labels must be a non-empty list.')
+        self.predict = predict
+        self.initial_state = initial_state
+        self.prime_labels = prime_labels
+
+    def predict_samples(self, samples, states):
+        probs = []
+        next_states = []
+        for i in range(len(samples)):
+            prob, next_state = self.predict(samples[i], states[i])
+            probs.append(prob.squeeze())
+            next_states.append(next_state)
+        return np.array(probs), next_states
+
+    def search(self, oov, eos, k=1, maxsample=4000, use_unk=False):
+        """Return k samples (beams) and their NLL scores.
+
+        Each sample is a sequence of labels, either ending with `eos` or
+        truncated to length of `maxsample`. `use_unk` allows usage of the
+        `oov` (out-of-vocabulary) label in samples.
+        """
+
+        # A list of probabilities of our samples.
+        probs = []
+
+        prime_sample = []
+        prime_score = 0
+        prime_state = self.initial_state
+
+        # Initialize the live sample with the prime.
+        for i, label in enumerate(self.prime_labels):
+            prime_sample.append(label)
+
+            # The first word does not contribute to the score as the probs
+            # have not yet been determined.
+            if i > 0:
+                prime_score = prime_score - np.log(probs[0, label])
+            probs, prime_state = self.predict(prime_sample, prime_state)
+
         dead_k = 0  # samples that reached eos
         dead_samples = []
         dead_scores = []
+        dead_states = []
 
         live_k = 1  # samples that have not yet reached eos
-        live_samples = [[empty]]
-        live_scores = [0]
+        live_samples = [prime_sample]
+        live_scores = [prime_score]
+        live_states = [prime_state]
 
         while live_k and dead_k < k:
 
             # total score for every sample is the sum of -log of word prob
-            cand_scores = np.array(live_scores)[:, None] - np.log(self.probs)
+            cand_scores = np.array(live_scores)[:, None] - np.log(probs)
             if not use_unk and oov is not None:
                 cand_scores[:, oov] = 1e20
             cand_flat = cand_scores.flatten()
@@ -33,19 +79,25 @@ def beamsearch(self, oov, empty, eos, k=1, maxsample=4000, use_unk=False):
             live_scores = cand_flat[ranks_flat]
 
             # append the new words to their appropriate live sample
-            voc_size = self.probs.shape[1]
+            voc_size = probs.shape[1]
             live_samples = [live_samples[r // voc_size] + [r % voc_size] for r in ranks_flat]
+            live_states = [live_states[r // voc_size] for r in ranks_flat]
 
             # live samples that should be dead are...
             zombie = [s[-1] == eos or len(s) >= maxsample for s in live_samples]
 
             # add zombies to the dead
             dead_samples += [s for s, z in zip(live_samples, zombie) if z]
             dead_scores += [s for s, z in zip(live_scores, zombie) if z]
+            dead_states += [s for s, z in zip(live_states, zombie) if z]
             dead_k = len(dead_samples)
             # remove zombies from the living
             live_samples = [s for s, z in zip(live_samples, zombie) if not z]
             live_scores = [s for s, z in zip(live_scores, zombie) if not z]
+            live_states = [s for s, z in zip(live_states, zombie) if not z]
             live_k = len(live_samples)
 
+            # Finally, compute the next-step probabilities and states.
+            probs, live_states = self.predict_samples(live_samples, live_states)
+
         return dead_samples + live_samples, dead_scores + live_scores
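For reference, the `predict` callable the new constructor expects — given the sample so far and an RNN state, return `(probs, next_state)` with `probs` of shape `[1, vocab_size]` — can be sketched with a fake bigram table standing in for the RNN. Everything here (`BIGRAM`, the vocabulary size, the `None` state) is an illustrative assumption, not code from this PR:

```python
import numpy as np

# Fake next-word distributions: row i holds P(next word | last word == i)
# over a 3-word vocabulary. A real `predict` would run the RNN instead.
BIGRAM = np.array([
    [0.1, 0.8, 0.1],   # after word 0
    [0.3, 0.1, 0.6],   # after word 1
    [0.4, 0.4, 0.2],   # after word 2
])

def predict(sample, state):
    """Satisfies the BeamSearch contract: look only at the last word of
    `sample`, return probs of shape [1, vocab_size] plus the next state."""
    last_word = sample[-1]
    probs = BIGRAM[last_word][None, :]
    next_state = state  # a real model would return the updated RNN state
    return probs, next_state

probs, state = predict([0, 1], state=None)
```

`predict_samples` then maps this callable over every live beam, which is why `predict` only has to handle one sample and one state at a time.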