At the same time, I moved the logic that checks whether the language should be updated into the new LanguageUpdater class. The README has been updated to reflect the fact that you never need to do any of this manually anymore.

8 years ago · e4b693b206
--- a/README.md
+++ b/README.md
@@ -17,35 +17,21 @@ but adds a lot of features that go beyond the original purpose of Blather.
 
				
				 
			
 
				
				 ## Usage
			
 
				
				 
			
 
				
				-1. Move commands.tmp to ~/.config/blather/commands.conf and fill the file with
			
 
				
				+1. Move commands.tmp to ~/.config/kaylee/commands.conf and fill the file with
			
 
				
				 sentences and command to run
			
 
				
				-2. Run blather.py, this will generate ~/.config/blather/sentences.corpus based
			
 
				
				-on sentences in the 'commands' file
			
 
				
				-3. Quit Kaylee (there is a good chance it will just segfault)
			
 
				
				-4. Go to <http://www.speech.cs.cmu.edu/tools/lmtool.html> and upload the
			
 
				
				-sentences.corpus file
			
 
				
				-5. Download the resulting XXXX.lm file to the ~/.config/blather/language
			
 
				
				-directory and rename to file to 'lm'
			
 
				
				-6. Download the resulting XXXX.dic file to the ~/.config/blather/language
			
 
				
				-directory and rename to file to 'dic'
			
 
				
				-7. Run blather.py
			
 
				
				+2. Run blather.py.  This will generate ~/.local/share/kaylee/sentences.corpus
			
 
				
				+based on sentences in the 'commands' file, then use
			
 
				
				+<http://www.speech.cs.cmu.edu/tools/lmtool.html> to create and save a new
			
 
				
				+language model and dictionary.
			
 
				
				     * For GTK UI, run blather.py -i g
			
 
				
				     * To start a UI in 'continuous' listen mode, use the -c flag
			
 
				
				     * To use a microphone other than the system default, use the -m flag
			
 
				
				-8. Start talking
			
 
				
				+3. Start talking
			
 
				
				 
			
 
				
				 **Note:** to start Kaylee without needing to enter command line options all the
			
 
				
				-time, copy options.json.tmp to ~/.config/blather/options.json and edit
			
 
				
				+time, copy options.json.tmp to ~/.config/kaylee/options.json and edit
			
 
				
				 accordingly.
			
 
				
				 
			
 
				
				-### Bonus
			
 
				
				-
			
 
				
				-~~Once the sentences.corpus file has been created, run the language_updater.sh
			
 
				
				-script to automate the process of creating and downloading language files.~~
			
 
				
				-
			
 
				
				-Kaylee now updates the language automatically.  You should never need to run
			
 
				
				-language_updater.sh manually.
			
 
				
				-
			
 
				
				 ### Examples
			
 
				
				 
			
 
				
				 * To run Kaylee with the GTK UI and start in continuous listen mode:
			
--- a/blather.py
+++ b/blather.py
@@ -16,6 +16,7 @@ import json
 
				
				 
			
 
				
				 from recognizer import Recognizer
			
 
				
				 from config import Config
			
 
				
				+from languageupdater import LanguageUpdater
			
 
				
				 
			
 
				
				 
			
 
				
				 class Blather:
			
@@ -35,7 +36,7 @@ class Blather:
 
				
				         # Read the commands
			
 
				
				         self.read_commands()
			
 
				
				 
			
 
				
				-        if self.options['interface'] != None:
			
 
				
				+        if self.options['interface']:
			
 
				
				             if self.options['interface'] == "g":
			
 
				
				                 from gtkui import UI
			
 
				
				             elif self.options['interface'] == "gt":
			
@@ -59,7 +60,8 @@ class Blather:
 
				
				             self.history = []
			
 
				
				 
			
 
				
				         # Update the language if necessary
			
 
				
				-        self.update_language()
			
 
				
				+        self.language_updater = LanguageUpdater(self.config)
			
 
				
				+        self.language_updater.update_language_if_changed()
			
 
				
				 
			
 
				
				         # Create the recognizer
			
 
				
				         self.recognizer = Recognizer(self.config)
			
@@ -95,34 +97,6 @@ class Blather:
 
				
				             # Close the file
			
 
				
				             hfile.close()
			
 
				
				 
			
 
				
				-    def update_language(self):
			
 
				
				-        """Update the language if its hash has changed"""
			
 
				
				-        # Load the stored hash from the hash file
			
 
				
				-        try:
			
 
				
				-            with open(self.config.hash_file, 'r') as f:
			
 
				
				-                hashes = json.load(f)
			
 
				
				-            stored_hash = hashes['language']
			
 
				
				-        except (IOError, KeyError, TypeError):
			
 
				
				-            # No stored hash
			
 
				
				-            stored_hash = ''
			
 
				
				-
			
 
				
				-        # Calculate the hash the language file has right now
			
 
				
				-        hasher = hashlib.sha256()
			
 
				
				-        with open(self.config.strings_file, 'rb') as sfile:
			
 
				
				-            buf = sfile.read()
			
 
				
				-            hasher.update(buf)
			
 
				
				-        new_hash = hasher.hexdigest()
			
 
				
				-
			
 
				
				-        # If the hashes differ
			
 
				
				-        if stored_hash != new_hash:
			
 
				
				-            # Update the language
			
 
				
				-            # FIXME: Do this with Python, not Bash
			
 
				
				-            self.run_command('./language_updater.sh')
			
 
				
				-            # Store the new hash
			
 
				
				-            new_hashes = {'language': new_hash}
			
 
				
				-            with open(self.config.hash_file, 'w') as f:
			
 
				
				-                json.dump(new_hashes, f)
			
 
				
				-
			
 
				
				     def run_command(self, cmd):
			
 
				
				         """Print the command, then run it"""
			
 
				
				         print(cmd)
			
--- a/language_updater.sh
+++ b/language_updater.sh
@@ -1,32 +0,0 @@
 
				
				-#!/bin/bash
			
 
				
				-
			
 
				
				-blatherdir=~/.config/kaylee
			
 
				
				-blatherdatadir=~/.local/share/kaylee
			
 
				
				-blathercachedir=~/.cache/kaylee
			
 
				
				-sentences=$blatherdatadir/sentences.corpus
			
 
				
				-sourcefile=$blatherdir/commands.conf
			
 
				
				-tempfile=$blathercachedir/url.txt
			
 
				
				-lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run
			
 
				
				-
			
 
				
				-cd $blatherdir
			
 
				
				-
			
 
				
				-sed -f - $sourcefile > $sentences <<EOFcommands
			
 
				
				-  /^$/d
			
 
				
				-  /^#/d
			
 
				
				-  s/\:.*$//
			
 
				
				-EOFcommands
			
 
				
				-
			
 
				
				-# upload corpus file, find the resulting dictionary file url
			
 
				
				-curl -L -F corpus=@"$sentences" -F formtype=simple $lmtoolurl \
			
 
				
				-  |grep -A 1 "base name" |grep http \
			
 
				
				-  | sed -e 's/^.*\="//' | sed -e 's/\.tgz.*$//' | sed -e 's/TAR//' > $tempfile
			
 
				
				-
			
 
				
				-# download the .dic and .lm files
			
 
				
				-curl -C - -O $(cat $tempfile).dic
			
 
				
				-curl -C - -O $(cat $tempfile).lm
			
 
				
				-
			
 
				
				-# mv em to the right name/place
			
 
				
				-mv *.dic $blatherdatadir/dic
			
 
				
				-mv *.lm $blatherdatadir/lm
			
 
				
				-
			
 
				
				-rm $tempfile
			
--- a/languageupdater.py
+++ b/languageupdater.py
@@ -0,0 +1,83 @@
 
				
				+# This is part of Kaylee
			
 
				
				+# -- this code is licensed GPLv3
			
 
				
				+# Copyright 2013 Jezra
			
 
				
				+# Copyright 2015 Clayton G. Hobbs
			
 
				
				+
			
 
				
				+import hashlib
			
 
				
				+import json
			
 
				
				+import re
			
 
				
				+
			
 
				
				+import requests
			
 
				
				+
			
 
				
				+class LanguageUpdater:
			
 
				
				+
			
 
				
				+    def __init__(self, config):
			
 
				
				+        self.config = config
			
 
				
				+
			
 
				
				+    def update_language_if_changed(self):
			
 
				
				+        """Test if the language has changed, and if it has, update it"""
			
 
				
				+        if self.language_has_changed():
			
 
				
				+            self.update_language()
			
 
				
				+            self.save_language_hash()
			
 
				
				+
			
 
				
				+    def language_has_changed(self):
			
 
				
				+        """Use SHA256 hashes to test if the language has changed"""
			
 
				
				+        # Load the stored hash from the hash file
			
 
				
				+        try:
			
 
				
				+            with open(self.config.hash_file, 'r') as f:
			
 
				
				+                hashes = json.load(f)
			
 
				
				+            self.stored_hash = hashes['language']
			
 
				
				+        except (IOError, KeyError, TypeError):
			
 
				
				+            # No stored hash
			
 
				
				+            self.stored_hash = ''
			
 
				
				+
			
 
				
				+        # Calculate the hash the language file has right now
			
 
				
				+        hasher = hashlib.sha256()
			
 
				
				+        with open(self.config.strings_file, 'rb') as sfile:
			
 
				
				+            buf = sfile.read()
			
 
				
				+            hasher.update(buf)
			
 
				
				+        self.new_hash = hasher.hexdigest()
			
 
				
				+
			
 
				
				+        return self.new_hash != self.stored_hash
			
 
				
				+
			
 
				
				+    def update_language(self):
			
 
				
				+        """Update the language using the online lmtool"""
			
 
				
				+        print('Updating language using online lmtool')
			
 
				
				+
			
 
				
				+        host = 'http://www.speech.cs.cmu.edu'
			
 
				
				+        url = host + '/cgi-bin/tools/lmtool/run'
			
 
				
				+
			
 
				
				+        # Prepare request
			
 
				
				+        files = {'corpus': open(self.config.strings_file, 'rb')}
			
 
				
				+        values = {'formtype': 'simple'}
			
 
				
				+
			
 
				
				+        # Send corpus to the server
			
 
				
				+        r = requests.post(url, files=files, data=values)
			
 
				
				+
			
 
				
				+        # Parse response to get URLs of the files we need
			
 
				
				+        for line in r.text.split('\n'):
			
 
				
				+            # If we found the directory, keep it and don't break
			
 
				
				+            if re.search(r'.*<title>Index of (.*?)</title>.*', line):
			
 
				
				+                path = host + re.sub(r'.*<title>Index of (.*?)</title>.*', r'\1', line)
			
 
				
				+            # If we found the number, keep it and break
			
 
				
				+            elif re.search(r'.*TAR[0-9]*?\.tgz.*', line):
			
 
				
				+                number = re.sub(r'.*TAR([0-9]*?)\.tgz.*', r'\1', line)
			
 
				
				+                break
			
 
				
				+
			
 
				
				+        lm_url = path + '/' + number + '.lm'
			
 
				
				+        dic_url = path + '/' + number + '.dic'
			
 
				
				+
			
 
				
				+        self._download_file(lm_url, self.config.lang_file)
			
 
				
				+        self._download_file(dic_url, self.config.dic_file)
			
 
				
				+
			
 
				
				+    def save_language_hash(self):
			
 
				
				+        new_hashes = {'language': self.new_hash}
			
 
				
				+        with open(self.config.hash_file, 'w') as f:
			
 
				
				+            json.dump(new_hashes, f)
			
 
				
				+
			
 
				
				+    def _download_file(self, url, path):
			
 
				
				+        r = requests.get(url, stream=True)
			
 
				
				+        if r.status_code == 200:
			
 
				
				+            with open(path, 'wb') as f:
			
 
				
				+                for chunk in r:
			
 
				
				+                    f.write(chunk)