Browse Source

Rewrote language_updater.sh in Python

At the same time, I moved the logic that checks whether the language
should be updated into the new LanguageUpdater class.  The README has
been updated to reflect that you never need to update the language
manually anymore.
Clara Hobbs 8 years ago
parent
commit
e4b693b206
4 changed files with 94 additions and 83 deletions
  1. 7
    21
      README.md
  2. 4
    30
      blather.py
  3. 0
    32
      language_updater.sh
  4. 83
    0
      languageupdater.py

+ 7
- 21
README.md View File

@@ -17,35 +17,21 @@ but adds a lot of features that go beyond the original purpose of Blather.
17 17
 
18 18
 ## Usage
19 19
 
20
-1. Move commands.tmp to ~/.config/blather/commands.conf and fill the file with
20
+1. Move commands.tmp to ~/.config/kaylee/commands.conf and fill the file with
21 21
 sentences and commands to run
22
-2. Run blather.py, this will generate ~/.config/blather/sentences.corpus based
23
-on sentences in the 'commands' file
24
-3. Quit Kaylee (there is a good chance it will just segfault)
25
-4. Go to <http://www.speech.cs.cmu.edu/tools/lmtool.html> and upload the
26
-sentences.corpus file
27
-5. Download the resulting XXXX.lm file to the ~/.config/blather/language
28
-directory and rename the file to 'lm'
29
-6. Download the resulting XXXX.dic file to the ~/.config/blather/language
30
-directory and rename the file to 'dic'
31
-7. Run blather.py
22
+2. Run blather.py.  This will generate ~/.local/share/kaylee/sentences.corpus
23
+based on sentences in the 'commands' file, then use
24
+<http://www.speech.cs.cmu.edu/tools/lmtool.html> to create and save a new
25
+language model and dictionary.
32 26
     * For GTK UI, run blather.py -i g
33 27
     * To start a UI in 'continuous' listen mode, use the -c flag
34 28
     * To use a microphone other than the system default, use the -m flag
35
-8. Start talking
29
+3. Start talking
36 30
 
37 31
 **Note:** to start Kaylee without needing to enter command line options all the
38
-time, copy options.json.tmp to ~/.config/blather/options.json and edit
32
+time, copy options.json.tmp to ~/.config/kaylee/options.json and edit
39 33
 accordingly.
40 34
 
41
-### Bonus
42
-
43
-~~Once the sentences.corpus file has been created, run the language_updater.sh
44
-script to automate the process of creating and downloading language files.~~
45
-
46
-Kaylee now updates the language automatically.  You should never need to run
47
-language_updater.sh manually.
48
-
49 35
 ### Examples
50 36
 
51 37
 * To run Kaylee with the GTK UI and start in continuous listen mode:

+ 4
- 30
blather.py View File

@@ -16,6 +16,7 @@ import json
16 16
 
17 17
 from recognizer import Recognizer
18 18
 from config import Config
19
+from languageupdater import LanguageUpdater
19 20
 
20 21
 
21 22
 class Blather:
@@ -35,7 +36,7 @@ class Blather:
35 36
         # Read the commands
36 37
         self.read_commands()
37 38
 
38
-        if self.options['interface'] != None:
39
+        if self.options['interface']:
39 40
             if self.options['interface'] == "g":
40 41
                 from gtkui import UI
41 42
             elif self.options['interface'] == "gt":
@@ -59,7 +60,8 @@ class Blather:
59 60
             self.history = []
60 61
 
61 62
         # Update the language if necessary
62
-        self.update_language()
63
+        self.language_updater = LanguageUpdater(self.config)
64
+        self.language_updater.update_language_if_changed()
63 65
 
64 66
         # Create the recognizer
65 67
         self.recognizer = Recognizer(self.config)
@@ -95,34 +97,6 @@ class Blather:
95 97
             # Close the file
96 98
             hfile.close()
97 99
 
98
-    def update_language(self):
99
-        """Update the language if its hash has changed"""
100
-        # Load the stored hash from the hash file
101
-        try:
102
-            with open(self.config.hash_file, 'r') as f:
103
-                hashes = json.load(f)
104
-            stored_hash = hashes['language']
105
-        except (IOError, KeyError, TypeError):
106
-            # No stored hash
107
-            stored_hash = ''
108
-
109
-        # Calculate the hash the language file has right now
110
-        hasher = hashlib.sha256()
111
-        with open(self.config.strings_file, 'rb') as sfile:
112
-            buf = sfile.read()
113
-            hasher.update(buf)
114
-        new_hash = hasher.hexdigest()
115
-
116
-        # If the hashes differ
117
-        if stored_hash != new_hash:
118
-            # Update the language
119
-            # FIXME: Do this with Python, not Bash
120
-            self.run_command('./language_updater.sh')
121
-            # Store the new hash
122
-            new_hashes = {'language': new_hash}
123
-            with open(self.config.hash_file, 'w') as f:
124
-                json.dump(new_hashes, f)
125
-
126 100
     def run_command(self, cmd):
127 101
         """Print the command, then run it"""
128 102
         print(cmd)

+ 0
- 32
language_updater.sh View File

@@ -1,32 +0,0 @@
1
-#!/bin/bash
2
-
3
-blatherdir=~/.config/kaylee
4
-blatherdatadir=~/.local/share/kaylee
5
-blathercachedir=~/.cache/kaylee
6
-sentences=$blatherdatadir/sentences.corpus
7
-sourcefile=$blatherdir/commands.conf
8
-tempfile=$blathercachedir/url.txt
9
-lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run
10
-
11
-cd $blatherdir
12
-
13
-sed -f - $sourcefile > $sentences <<EOFcommands
14
-  /^$/d
15
-  /^#/d
16
-  s/\:.*$//
17
-EOFcommands
18
-
19
-# upload corpus file, find the resulting dictionary file url
20
-curl -L -F corpus=@"$sentences" -F formtype=simple $lmtoolurl \
21
-  |grep -A 1 "base name" |grep http \
22
-  | sed -e 's/^.*\="//' | sed -e 's/\.tgz.*$//' | sed -e 's/TAR//' > $tempfile
23
-
24
-# download the .dic and .lm files
25
-curl -C - -O $(cat $tempfile).dic
26
-curl -C - -O $(cat $tempfile).lm
27
-
28
-# mv em to the right name/place
29
-mv *.dic $blatherdatadir/dic
30
-mv *.lm $blatherdatadir/lm
31
-
32
-rm $tempfile

+ 83
- 0
languageupdater.py View File

@@ -0,0 +1,83 @@
1
+# This is part of Kaylee
2
+# -- this code is licensed GPLv3
3
+# Copyright 2013 Jezra
4
+# Copyright 2015 Clayton G. Hobbs
5
+
6
+import hashlib
7
+import json
8
+import re
9
+
10
+import requests
11
+
12
+class LanguageUpdater:
13
+
14
+    def __init__(self, config):
15
+        self.config = config
16
+
17
+    def update_language_if_changed(self):
18
+        """Test if the language has changed, and if it has, update it"""
19
+        if self.language_has_changed():
20
+            self.update_language()
21
+            self.save_language_hash()
22
+
23
+    def language_has_changed(self):
24
+        """Use SHA256 hashes to test if the language has changed"""
25
+        # Load the stored hash from the hash file
26
+        try:
27
+            with open(self.config.hash_file, 'r') as f:
28
+                hashes = json.load(f)
29
+            self.stored_hash = hashes['language']
30
+        except (IOError, KeyError, TypeError):
31
+            # No stored hash
32
+            self.stored_hash = ''
33
+
34
+        # Calculate the hash the language file has right now
35
+        hasher = hashlib.sha256()
36
+        with open(self.config.strings_file, 'rb') as sfile:
37
+            buf = sfile.read()
38
+            hasher.update(buf)
39
+        self.new_hash = hasher.hexdigest()
40
+
41
+        return self.new_hash != self.stored_hash
42
+
43
+    def update_language(self):
44
+        """Update the language using the online lmtool"""
45
+        print('Updating language using online lmtool')
46
+
47
+        host = 'http://www.speech.cs.cmu.edu'
48
+        url = host + '/cgi-bin/tools/lmtool/run'
49
+
50
+        # Prepare request
51
+        files = {'corpus': open(self.config.strings_file, 'rb')}
52
+        values = {'formtype': 'simple'}
53
+
54
+        # Send corpus to the server
55
+        r = requests.post(url, files=files, data=values)
56
+
57
+        # Parse response to get URLs of the files we need
58
+        for line in r.text.split('\n'):
59
+            # If we found the directory, keep it and don't break
60
+            if re.search(r'.*<title>Index of (.*?)</title>.*', line):
61
+                path = host + re.sub(r'.*<title>Index of (.*?)</title>.*', r'\1', line)
62
+            # If we found the number, keep it and break
63
+            elif re.search(r'.*TAR[0-9]*?\.tgz.*', line):
64
+                number = re.sub(r'.*TAR([0-9]*?)\.tgz.*', r'\1', line)
65
+                break
66
+
67
+        lm_url = path + '/' + number + '.lm'
68
+        dic_url = path + '/' + number + '.dic'
69
+
70
+        self._download_file(lm_url, self.config.lang_file)
71
+        self._download_file(dic_url, self.config.dic_file)
72
+
73
+    def save_language_hash(self):
74
+        new_hashes = {'language': self.new_hash}
75
+        with open(self.config.hash_file, 'w') as f:
76
+            json.dump(new_hashes, f)
77
+
78
+    def _download_file(self, url, path):
79
+        r = requests.get(url, stream=True)
80
+        if r.status_code == 200:
81
+            with open(path, 'wb') as f:
82
+                for chunk in r:
83
+                    f.write(chunk)

Loading…
Cancel
Save