Переглянути джерело

Rewrote language_updater.sh in Python

At the same time, I moved the logic that checks whether the language should
be updated into the new LanguageUpdater class.  The README has been updated
to reflect that you never need to do any of this manually.
Clara Hobbs 8 роки тому
джерело
коміт
e4b693b206
4 змінених файлів з 94 додано та 83 видалено
  1. 7
    21
      README.md
  2. 4
    30
      blather.py
  3. 0
    32
      language_updater.sh
  4. 83
    0
      languageupdater.py

+ 7
- 21
README.md Переглянути файл

17
 
17
 
18
 ## Usage
18
 ## Usage
19
 
19
 
20
-1. Move commands.tmp to ~/.config/blather/commands.conf and fill the file with
20
+1. Move commands.tmp to ~/.config/kaylee/commands.conf and fill the file with
21
 sentences and commands to run
21
 sentences and commands to run
22
-2. Run blather.py, this will generate ~/.config/blather/sentences.corpus based
23
-on sentences in the 'commands' file
24
-3. Quit Kaylee (there is a good chance it will just segfault)
25
-4. Go to <http://www.speech.cs.cmu.edu/tools/lmtool.html> and upload the
26
-sentences.corpus file
27
-5. Download the resulting XXXX.lm file to the ~/.config/blather/language
28
-directory and rename the file to 'lm'
29
-6. Download the resulting XXXX.dic file to the ~/.config/blather/language
30
-directory and rename the file to 'dic'
31
-7. Run blather.py
22
+2. Run blather.py.  This will generate ~/.local/share/kaylee/sentences.corpus
23
+based on sentences in the 'commands' file, then use
24
+<http://www.speech.cs.cmu.edu/tools/lmtool.html> to create and save a new
25
+language model and dictionary.
32
     * For GTK UI, run blather.py -i g
26
     * For GTK UI, run blather.py -i g
33
     * To start a UI in 'continuous' listen mode, use the -c flag
27
     * To start a UI in 'continuous' listen mode, use the -c flag
34
     * To use a microphone other than the system default, use the -m flag
28
     * To use a microphone other than the system default, use the -m flag
35
-8. Start talking
29
+3. Start talking
36
 
30
 
37
 **Note:** to start Kaylee without needing to enter command line options all the
31
 **Note:** to start Kaylee without needing to enter command line options all the
38
-time, copy options.json.tmp to ~/.config/blather/options.json and edit
32
+time, copy options.json.tmp to ~/.config/kaylee/options.json and edit
39
 accordingly.
33
 accordingly.
40
 
34
 
41
-### Bonus
42
-
43
-~~Once the sentences.corpus file has been created, run the language_updater.sh
44
-script to automate the process of creating and downloading language files.~~
45
-
46
-Kaylee now updates the language automatically.  You should never need to run
47
-language_updater.sh manually.
48
-
49
 ### Examples
35
 ### Examples
50
 
36
 
51
 * To run Kaylee with the GTK UI and start in continuous listen mode:
37
 * To run Kaylee with the GTK UI and start in continuous listen mode:

+ 4
- 30
blather.py Переглянути файл

16
 
16
 
17
 from recognizer import Recognizer
17
 from recognizer import Recognizer
18
 from config import Config
18
 from config import Config
19
+from languageupdater import LanguageUpdater
19
 
20
 
20
 
21
 
21
 class Blather:
22
 class Blather:
35
         # Read the commands
36
         # Read the commands
36
         self.read_commands()
37
         self.read_commands()
37
 
38
 
38
-        if self.options['interface'] != None:
39
+        if self.options['interface']:
39
             if self.options['interface'] == "g":
40
             if self.options['interface'] == "g":
40
                 from gtkui import UI
41
                 from gtkui import UI
41
             elif self.options['interface'] == "gt":
42
             elif self.options['interface'] == "gt":
59
             self.history = []
60
             self.history = []
60
 
61
 
61
         # Update the language if necessary
62
         # Update the language if necessary
62
-        self.update_language()
63
+        self.language_updater = LanguageUpdater(self.config)
64
+        self.language_updater.update_language_if_changed()
63
 
65
 
64
         # Create the recognizer
66
         # Create the recognizer
65
         self.recognizer = Recognizer(self.config)
67
         self.recognizer = Recognizer(self.config)
95
             # Close the file
97
             # Close the file
96
             hfile.close()
98
             hfile.close()
97
 
99
 
98
-    def update_language(self):
99
-        """Update the language if its hash has changed"""
100
-        # Load the stored hash from the hash file
101
-        try:
102
-            with open(self.config.hash_file, 'r') as f:
103
-                hashes = json.load(f)
104
-            stored_hash = hashes['language']
105
-        except (IOError, KeyError, TypeError):
106
-            # No stored hash
107
-            stored_hash = ''
108
-
109
-        # Calculate the hash the language file has right now
110
-        hasher = hashlib.sha256()
111
-        with open(self.config.strings_file, 'rb') as sfile:
112
-            buf = sfile.read()
113
-            hasher.update(buf)
114
-        new_hash = hasher.hexdigest()
115
-
116
-        # If the hashes differ
117
-        if stored_hash != new_hash:
118
-            # Update the language
119
-            # FIXME: Do this with Python, not Bash
120
-            self.run_command('./language_updater.sh')
121
-            # Store the new hash
122
-            new_hashes = {'language': new_hash}
123
-            with open(self.config.hash_file, 'w') as f:
124
-                json.dump(new_hashes, f)
125
-
126
     def run_command(self, cmd):
100
     def run_command(self, cmd):
127
         """Print the command, then run it"""
101
         """Print the command, then run it"""
128
         print(cmd)
102
         print(cmd)

+ 0
- 32
language_updater.sh Переглянути файл

1
-#!/bin/bash
2
-
3
-blatherdir=~/.config/kaylee
4
-blatherdatadir=~/.local/share/kaylee
5
-blathercachedir=~/.cache/kaylee
6
-sentences=$blatherdatadir/sentences.corpus
7
-sourcefile=$blatherdir/commands.conf
8
-tempfile=$blathercachedir/url.txt
9
-lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run
10
-
11
-cd $blatherdir
12
-
13
-sed -f - $sourcefile > $sentences <<EOFcommands
14
-  /^$/d
15
-  /^#/d
16
-  s/\:.*$//
17
-EOFcommands
18
-
19
-# upload corpus file, find the resulting dictionary file url
20
-curl -L -F corpus=@"$sentences" -F formtype=simple $lmtoolurl \
21
-  |grep -A 1 "base name" |grep http \
22
-  | sed -e 's/^.*\="//' | sed -e 's/\.tgz.*$//' | sed -e 's/TAR//' > $tempfile
23
-
24
-# download the .dic and .lm files
25
-curl -C - -O $(cat $tempfile).dic
26
-curl -C - -O $(cat $tempfile).lm
27
-
28
-# mv em to the right name/place
29
-mv *.dic $blatherdatadir/dic
30
-mv *.lm $blatherdatadir/lm
31
-
32
-rm $tempfile

+ 83
- 0
languageupdater.py Переглянути файл

1
+# This is part of Kaylee
2
+# -- this code is licensed GPLv3
3
+# Copyright 2013 Jezra
4
+# Copyright 2015 Clayton G. Hobbs
5
+
6
+import hashlib
7
+import json
8
+import re
9
+
10
+import requests
11
+
12
+class LanguageUpdater:
13
+
14
+    def __init__(self, config):
15
+        self.config = config
16
+
17
+    def update_language_if_changed(self):
18
+        """Test if the language has changed, and if it has, update it"""
19
+        if self.language_has_changed():
20
+            self.update_language()
21
+            self.save_language_hash()
22
+
23
+    def language_has_changed(self):
24
+        """Use SHA256 hashes to test if the language has changed"""
25
+        # Load the stored hash from the hash file
26
+        try:
27
+            with open(self.config.hash_file, 'r') as f:
28
+                hashes = json.load(f)
29
+            self.stored_hash = hashes['language']
30
+        except (IOError, KeyError, TypeError):
31
+            # No stored hash
32
+            self.stored_hash = ''
33
+
34
+        # Calculate the hash the language file has right now
35
+        hasher = hashlib.sha256()
36
+        with open(self.config.strings_file, 'rb') as sfile:
37
+            buf = sfile.read()
38
+            hasher.update(buf)
39
+        self.new_hash = hasher.hexdigest()
40
+
41
+        return self.new_hash != self.stored_hash
42
+
43
+    def update_language(self):
44
+        """Update the language using the online lmtool"""
45
+        print('Updating language using online lmtool')
46
+
47
+        host = 'http://www.speech.cs.cmu.edu'
48
+        url = host + '/cgi-bin/tools/lmtool/run'
49
+
50
+        # Prepare request
51
+        files = {'corpus': open(self.config.strings_file, 'rb')}
52
+        values = {'formtype': 'simple'}
53
+
54
+        # Send corpus to the server
55
+        r = requests.post(url, files=files, data=values)
56
+
57
+        # Parse response to get URLs of the files we need
58
+        for line in r.text.split('\n'):
59
+            # If we found the directory, keep it and don't break
60
+            if re.search(r'.*<title>Index of (.*?)</title>.*', line):
61
+                path = host + re.sub(r'.*<title>Index of (.*?)</title>.*', r'\1', line)
62
+            # If we found the number, keep it and break
63
+            elif re.search(r'.*TAR[0-9]*?\.tgz.*', line):
64
+                number = re.sub(r'.*TAR([0-9]*?)\.tgz.*', r'\1', line)
65
+                break
66
+
67
+        lm_url = path + '/' + number + '.lm'
68
+        dic_url = path + '/' + number + '.dic'
69
+
70
+        self._download_file(lm_url, self.config.lang_file)
71
+        self._download_file(dic_url, self.config.dic_file)
72
+
73
+    def save_language_hash(self):
74
+        new_hashes = {'language': self.new_hash}
75
+        with open(self.config.hash_file, 'w') as f:
76
+            json.dump(new_hashes, f)
77
+
78
+    def _download_file(self, url, path):
79
+        r = requests.get(url, stream=True)
80
+        if r.status_code == 200:
81
+            with open(path, 'wb') as f:
82
+                for chunk in r:
83
+                    f.write(chunk)

Завантаження…
Відмінити
Зберегти