# This is part of Kaylee # -- this code is licensed GPLv3 # Copyright 2015-2016 Clayton G. Hobbs # Portions Copyright 2013 Jezra import re import requests from hasher import Hasher class LanguageUpdater: def __init__(self, config): self.config = config self.hasher = Hasher(config) def update_language_if_changed(self): """Test if the language has changed, and if it has, update it""" if self.language_has_changed(): self.update_language() self.save_language_hash() def language_has_changed(self): """Use hashes to test if the language has changed""" self.stored_hash = self.hasher['language'] # Calculate the hash the language file has right now hasher = self.hasher.get_hash_object() with open(self.config.strings_file, 'rb') as sfile: buf = sfile.read() hasher.update(buf) self.new_hash = hasher.hexdigest() return self.new_hash != self.stored_hash def update_language(self): """Update the language using the online lmtool""" print('Updating language using online lmtool') host = 'http://www.speech.cs.cmu.edu' url = host + '/cgi-bin/tools/lmtool/run' # Submit the corpus to the lmtool response_text = "" with open(self.config.strings_file, 'rb') as corpus: files = {'corpus': corpus} values = {'formtype': 'simple'} r = requests.post(url, files=files, data=values) response_text = r.text # Parse response to get URLs of the files we need path_re = r'.*