# This is part of Kaylee # -- this code is licensed GPLv3 # Copyright 2015 Clayton G. Hobbs # Portions Copyright 2013 Jezra import hashlib import json import re import requests class LanguageUpdater: def __init__(self, config): self.config = config def update_language_if_changed(self): """Test if the language has changed, and if it has, update it""" if self.language_has_changed(): self.update_language() self.save_language_hash() def language_has_changed(self): """Use SHA256 hashes to test if the language has changed""" # Load the stored hash from the hash file try: with open(self.config.hash_file, 'r') as f: hashes = json.load(f) self.stored_hash = hashes['language'] except (IOError, KeyError, TypeError): # No stored hash self.stored_hash = '' # Calculate the hash the language file has right now hasher = hashlib.sha256() with open(self.config.strings_file, 'rb') as sfile: buf = sfile.read() hasher.update(buf) self.new_hash = hasher.hexdigest() return self.new_hash != self.stored_hash def update_language(self): """Update the language using the online lmtool""" print('Updating language using online lmtool') host = 'http://www.speech.cs.cmu.edu' url = host + '/cgi-bin/tools/lmtool/run' # Prepare request files = {'corpus': open(self.config.strings_file, 'rb')} values = {'formtype': 'simple'} # Send corpus to the server r = requests.post(url, files=files, data=values) # Parse response to get URLs of the files we need path_re = r'.*