"""Tiny word indexer that records in MySQL which words occur in which files.

Run as a script it indexes every regular file matched by ``./index_dir/*``
and then answers word look-ups read from standard input, one word per line,
until end of input.
"""

try:
    import CompatMysqldb
except ImportError:
    # Keep parse_file()/index_file() importable without the MySQL driver;
    # main() reports the missing driver before touching the database.
    CompatMysqldb = None
import os
import sys
import re
import glob

# Kept for backward compatibility; nothing in this file reads it.
count_id = 0

# A word is a run of three or more \w characters; shorter runs are skipped.
_WORD_RE = re.compile(r"\w{3,}")
# Everything that is not a word character is stripped from typed queries.
_NON_WORD_RE = re.compile(r"\W")


def _sql_escape(text):
    """Escape backslashes and single quotes for a quoted SQL string literal.

    NOTE(review): hand-rolled escaping that assumes MySQL's default
    backslash-escape mode; switch to the driver's own quoting if it offers one.
    """
    return text.replace("\\", "\\\\").replace("'", "\\'")


def index_file(parsed_str, filename, db, id):
    """Record every word of one file in the ``words``/``occur`` tables.

    Args:
        parsed_str: iterable of words found in the file (see parse_file()).
        filename: name stored in the ``occur.file`` column.
        db: object offering ``execute(sql)`` (compared against 0 to detect an
            empty result) and ``fetchall()``.
        id: first unused word id.

    Returns:
        The next unused word id, to be passed to the following call.  On a
        database error the function prints a message, stops indexing this
        file and still returns the next unused id, so ids that were already
        handed out are never reused by the caller.
    """
    next_id = id
    safe_file = _sql_escape(filename)
    for word in parsed_str:
        try:
            cons = "select id from words where word='%s'" % _sql_escape(word)
            if db.execute(cons) != 0:
                # The word is already in the words table: reuse its id.
                old_id = db.fetchall()[0][0]
                cons = ("insert into occur(file,id) values ('%s',%d)"
                        % (safe_file, old_id))
                db.execute(cons)
            else:
                word_id = next_id
                cons = ("insert into words(word, id) values ('%s',%d)"
                        % (_sql_escape(word), word_id))
                db.execute(cons)
                # The id is consumed as soon as the word row exists, even if
                # the occurrence insert below fails.
                next_id = next_id + 1
                cons = ("insert into occur(file,id) values ('%s',%d)"
                        % (safe_file, word_id))
                db.execute(cons)
        except Exception:
            # The original clause named the CompatMysqldb module itself, which
            # is not an exception class and therefore never matched.
            print("Error en mysql select de words")
            return next_id
        print("word: %s id: %d" % (word, next_id))
    return next_id


def parse_file(filename):
    """Return the words of *filename*, in order of appearance.

    A word is a maximal run of ``\\w`` characters at least three characters
    long; punctuation, whitespace and shorter runs only act as separators.
    """
    with open(filename, "rt") as handle:
        text = handle.read()
    return _WORD_RE.findall(text)


def main():
    """Index ``./index_dir/*`` and then serve look-ups typed on stdin."""
    if CompatMysqldb is None:
        raise ImportError("CompatMysqldb is required to run the indexer")
    next_id = 0
    # Needs a database with tables occur(file char(), id int) and
    # words(word char(), id int, primary key (word)).
    db = CompatMysqldb.mysqldb('DATABASE@localhost USER PASS')
    for path in glob.glob('./index_dir/*'):
        if not os.path.isfile(path):
            # Directories and other non-files cannot be read as text.
            continue
        print("Indexing  %s  ..." % path)
        next_id = index_file(parse_file(path), path, db, next_id)
    while True:
        line = sys.stdin.readline()
        if not line:
            # readline() returns "" only at end of input; stop instead of
            # querying the empty word forever.
            break
        paraula = _NON_WORD_RE.sub("", line)
        print("%s a" % paraula)
        cons = "select id from words where word='%s'" % paraula
        print(cons)
        if db.execute(cons) != 0:
            # The word is in the words table: list the files containing it.
            cons = "select file from occur where id='%d'" % db.fetchall()[0][0]
            print(cons)
            db.execute(cons)
            print(db.fetchall())


if __name__ == "__main__":
    main()