"""First trial of an indexer: a local, in-memory prototype.

Builds an inverted index (word -> list of file names) from every path
matched by ``./index_dir/*`` and then answers single-word queries read from
standard input, one query per line.
"""
import os
import sys
import re
import glob

# Inverted index shared by the whole module: word -> list of file names,
# in the order the files were first seen for that word.
word_index = {}

# A token is a maximal run of at least three word characters; one- and
# two-character words are deliberately not indexed.
_WORD_RE = re.compile(r"\w{3,}")

# Used to strip everything but word characters (including the trailing
# newline) from a query line.
_NON_WORD_RE = re.compile(r"\W")


def index_file(parsed_str, filename):
    """Record in ``word_index`` that every word of ``parsed_str`` occurs in ``filename``.

    Args:
        parsed_str: iterable of words (as returned by ``parse_file``).
        filename: name stored in the index for each of those words.

    Returns:
        None. The module-level ``word_index`` is updated in place; a file
        name is listed at most once per word.
    """
    for word in parsed_str:
        # setdefault creates the posting list the first time a word is seen,
        # so no exception is needed for the "new word" case.
        files = word_index.setdefault(word, [])
        if filename not in files:
            files.append(filename)


def parse_file(filename):
    """Return the list of indexable words found in the file ``filename``.

    Words are maximal runs of word characters (``\\w``) that are at least
    three characters long, returned in order of appearance with duplicates
    kept. Case is preserved.

    Args:
        filename: path of the text file to read.

    Returns:
        A list of strings; empty when the file contains no such word.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, "rt") as handle:
        text = handle.read()
    # One pass replaces the former chain of substitutions (punctuation ->
    # space, drop 1-2 letter words, mark word boundaries, extract words):
    # every separator is a non-word character, so the result is the same.
    return _WORD_RE.findall(text)


def main():
    """Index every entry of ``./index_dir`` and serve queries from stdin.

    Each input line is reduced to its word characters and looked up in the
    index; the matching file names are printed one per line, or
    ``no trobat`` when the word is unknown. The loop ends at end of input.
    """
    for path in glob.glob('./index_dir/*'):
        print("Indexing  %s  ..." % path)
        index_file(parse_file(path), path)

    while True:
        line = sys.stdin.readline()
        if not line:
            # readline() returns "" only at EOF; without this check the loop
            # would spin forever once stdin is closed.
            break
        paraula = _NON_WORD_RE.sub("", line)
        print("%s a" % paraula)
        files = word_index.get(paraula)
        if files is None:
            print("no trobat")
        else:
            for path in files:
                print(path)


if __name__ == "__main__":
    main()