| import os, shutil | |
| from uparser import wordparse | |
| from joblib import Parallel, delayed | |
| from tqdm import tqdm | |
# --- Configuration ---------------------------------------------------------
num_jobs = 20          # not referenced anywhere in the visible part of this script
infolder = 'Original'  # directory the input files are read from
outfolder = 'Words'    # directory the generated output is written to

# Start every run from an empty output directory: remove whatever a previous
# run left behind, then create the folder again.
for fdr in (outfolder,):
    if os.path.exists(fdr):
        shutil.rmtree(fdr)
    os.mkdir(fdr)
# For every file in `infolder`, keep only the first tab-separated field of
# each line and write those fields, one per line, to `<outfolder>/<name>.words`.
flist = os.listdir(infolder)
for fname in flist:
    # NOTE(review): no encoding is passed to open(), so the platform default is
    # used for both reading and writing -- confirm that matches the data.
    with open(os.path.join(infolder, fname), 'r') as f:
        # Iterate the file handle directly instead of buffering it with
        # readlines().  A blank line yields an empty string, so the output
        # keeps exactly one line per input line.
        words = [line.strip().split('\t')[0] for line in f]

    # The output name is the text after the first underscore of the input
    # name, cut at the next underscore or at the first dot.
    # NOTE(review): a file name without an underscore raises IndexError here
    # and aborts the run; that behaviour is kept as-is.
    fout = fname.split('_')[1]
    fout = fout.split('.')[0]
    print(fout)

    with open(os.path.join(outfolder, f'{fout}.words'), 'w') as f:
        f.writelines(w + '\n' for w in words)