Spaces:
Sleeping
Sleeping
| import re | |
| from MHGTagger.rnn_annotate import annotate | |
| from Tagset_Mappings.tag_mapping import map_tags | |
| from parsing.src.parse import run_parse | |
| from nltk import word_tokenize | |
def parse_text(text):
    """Tokenize, tag and parse *text*, returning each intermediate result.

    The text is split with ``tokenize``, the tokens are passed to
    ``annotate`` (which hands back tokens, tags and a third value kept here
    as ``probs``), the tags are converted with ``map_tags``, and the tokens
    plus converted tags are given to ``run_parse``.

    Returns:
        A 4-tuple ``(tokens, tags, probs, parse_tree)`` where ``tokens`` are
        the tokens as returned by ``annotate``, ``tags`` are the mapped tags,
        and ``parse_tree`` is the first element of the ``run_parse`` result.
    """
    # NOTE(review): `probs` is presumably the tagger's per-token
    # confidence -- confirm against MHGTagger.rnn_annotate.
    words, raw_tags, probs = annotate(tokenize(text))
    mapped_tags = map_tags(raw_tags)
    # Only the first element of the parser's result is used.
    tree = run_parse(words, mapped_tags)[0]
    return words, mapped_tags, probs, tree
def tokenize(text: str):
    """Space out punctuation in *text* and split it with ``word_tokenize``.

    Two substitution passes run in order over the marks ``. , ; : ? ! "``:

    1. A mark followed by any whitespace character has the whitespace before
       it and that one following character replaced by single spaces.
    2. The same rewrite is applied to marks followed by a literal space.
       This also reaches marks that only became space-followed through
       pass 1, e.g. the first of two adjacent marks.

    Returns:
        Whatever ``nltk.word_tokenize`` returns for the padded text.
    """
    # Order matters: the second pattern acts on the output of the first.
    padding_patterns = (
        r'\s*([.,;:?!"])\s',
        r'\s*([.,;:?!"]) ',
    )
    padded = text
    for pattern in padding_patterns:
        padded = re.sub(pattern, r' \1 ', padded)
    return word_tokenize(padded)