Spaces:
Runtime error
Runtime error
| import spacy | |
| from transformers import pipeline | |
| import re | |
| from dateutil.parser import parse | |
# Regex pattern for dates such as "Jan 5, 2023" or "September 21st, 2021":
# a full or abbreviated English month name, a 1-2 digit day with an optional
# ordinal suffix, a comma, and a four-digit year. Matching is case-sensitive.
_DATE_PATTERN = re.compile(
    r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{1,2}(?:th|st|nd|rd)?,\s+\d{4}\b'
)


def extract_entities(email_text, nlp, ner_pipeline, *, date_parser=None, min_score=0.75):
    """Extract named entities and dates from the text of an email.

    Three independent extractors are run over ``email_text``:

    1. ``nlp(email_text)`` -- every item of the returned object's ``.ents``
       is reported with its ``.text`` and ``.label_``.
    2. ``ner_pipeline(email_text)`` -- every returned mapping whose
       ``"score"`` is strictly greater than ``min_score`` is reported.
    3. A regular expression for "<Month> <day>, <year>" dates; each match is
       normalised to ``YYYY-MM-DD``.

    Args:
        email_text: The text to analyse. ``None``, empty, or whitespace-only
            input yields an empty result without invoking either model.
        nlp: Callable returning an object with an ``ents`` iterable whose
            items expose ``text`` and ``label_`` (presumably a spaCy
            ``Language`` pipeline -- confirm against the caller).
        ner_pipeline: Callable returning an iterable of mappings with the
            keys ``"word"``, ``"score"`` and either ``"entity"`` or
            ``"entity_group"`` (presumably a transformers
            token-classification pipeline -- confirm against the caller).
        date_parser: Optional callable turning a matched date string into an
            object with ``strftime``. Defaults to ``dateutil.parser.parse``
            as imported at the top of this file.
        min_score: Exclusive lower bound on the transformer confidence score.

    Returns:
        A dict with the keys ``"spaCy Entities"`` (list of
        ``{"Text", "Type"}`` dicts), ``"Transformer Entities"`` (list of
        ``{"Text", "Type", "Score"}`` dicts) and ``"Dates"`` (list of
        ``YYYY-MM-DD`` strings, in order of appearance).
    """
    # Nothing to analyse: skip the (potentially expensive) model calls.
    if not email_text or not email_text.strip():
        return {
            "spaCy Entities": [],
            "Transformer Entities": [],
            "Dates": [],
        }

    # Use spaCy for initial extraction.
    doc = nlp(email_text)
    spacy_entities = [{"Text": ent.text, "Type": ent.label_} for ent in doc.ents]

    # Use transformer model for refined extraction.
    transformer_entities = []
    for ent in ner_pipeline(email_text):
        if not ent['score'] > min_score:
            continue
        # Aggregated token-classification output carries the label under
        # "entity_group" rather than "entity"; accept either form.
        label = ent['entity_group'] if 'entity_group' in ent else ent['entity']
        transformer_entities.append({
            "Text": ent['word'],
            "Type": label,
            # Pipelines may return NumPy scalars; a plain float keeps the
            # result JSON-serialisable.
            "Score": float(ent['score']),
        })

    # Extract dates using regex.
    if date_parser is None:
        date_parser = parse
    dates = []
    for candidate in _DATE_PATTERN.findall(email_text):
        try:
            parsed = date_parser(candidate)
        except (ValueError, OverflowError):
            # The regex also matches impossible dates such as
            # "February 30, 2023"; skip them instead of failing the whole
            # extraction.
            continue
        dates.append(parsed.strftime('%Y-%m-%d'))

    return {
        "spaCy Entities": spacy_entities,
        "Transformer Entities": transformer_entities,
        "Dates": dates,
    }