Spaces:
Sleeping
Sleeping
from bs4 import BeautifulSoup
import pandas as pd


def _text_or_na(tag):
    """Return the whitespace-stripped text of ``tag``, or 'N/A' if no tag was found."""
    if not tag:
        return 'N/A'
    return tag.text.strip()


# Load the saved page into memory as one string.
with open("rff.html", 'r', encoding='utf-8') as file:
    html_content = file.read()

# Build the parse tree and collect every <article> carrying the 'card4' class.
soup = BeautifulSoup(html_content, 'html.parser')
articles = soup.find_all('article', class_='card4')

# One record per article. The multi-word class strings are passed to
# BeautifulSoup verbatim, so they are compared against the full class
# attribute value as written in the HTML.
data = [
    {
        # Name comes from the <span>; 'N/A' when the article has no such span.
        'Name': _text_or_na(article.find('span', class_='-a:1 -as:3 -as:t1')),
        # State comes from the <p>; 'N/A' when the article has no such paragraph.
        'State': _text_or_na(article.find('p', class_='card4-role -t:11')),
    }
    for article in articles
]

# Tabulate the records and write them out without the index column.
df = pd.DataFrame(data)
# NOTE(review): the result of tail() is discarded here, so this line shows
# nothing when run as a script -- presumably left over from interactive use.
df.tail()
df.to_csv("rff_endorsements.csv", index=False)