# Noteworthy Differences
# Classification of noteworthy differences between revisions of Wikipedia articles: an AI alignment project
# 20251114 jmd version 1

from google import genai
from google.genai import types
from pydantic import BaseModel
from dotenv import load_dotenv
import json
import os
import pandas as pd
from prompts import analyzer_prompts, judge_prompt
from retry_with_backoff import retry_with_backoff
import logfire

# Load API keys (e.g., GEMINI_API_KEY) from .env
load_dotenv()

# Setup Logfire
logfire.configure()

# This wraps Google Gen AI client calls
# to capture prompts, responses, and metadata
logfire.instrument_google_genai()

# Initialize the Gemini client
client = genai.Client()


@retry_with_backoff()
def classifier(old_revision, new_revision, prompt_style):
    """
    Classify noteworthy differences between revisions of a Wikipedia article

    Args:
        old_revision: Old revision of article
        new_revision: New revision of article

    Returns:
        noteworthy: True if the differences are noteworthy; False if not
        rationale: One-sentence rational for the classification
    """

    # Return a null classification for missing revisions
    if pd.isna(old_revision) or pd.isna(new_revision):
        return {"noteworthy": None, "rationale": None}

    # Get prompt template for given style
    prompt_template = analyzer_prompts[prompt_style]

    # Add article revisions to prompt
    prompt = prompt_template.replace("{{old_revision}}", old_revision).replace(
        "{{new_revision}}", new_revision
    )
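    # NOTE: substitution uses str.replace on literal {{...}} markers rather
    # than str.format, so stray braces in the article text cannot break
    # templating.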

    # Define response schema
    class Response(BaseModel):
        noteworthy: bool
        rationale: str

    # Generate a structured JSON response matching the schema
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=Response.model_json_schema(),
        ),
    )

    return json.loads(response.text)
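
# Example call (illustrative sketch; "heuristic" is an assumed key in
# prompts.analyzer_prompts, so substitute whichever styles prompts.py defines):
#
#     result = classifier(
#         old_revision="The city has 10,000 residents.",
#         new_revision="The city has 12,000 residents as of the 2020 census.",
#         prompt_style="heuristic",
#     )
#     result["noteworthy"]  # -> bool
#     result["rationale"]   # -> one-sentence explanation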


@retry_with_backoff()
def judge(old_revision, new_revision, rationale_1, rationale_2, mode="unaligned"):
    """
    AI judge to settle disagreements between classification models

    Args:
        old_revision: Old revision of article
        new_revision: New revision of article
        rationale_1: Rationale provided by model 1 (i.e., heuristic prompt)
        rationale_2: Rationale provided by model 2 (i.e., few-shot prompt)
        mode: Prompt mode: unaligned, aligned-fewshot, or aligned-heuristic

    Returns:
        noteworthy: True if the differences are noteworthy; False if not
        reasoning: One-sentence reason for the judgment
    """

    prompt = judge_prompt
    # Add article revisions to prompt
    prompt = prompt.replace("{{old_revision}}", old_revision).replace(
        "{{new_revision}}", new_revision
    )
    # Add rationales to prompt
    prompt = prompt.replace("{{model_1_rationale}}", rationale_1).replace(
        "{{model_2_rationale}}", rationale_2
    )

    # Optionally add alignment text to the prompt
    if mode == "unaligned":
        alignment_text = ""
    elif mode == "aligned-fewshot":
        with open("data/alignment_fewshot.txt", "r") as file:
            alignment_text = file.read()
    elif mode == "aligned-heuristic":
        with open("data/alignment_heuristic.txt", "r") as file:
            alignment_text = file.read()
    else:
        raise ValueError(f"Unknown mode: {mode}")

    prompt = prompt.replace("{{alignment_text}}", alignment_text)

    # Define response schema
    class Response(BaseModel):
        noteworthy: bool
        reasoning: str

    # Generate a structured JSON response matching the schema
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=Response.model_json_schema(),
        ),
    )

    return json.loads(response.text)
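

if __name__ == "__main__":
    # Minimal end-to-end sketch (illustrative only; makes live API calls).
    # The style keys "heuristic" and "fewshot" are assumptions about what
    # prompts.analyzer_prompts defines; swap in the real keys.
    old = "The city has 10,000 residents."
    new = "The city has 12,000 residents as of the 2020 census."

    result_1 = classifier(old, new, prompt_style="heuristic")
    result_2 = classifier(old, new, prompt_style="fewshot")

    if result_1["noteworthy"] == result_2["noteworthy"]:
        print(f"Models agree: noteworthy={result_1['noteworthy']}")
    else:
        verdict = judge(
            old, new, result_1["rationale"], result_2["rationale"], mode="unaligned"
        )
        print(f"Judge verdict: noteworthy={verdict['noteworthy']}")
        print(f"Reasoning: {verdict['reasoning']}")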