Spaces:

simondh
/

classifieur

Sleeping

App Files Files Community

simondh committed on Apr 15

Commit

4f9ecb6

1 Parent(s): 535a3a5

new endpoints

Browse files

Files changed (2) hide show

server.py +130 -0
test_server.py +85 -1

server.py CHANGED Viewed

@@ -9,6 +9,8 @@ import asyncio
 from client import get_client, initialize_client
 import os
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
@@ -44,14 +46,67 @@ class TextInput(BaseModel):
     text: str
     categories: Optional[List[str]] = None
 class ClassificationResponse(BaseModel):
     category: str
     confidence: float
     explanation: str
 class CategorySuggestionResponse(BaseModel):
     categories: List[str]
 @app.post("/classify", response_model=ClassificationResponse)
 async def classify_text(text_input: TextInput) -> ClassificationResponse:
     try:
@@ -70,6 +125,27 @@ async def classify_text(text_input: TextInput) -> ClassificationResponse:
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/suggest-categories", response_model=CategorySuggestionResponse)
 async def suggest_categories(texts: List[str]) -> CategorySuggestionResponse:
     try:
@@ -78,6 +154,60 @@ async def suggest_categories(texts: List[str]) -> CategorySuggestionResponse:
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run("server:app", host="0.0.0.0", port=8000, reload=True)

 from client import get_client, initialize_client
 import os
 from dotenv import load_dotenv
+import pandas as pd
+from utils import validate_results
 # Load environment variables
 load_dotenv()
     text: str
     categories: Optional[List[str]] = None
+# Request body for POST /classify-batch: several texts classified in one call.
+class BatchTextInput(BaseModel):
+    # Texts to classify; the handler forwards them to classifier.classify_async.
+    texts: List[str]
+    # Optional category list forwarded alongside the texts; defaults to None.
+    # NOTE(review): how the classifier treats None is not visible here — confirm.
+    categories: Optional[List[str]] = None
 class ClassificationResponse(BaseModel):
     category: str
     confidence: float
     explanation: str
+# Response body for POST /classify-batch.
+class BatchClassificationResponse(BaseModel):
+    # One ClassificationResponse per result returned by the classifier.
+    results: List[ClassificationResponse]
 class CategorySuggestionResponse(BaseModel):
     categories: List[str]
+# Response body for GET /model-info.
+class ModelInfoResponse(BaseModel):
+    model_name: str  # the handler fills this from classifier.model
+    model_version: str  # the handler reports the literal "1.0"
+    max_tokens: int  # the handler reports the literal 200
+    temperature: float  # the handler reports the literal 0
+# Response body for GET /health.
+class HealthResponse(BaseModel):
+    status: str  # the handler always sends "healthy"
+    model_ready: bool  # True when the `client` name is not None
+    api_key_configured: bool  # True when the `api_key` name is not None
+# One already-classified text submitted to POST /validate.
+class ValidationSample(BaseModel):
+    text: str
+    assigned_category: str
+    # The /validate handler flags samples with confidence < 70, so this is
+    # presumably on a 0-100 scale — TODO confirm against the classifier output.
+    confidence: float
+# Request body for POST /validate.
+class ValidationRequest(BaseModel):
+    samples: List[ValidationSample]
+    # NOTE(review): accepted here but not read by the /validate handler.
+    current_categories: List[str]
+    # Passed to validate_results as its second argument.
+    text_columns: List[str]
+# Response body for POST /validate.
+class ValidationResponse(BaseModel):
+    validation_report: str  # text returned by validate_results
+    # Fraction derived from a percentage found in the report (percent / 100);
+    # stays None when no value could be parsed.
+    accuracy_score: Optional[float] = None
+    # Samples below the handler's confidence threshold, as
+    # {"text": ..., "current_category": ...} dicts.
+    misclassifications: Optional[List[Dict[str, Any]]] = None
+    suggested_improvements: Optional[List[str]] = None
+@app.get("/health", response_model=HealthResponse)
+async def health_check() -> HealthResponse:
+    """Check the health status of the API"""
+    # Readiness is inferred purely from whether these names are set (not None).
+    has_client: bool = client is not None
+    has_api_key: bool = api_key is not None
+    return HealthResponse(
+        status="healthy",
+        model_ready=has_client,
+        api_key_configured=has_api_key,
+    )
+@app.get("/model-info", response_model=ModelInfoResponse)
+async def get_model_info() -> ModelInfoResponse:
+    """Get information about the current model configuration"""
+    # Only the model name is read from the live classifier; the remaining
+    # fields are fixed literals in this handler.
+    info = ModelInfoResponse(
+        model_name=classifier.model,
+        model_version="1.0",
+        max_tokens=200,
+        temperature=0,
+    )
+    return info
 @app.post("/classify", response_model=ClassificationResponse)
 async def classify_text(text_input: TextInput) -> ClassificationResponse:
     try:
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+@app.post("/classify-batch", response_model=BatchClassificationResponse)
+async def classify_batch(batch_input: BatchTextInput) -> BatchClassificationResponse:
+    """Classify multiple texts in a single request"""
+    try:
+        raw_results: List[Dict[str, Any]] = await classifier.classify_async(
+            batch_input.texts, batch_input.categories
+        )
+        # Re-wrap each raw dict so the payload matches the response model.
+        responses: List[ClassificationResponse] = []
+        for item in raw_results:
+            responses.append(
+                ClassificationResponse(
+                    category=item["category"],
+                    confidence=item["confidence"],
+                    explanation=item["explanation"],
+                )
+            )
+        return BatchClassificationResponse(results=responses)
+    except Exception as e:
+        # Any failure (classifier error, missing key) surfaces as HTTP 500.
+        raise HTTPException(status_code=500, detail=str(e))
 @app.post("/suggest-categories", response_model=CategorySuggestionResponse)
 async def suggest_categories(texts: List[str]) -> CategorySuggestionResponse:
     try:
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+@app.post("/validate", response_model=ValidationResponse)
+async def validate_classifications(validation_request: ValidationRequest) -> ValidationResponse:
+    """Validate classification results and provide improvement suggestions"""
+    import re  # local import: only this handler needs it
+    # Samples under this confidence are reported as likely misclassifications.
+    # NOTE(review): assumes confidence is on a 0-100 scale — confirm.
+    low_confidence_threshold = 70
+    try:
+        samples = validation_request.samples
+        # validate_results is given a DataFrame, so reshape the samples into one.
+        df = pd.DataFrame([
+            {
+                "text": sample.text,
+                "Category": sample.assigned_category,
+                "Confidence": sample.confidence
+            }
+            for sample in samples
+        ])
+        validation_report: str = validate_results(df, validation_request.text_columns, client)
+        # Pull the first "accuracy ... NN%" figure out of the free-text report.
+        # Punctuation between the word and the number (e.g. "Accuracy: 85%") is
+        # tolerated; the earlier split-based parsing failed on it and, behind a
+        # bare except, silently left the score unset.
+        accuracy_score: Optional[float] = None
+        accuracy_match = re.search(
+            r"accuracy\D*?(\d+(?:\.\d+)?)\s*%", validation_report, re.IGNORECASE
+        )
+        if accuracy_match is not None:
+            accuracy_score = float(accuracy_match.group(1)) / 100
+        # Low-confidence samples stand in for misclassifications; the report
+        # text itself is not parsed for them.
+        misclassifications: List[Dict[str, Any]] = [
+            {"text": sample.text, "current_category": sample.assigned_category}
+            for sample in samples
+            if sample.confidence < low_confidence_threshold
+        ]
+        # Fixed advice list; it does not depend on the report contents.
+        suggested_improvements: List[str] = [
+            "Review low confidence classifications",
+            "Consider adding more training examples",
+            "Refine category definitions"
+        ]
+        return ValidationResponse(
+            validation_report=validation_report,
+            accuracy_score=accuracy_score,
+            misclassifications=misclassifications,
+            suggested_improvements=suggested_improvements
+        )
+    except Exception as e:
+        # Any failure while validating surfaces to the caller as HTTP 500.
+        raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run("server:app", host="0.0.0.0", port=8000, reload=True)

test_server.py CHANGED Viewed

@@ -4,6 +4,18 @@ from typing import List, Dict, Any, Optional
 BASE_URL: str = "http://localhost:8000"
 def test_classify_text() -> None:
     # Load emails from CSV file
     import csv
@@ -23,6 +35,25 @@ def test_classify_text() -> None:
         print(f"Classification of email '{email['sujet']}' with default categories:")
         print(json.dumps(response.json(), indent=2))
 def test_suggest_categories() -> None:
     # Load reviews from CSV file
@@ -43,7 +74,60 @@ def test_suggest_categories() -> None:
     print("\nSuggested categories:")
     print(json.dumps(response.json(), indent=2))
 if __name__ == "__main__":
     print("Testing FastAPI server endpoints...")
     test_classify_text()
-    test_suggest_categories()

 BASE_URL: str = "http://localhost:8000"
+def test_health_check() -> None:
+    """Call GET /health and print the JSON body."""
+    health_url: str = f"{BASE_URL}/health"
+    response: requests.Response = requests.get(health_url)
+    print("\nHealth check response:")
+    payload = response.json()
+    print(json.dumps(payload, indent=2))
+def test_model_info() -> None:
+    """Call GET /model-info and print the JSON body."""
+    info_url: str = f"{BASE_URL}/model-info"
+    response: requests.Response = requests.get(info_url)
+    print("\nModel info response:")
+    payload = response.json()
+    print(json.dumps(payload, indent=2))
 def test_classify_text() -> None:
     # Load emails from CSV file
     import csv
         print(f"Classification of email '{email['sujet']}' with default categories:")
         print(json.dumps(response.json(), indent=2))
+def test_classify_batch() -> None:
+    """Send the first five example emails to /classify-batch and print the reply."""
+    import csv
+    # Read every row of the example CSV as a dict keyed by column name.
+    with open("examples/emails.csv", "r", encoding="utf-8") as file:
+        emails: List[Dict[str, str]] = list(csv.DictReader(file))
+    batch_size = 5
+    texts: List[str] = []
+    for email in emails[:batch_size]:
+        texts.append(email["contenu"])
+    payload: Dict[str, Any] = {"texts": texts}
+    response: requests.Response = requests.post(
+        f"{BASE_URL}/classify-batch", json=payload
+    )
+    print("\nBatch classification results:")
+    print(json.dumps(response.json(), indent=2))
 def test_suggest_categories() -> None:
     # Load reviews from CSV file
     print("\nSuggested categories:")
     print(json.dumps(response.json(), indent=2))
+def test_validate_classifications() -> None:
+    """Classify five example emails, then submit them to /validate and print the reply."""
+    import csv
+    with open("examples/emails.csv", "r", encoding="utf-8") as file:
+        emails: List[Dict[str, str]] = list(csv.DictReader(file))
+    subset: List[Dict[str, str]] = emails[:5]
+    # Each sample pairs the email body with the label /classify assigns to it.
+    samples: List[Dict[str, Any]] = []
+    for email in subset:
+        body: str = email["contenu"]
+        classify_response: requests.Response = requests.post(
+            f"{BASE_URL}/classify", json={"text": body}
+        )
+        classification: Dict[str, Any] = classify_response.json()
+        samples.append({
+            "text": body,
+            "assigned_category": classification["category"],
+            "confidence": classification["confidence"],
+        })
+    # Ask the server to propose categories for the same emails.
+    categories_response: requests.Response = requests.post(
+        f"{BASE_URL}/suggest-categories",
+        json=[email["contenu"] for email in subset],
+    )
+    current_categories: List[str] = categories_response.json()["categories"]
+    response: requests.Response = requests.post(
+        f"{BASE_URL}/validate",
+        json={
+            "samples": samples,
+            "current_categories": current_categories,
+            "text_columns": ["text"],
+        },
+    )
+    print("\nValidation results:")
+    print(json.dumps(response.json(), indent=2))
 if __name__ == "__main__":
+    # Script entry point: run each endpoint check in turn, in the order listed.
     print("Testing FastAPI server endpoints...")
+    test_health_check()
+    test_model_info()
     test_classify_text()
+    test_classify_batch()
+    test_suggest_categories()
+    test_validate_classifications()