Spaces:
Running
Running
show score composition
Browse files
app.py
CHANGED
|
@@ -44,11 +44,23 @@ def process_ocr_qa(text, lang_choice):
|
|
| 44 |
if 'language' in result:
|
| 45 |
output_lines.append(f"π Language: {result['language']}")
|
| 46 |
|
| 47 |
-
# Quality score
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
score_emoji = "🟢" if score >= 0.8 else "🟡" if score >= 0.5 else "🔴"
|
| 51 |
-
output_lines.append(
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
# Diagnostics section
|
| 54 |
if 'diagnostics' in result and result['diagnostics']:
|
|
@@ -61,12 +73,12 @@ def process_ocr_qa(text, lang_choice):
|
|
| 61 |
# Known tokens
|
| 62 |
if 'known_tokens' in diagnostics and diagnostics['known_tokens']:
|
| 63 |
known_tokens = diagnostics['known_tokens']
|
| 64 |
-
output_lines.append(f"✅ Known unique tokens ({
|
| 65 |
|
| 66 |
# Unknown tokens (potential OCR errors)
|
| 67 |
if 'unknown_tokens' in diagnostics and diagnostics['unknown_tokens']:
|
| 68 |
unknown_tokens = diagnostics['unknown_tokens']
|
| 69 |
-
output_lines.append(f"❌ Unrecognized unique tokens (
|
| 70 |
elif 'unknown_tokens' in diagnostics:
|
| 71 |
output_lines.append("✨ All tokens were known — no OCR errors detected.")
|
| 72 |
|
|
|
|
| 44 |
if 'language' in result:
|
| 45 |
output_lines.append(f"π Language: {result['language']}")
|
| 46 |
|
| 47 |
+
# Quality score (with ratio)
|
| 48 |
+
score = result.get("score")
|
| 49 |
+
diagnostics = result.get("diagnostics", {})
|
| 50 |
+
known_tokens = diagnostics.get("known_tokens", [])
|
| 51 |
+
unknown_tokens = diagnostics.get("unknown_tokens", [])
|
| 52 |
+
|
| 53 |
+
if score is not None:
|
| 54 |
+
k = len(known_tokens)
|
| 55 |
+
u = len(unknown_tokens)
|
| 56 |
+
total = k + u
|
| 57 |
+
ratio = f"{k}/{total}" if total > 0 else "0/0"
|
| 58 |
+
|
| 59 |
score_emoji = "🟢" if score >= 0.8 else "🟡" if score >= 0.5 else "🔴"
|
| 60 |
+
output_lines.append(
|
| 61 |
+
f"{score_emoji} Quality Score: {score:.3f} ({ratio})"
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
|
| 65 |
# Diagnostics section
|
| 66 |
if 'diagnostics' in result and result['diagnostics']:
|
|
|
|
| 73 |
# Known tokens
|
| 74 |
if 'known_tokens' in diagnostics and diagnostics['known_tokens']:
|
| 75 |
known_tokens = diagnostics['known_tokens']
|
| 76 |
+
output_lines.append(f"✅ Known unique tokens ({k}):\n{' '.join(known_tokens)}")
|
| 77 |
|
| 78 |
# Unknown tokens (potential OCR errors)
|
| 79 |
if 'unknown_tokens' in diagnostics and diagnostics['unknown_tokens']:
|
| 80 |
unknown_tokens = diagnostics['unknown_tokens']
|
| 81 |
+
output_lines.append(f"❌ Unrecognized unique tokens ({u}):\n{' '.join(unknown_tokens)}")
|
| 82 |
elif 'unknown_tokens' in diagnostics:
|
| 83 |
output_lines.append("✨ All tokens were known — no OCR errors detected.")
|
| 84 |
|