simon-clmtd committed on
Commit
97e3d11
·
verified ·
1 Parent(s): fb6f7ac

show score composition

Browse files
Files changed (1) hide show
  1. app.py +18 -6
app.py CHANGED
@@ -44,11 +44,23 @@ def process_ocr_qa(text, lang_choice):
44
  if 'language' in result:
45
  output_lines.append(f"🌍 Language: {result['language']}")
46
 
47
- # Quality score
48
- if 'score' in result:
49
- score = result['score']
 
 
 
 
 
 
 
 
 
50
  score_emoji = "🟢" if score >= 0.8 else "🟡" if score >= 0.5 else "🔴"
51
- output_lines.append(f"{score_emoji} Quality Score: {score}")
 
 
 
52
 
53
  # Diagnostics section
54
  if 'diagnostics' in result and result['diagnostics']:
@@ -61,12 +73,12 @@ def process_ocr_qa(text, lang_choice):
61
  # Known tokens
62
  if 'known_tokens' in diagnostics and diagnostics['known_tokens']:
63
  known_tokens = diagnostics['known_tokens']
64
- output_lines.append(f"✅ Known unique tokens ({len(known_tokens)}):\n {' '.join(known_tokens)}")
65
 
66
  # Unknown tokens (potential OCR errors)
67
  if 'unknown_tokens' in diagnostics and diagnostics['unknown_tokens']:
68
  unknown_tokens = diagnostics['unknown_tokens']
69
- output_lines.append(f"❌ Unrecognized unique tokens ({len(unknown_tokens)}):\n {' '.join(unknown_tokens)}")
70
  elif 'unknown_tokens' in diagnostics:
71
  output_lines.append("✨ All tokens were known – no OCR errors detected.")
72
 
 
44
  if 'language' in result:
45
  output_lines.append(f"🌍 Language: {result['language']}")
46
 
47
+ # Quality score (with ratio)
48
+ score = result.get("score")
49
+ diagnostics = result.get("diagnostics", {})
50
+ known_tokens = diagnostics.get("known_tokens", [])
51
+ unknown_tokens = diagnostics.get("unknown_tokens", [])
52
+
53
+ if score is not None:
54
+ k = len(known_tokens)
55
+ u = len(unknown_tokens)
56
+ total = k + u
57
+ ratio = f"{k}/{total}" if total > 0 else "0/0"
58
+
59
  score_emoji = "🟢" if score >= 0.8 else "🟡" if score >= 0.5 else "🔴"
60
+ output_lines.append(
61
+ f"{score_emoji} Quality Score: {score:.3f} ({ratio})"
62
+ )
63
+
64
 
65
  # Diagnostics section
66
  if 'diagnostics' in result and result['diagnostics']:
 
73
  # Known tokens
74
  if 'known_tokens' in diagnostics and diagnostics['known_tokens']:
75
  known_tokens = diagnostics['known_tokens']
76
+ output_lines.append(f"✅ Known unique tokens ({k}):\n{' '.join(known_tokens)}")
77
 
78
  # Unknown tokens (potential OCR errors)
79
  if 'unknown_tokens' in diagnostics and diagnostics['unknown_tokens']:
80
  unknown_tokens = diagnostics['unknown_tokens']
81
+ output_lines.append(f"❌ Unrecognized unique tokens (u}):\n{' '.join(unknown_tokens)}")
82
  elif 'unknown_tokens' in diagnostics:
83
  output_lines.append("✨ All tokens were known – no OCR errors detected.")
84