File size: 7,846 Bytes
eeb0f9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python3
"""
Create Vietnamese Food Nutrition Database
Generates a CSV of roughly 70 Vietnamese foods and their nutrition facts
"""

import csv
import sys
from collections import Counter
from pathlib import Path

# Column names written as the CSV header row. Every row in _FOODS lists its
# values in exactly this order.
_CSV_COLUMNS = (
    'name_vi', 'name_en', 'calories', 'protein_g',
    'carbs_g', 'fat_g', 'fiber_g', 'category',
)

# Built-in nutrition table, one tuple per food, laid out as _CSV_COLUMNS.
# NOTE(review): the "_g" suffix suggests grams; the serving size the numbers
# refer to is not recorded anywhere in this file -- confirm with the data owner.
_FOODS = (
    # Pho and other noodle soups
    ("Phở bò", "Beef Pho", 450, 20, 60, 15, 2, "Noodle Soup"),
    ("Phở gà", "Chicken Pho", 380, 18, 55, 10, 2, "Noodle Soup"),
    ("Phở tái", "Rare Beef Pho", 420, 19, 58, 12, 2, "Noodle Soup"),
    ("Phở chín", "Well-done Beef Pho", 460, 21, 60, 16, 2, "Noodle Soup"),
    ("Bún bò Huế", "Hue Beef Noodle", 500, 22, 65, 18, 3, "Noodle Soup"),
    ("Bún riêu", "Crab Noodle Soup", 420, 18, 58, 14, 3, "Noodle Soup"),
    ("Bún chả cá", "Fish Cake Noodle", 380, 20, 52, 12, 2, "Noodle Soup"),
    ("Hủ tiếu", "Hu Tieu Noodle", 400, 16, 60, 10, 2, "Noodle Soup"),
    ("Mì Quảng", "Quang Noodle", 450, 20, 58, 15, 3, "Noodle Soup"),
    ("Cao lầu", "Cao Lau Noodle", 480, 18, 62, 16, 2, "Noodle Soup"),

    # Vermicelli dishes
    ("Bún chả", "Grilled Pork Vermicelli", 550, 20, 70, 20, 2, "Vermicelli"),
    ("Bún thịt nướng", "Grilled Pork Vermicelli", 520, 22, 68, 18, 2, "Vermicelli"),
    ("Bún bò xào", "Stir-fried Beef Vermicelli", 480, 20, 65, 15, 3, "Vermicelli"),
    ("Bún gà nướng", "Grilled Chicken Vermicelli", 450, 24, 62, 12, 2, "Vermicelli"),
    ("Bún nem nướng", "Grilled Pork Patty Vermicelli", 500, 18, 66, 16, 2, "Vermicelli"),

    # Rice dishes
    ("Cơm tấm", "Broken Rice", 600, 25, 80, 20, 2, "Rice"),
    ("Cơm sườn", "Pork Chop Rice", 650, 28, 85, 22, 2, "Rice"),
    ("Cơm gà", "Chicken Rice", 550, 30, 75, 15, 2, "Rice"),
    ("Cơm chiên", "Fried Rice", 580, 15, 78, 22, 2, "Rice"),
    ("Cơm rang dương châu", "Yang Chow Fried Rice", 620, 18, 82, 24, 2, "Rice"),
    ("Cơm hến", "Clam Rice", 480, 20, 70, 12, 3, "Rice"),
    ("Cơm trắng", "White Rice", 200, 4, 45, 0.5, 1, "Rice"),

    # Banh mi (Vietnamese sandwiches)
    ("Bánh mì thịt", "Pork Banh Mi", 400, 12, 50, 18, 3, "Bread"),
    ("Bánh mì gà", "Chicken Banh Mi", 380, 14, 48, 15, 3, "Bread"),
    ("Bánh mì pate", "Pate Banh Mi", 420, 10, 52, 20, 2, "Bread"),
    ("Bánh mì chả", "Sausage Banh Mi", 390, 13, 49, 17, 3, "Bread"),
    ("Bánh mì trứng", "Egg Banh Mi", 350, 12, 45, 14, 2, "Bread"),

    # Spring rolls and grilled appetizers
    ("Gỏi cuốn", "Fresh Spring Rolls", 150, 8, 20, 5, 2, "Appetizer"),
    ("Nem rán", "Fried Spring Rolls", 250, 10, 25, 15, 1, "Appetizer"),
    ("Chả giò", "Fried Rolls", 280, 12, 28, 16, 1, "Appetizer"),
    ("Nem nướng", "Grilled Pork Patty", 200, 15, 10, 12, 1, "Appetizer"),

    # Cakes and pancakes
    ("Bánh xèo", "Vietnamese Pancake", 350, 12, 40, 18, 2, "Pancake"),
    ("Bánh cuốn", "Steamed Rice Rolls", 180, 8, 28, 6, 1, "Pancake"),
    ("Bánh bột lọc", "Tapioca Dumplings", 200, 6, 35, 5, 1, "Pancake"),
    ("Bánh bèo", "Water Fern Cake", 120, 4, 22, 3, 1, "Pancake"),
    ("Bánh khọt", "Mini Pancakes", 280, 8, 32, 14, 2, "Pancake"),

    # Sticky rice
    ("Xôi gà", "Chicken Sticky Rice", 450, 18, 70, 12, 2, "Sticky Rice"),
    ("Xôi thịt", "Pork Sticky Rice", 480, 16, 72, 14, 2, "Sticky Rice"),
    ("Xôi xéo", "Mung Bean Sticky Rice", 400, 12, 68, 10, 3, "Sticky Rice"),
    ("Xôi lạc", "Peanut Sticky Rice", 420, 14, 65, 13, 3, "Sticky Rice"),

    # Soups and hot pot
    ("Canh chua", "Sour Soup", 180, 12, 15, 8, 3, "Soup"),
    ("Canh rau", "Vegetable Soup", 80, 3, 12, 2, 3, "Soup"),
    ("Canh cá", "Fish Soup", 150, 15, 10, 6, 2, "Soup"),
    ("Lẩu", "Hot Pot", 400, 25, 30, 20, 4, "Soup"),

    # Seafood
    ("Cá kho tộ", "Braised Fish", 280, 25, 8, 18, 1, "Seafood"),
    ("Tôm rang", "Stir-fried Shrimp", 200, 20, 5, 10, 1, "Seafood"),
    ("Mực xào", "Stir-fried Squid", 180, 18, 8, 8, 1, "Seafood"),
    ("Cua rang me", "Tamarind Crab", 220, 16, 12, 12, 1, "Seafood"),

    # Meat dishes
    ("Thịt kho", "Braised Pork", 350, 20, 10, 25, 1, "Meat"),
    ("Sườn nướng", "Grilled Pork Ribs", 400, 22, 8, 30, 1, "Meat"),
    ("Gà nướng", "Grilled Chicken", 280, 28, 5, 15, 0, "Meat"),
    ("Bò lúc lắc", "Shaking Beef", 320, 25, 8, 20, 1, "Meat"),

    # Vegetables
    ("Rau muống xào", "Stir-fried Water Spinach", 60, 3, 8, 2, 2, "Vegetable"),
    ("Cải xào", "Stir-fried Bok Choy", 50, 2, 7, 2, 2, "Vegetable"),
    ("Đậu que xào", "Stir-fried Green Beans", 70, 3, 10, 2, 3, "Vegetable"),
    ("Bí xanh xào", "Stir-fried Zucchini", 55, 2, 8, 2, 2, "Vegetable"),

    # Beverages
    ("Cà phê sữa đá", "Iced Coffee with Milk", 150, 3, 25, 5, 0, "Beverage"),
    ("Cà phê đen", "Black Coffee", 5, 0, 1, 0, 0, "Beverage"),
    ("Trà sữa", "Milk Tea", 250, 4, 45, 8, 0, "Beverage"),
    ("Nước mía", "Sugarcane Juice", 180, 0, 45, 0, 0, "Beverage"),
    ("Sinh tố bơ", "Avocado Smoothie", 280, 4, 35, 15, 6, "Beverage"),
    ("Sinh tố xoài", "Mango Smoothie", 200, 2, 48, 2, 3, "Beverage"),
    ("Nước dừa", "Coconut Water", 45, 1, 9, 0.5, 1, "Beverage"),
    ("Trà đá", "Iced Tea", 2, 0, 0.5, 0, 0, "Beverage"),

    # Desserts
    ("Chè ba màu", "Three Color Dessert", 280, 4, 55, 6, 3, "Dessert"),
    ("Chè đậu xanh", "Mung Bean Dessert", 220, 6, 42, 4, 4, "Dessert"),
    ("Chè bưởi", "Pomelo Dessert", 180, 2, 40, 3, 2, "Dessert"),
    ("Bánh flan", "Flan", 200, 5, 30, 7, 0, "Dessert"),
    ("Sương sa hột lựu", "Tapioca Dessert", 150, 1, 35, 2, 1, "Dessert"),

    # Snacks
    ("Bánh tráng nướng", "Grilled Rice Paper", 180, 4, 32, 4, 1, "Snack"),
    ("Bánh đa", "Rice Cracker", 120, 2, 25, 2, 1, "Snack"),
    ("Khoai lang luộc", "Boiled Sweet Potato", 90, 2, 21, 0.2, 3, "Snack"),
    ("Bắp luộc", "Boiled Corn", 110, 3, 25, 1.5, 3, "Snack"),
)


def vn_food_db(output_dir="data_mining/datasets") -> Path:
    """Write the built-in Vietnamese food nutrition table to a CSV file.

    The output directory is created if it does not exist, and the file
    ``vietnamese_food_nutrition.csv`` inside it is (over)written with a
    header row followed by one row per food. A short report -- file path,
    number of foods, file size and a per-category count -- is printed to
    stdout.

    Args:
        output_dir: Directory (``str`` or ``Path``) to write the CSV into.
            Defaults to ``data_mining/datasets`` relative to the current
            working directory, which is where the script has always written.

    Returns:
        Path of the CSV file that was written.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    csv_path = output_dir / "vietnamese_food_nutrition.csv"

    # newline='' lets the csv module control line endings itself; UTF-8 is
    # required because the Vietnamese names contain non-ASCII characters.
    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(_CSV_COLUMNS)
        writer.writerows(_FOODS)

    print("✅ Created Vietnamese Food Database")
    print(f"   File: {csv_path}")
    print(f"   Foods: {len(_FOODS)}")
    print(f"   Size: {csv_path.stat().st_size / 1024:.1f} KB")

    # Look the category column up by name so the summary keeps working if
    # columns are ever reordered in _CSV_COLUMNS.
    category_col = _CSV_COLUMNS.index('category')
    per_category = Counter(row[category_col] for row in _FOODS)

    print("\n📊 Breakdown by category:")
    # most_common() orders by descending count and keeps first-seen order
    # among categories with equal counts.
    for cat, count in per_category.most_common():
        print(f"   {cat}: {count} foods")

    return csv_path

if __name__ == "__main__":
    # Script entry point: build the CSV, then report the outcome through the
    # process exit status (0 on success, 1 if any exception escaped).
    exit_status = 0
    try:
        vn_food_db()
    except Exception as exc:
        print(f"❌ Error: {exc}")
        # Imported lazily: the traceback module is only needed on failure.
        import traceback
        traceback.print_exc()
        exit_status = 1
    sys.exit(exit_status)