ZENLLC committed
Commit 0ae8b34 · verified · 1 Parent(s): 3c0d222

Create app.py

Files changed (1):
  app.py +554 -0
app.py ADDED
@@ -0,0 +1,554 @@
+ import base64
+ import io
+ import os
+ from typing import List, Tuple, Optional
+
+ import gradio as gr
+ from PIL import Image
+
+ # -----------------------
+ # Optional imports guarded at call-time
+ # -----------------------
+
+ def _get_openai_client(api_key: str):
+     from openai import OpenAI  # imported here to avoid issues if lib missing
+     return OpenAI(api_key=api_key)
+
+
+ def _configure_google(api_key: str):
+     import google.generativeai as genai
+     genai.configure(api_key=api_key)
+     return genai
+
+
+ # -----------------------
+ # Helper: apply visual presets
+ # -----------------------
+
+ def apply_preset_to_prompt(
+     base_prompt: str,
+     preset: str,
+     style: str,
+     content_type: str,
+ ) -> str:
+     """Augment the prompt with preset & style language."""
+     base_prompt = base_prompt.strip()
+
+     preset_addons = {
+         "None": "",
+         "ZEN Glass Dashboard": (
+             " ultra-detailed UI, glassmorphism, prismatic alloy panels, "
+             "neon cyan and magenta HUD overlays, high-end enterprise dashboard"
+         ),
+         "Palantir / Anduril Infographic": (
+             " dark enterprise command-center aesthetic, clean vector infographics, "
+             "military-grade analytics overlays, sharp typography, high contrast, "
+             "minimal but dense information layout"
+         ),
+         "Youth AI Literacy Poster": (
+             " vibrant educational poster for teens, clean icons, diverse students, "
+             "friendly but serious tone, clear typography, classroom-ready layout"
+         ),
+         "ZEN AI Arena Card": (
+             " holographic trading card style, quantum glass edges, subtle glow, "
+             "sharp logo lockup, futuristic typography, dramatic lighting"
+         ),
+         "Blueprint / Systems Diagram": (
+             " technical blueprint, white lines on deep navy background, callout labels, "
+             "flow arrows, system nodes, engineering drawing style"
+         ),
+     }
+
+     style_addons = {
+         "Default": "",
+         "Photoreal": " hyper-realistic photography, physically based lighting",
+         "Illustration": " clean vector illustration style, flat colors, crisp lines",
+         "Futuristic UI": " futuristic interface design, HUD, holographic widgets",
+         "Blueprint": " blueprint drawing, schematic lines, engineering grid",
+         "Cinematic": " cinematic lighting, dramatic composition, filmic contrast",
+     }
+
+     ct_addon = ""
+     if content_type == "Image":
+         ct_addon = " high-resolution concept art,"
+     elif content_type == "Infographic Spec":
+         ct_addon = (
+             " detailed infographic design specification, including layout regions, "
+             "sections, labels, and visual hierarchy,"
+         )
+
+     extra = " ".join(
+         x
+         for x in [
+             ct_addon,
+             preset_addons.get(preset, ""),
+             style_addons.get(style, ""),
+         ]
+         if x
+     )
+
+     if extra:
+         if base_prompt:
+             return f"{base_prompt}, {extra}"
+         else:
+             return extra.strip()
+     return base_prompt or "high quality image"
+
+
+ # -----------------------
+ # OpenAI: Text + Image
+ # -----------------------
+
+ def generate_text_openai(
+     api_key: str,
+     prompt: str,
+     mode: str,
+ ) -> str:
+     client = _get_openai_client(api_key)
+     system_msg = (
+         "You are an expert creator for the ZEN AI ecosystem. "
+         "Write clear, concise, high-leverage content. "
+         "If mode is 'Infographic Spec', output a structured outline with sections, "
+         "titles, short captions, and suggested visual elements."
+     )
+
+     if mode == "Infographic Spec":
+         user_prompt = (
+             f"Create a Palantir/Anduril-level infographic specification based on:\n\n{prompt}\n\n"
+             "Return:\n"
+             "1) Title options\n"
+             "2) 3–5 main sections\n"
+             "3) Bullet points for each section\n"
+             "4) Suggested charts/visuals\n"
+             "5) Color and typography recommendations."
+         )
+     else:
+         user_prompt = prompt
+
+     # Using Chat Completions interface
+     resp = client.chat.completions.create(
+         model="gpt-4.1-mini",
+         messages=[
+             {"role": "system", "content": system_msg},
+             {"role": "user", "content": user_prompt},
+         ],
+         temperature=0.7,
+     )
+     return resp.choices[0].message.content
+
+
+ def decode_b64_images(b64_list: List[str]) -> List[Image.Image]:
+     images: List[Image.Image] = []
+     for b64 in b64_list:
+         raw = base64.b64decode(b64)
+         img = Image.open(io.BytesIO(raw)).convert("RGB")
+         images.append(img)
+     return images
+
+
+ def generate_image_openai(
+     api_key: str,
+     model: str,
+     prompt: str,
+     size: str,
+     quality: str,
+     n_images: int,
+     seed: Optional[int],
+ ) -> List[Image.Image]:
+     client = _get_openai_client(api_key)
+
+     # Map UI size choices to API values; gpt-image-1 and DALL·E 3 accept
+     # different portrait/landscape resolutions.
+     if model == "gpt-image-1":
+         size_map = {
+             "Square (1024x1024)": "1024x1024",
+             "Portrait (1024x1792)": "1024x1536",
+             "Landscape (1792x1024)": "1536x1024",
+         }
+     else:
+         size_map = {
+             "Square (1024x1024)": "1024x1024",
+             "Portrait (1024x1792)": "1024x1792",
+             "Landscape (1792x1024)": "1792x1024",
+         }
+     size_param = size_map.get(size, "1024x1024")
+
+     kwargs = {
+         "model": model,
+         "prompt": prompt,
+         "size": size_param,
+         "n": n_images,
+     }
+     if model == "dall-e-3":
+         # DALL·E 3 uses "standard"/"hd" quality, returns URLs unless base64 is
+         # requested explicitly, and only generates one image per request.
+         kwargs["quality"] = quality
+         kwargs["response_format"] = "b64_json"
+         kwargs["n"] = 1
+     else:
+         # gpt-image-1 uses "low"/"medium"/"high" and always returns base64.
+         kwargs["quality"] = {"standard": "medium", "hd": "high"}.get(quality, "high")
+     # NOTE: the OpenAI Images API does not expose a seed parameter, so the
+     # seed argument is accepted for interface symmetry but not forwarded.
+
+     resp = client.images.generate(**kwargs)
+     b64_list = [d.b64_json for d in resp.data]
+     return decode_b64_images(b64_list)
+
+
+ # -----------------------
+ # Google (Gemini / Nano-Banana)
+ # -----------------------
+
+ def generate_text_google(
+     api_key: str,
+     prompt: str,
+     mode: str,
+ ) -> str:
+     genai = _configure_google(api_key)
+     # Default to a strong text model
+     model = genai.GenerativeModel("gemini-1.5-pro")
+
+     if mode == "Infographic Spec":
+         content = (
+             "You are an expert enterprise communicator. "
+             "Create a Palantir/Anduril-grade infographic spec.\n\n"
+             f"Topic / prompt:\n{prompt}\n\n"
+             "Return:\n"
+             "1) Title options\n"
+             "2) Main sections with bullet points\n"
+             "3) Visual layout ideas\n"
+             "4) Chart/visualization suggestions\n"
+             "5) Palette & typography notes."
+         )
+     else:
+         content = prompt
+
+     resp = model.generate_content(content)
+     return resp.text
+
+
+ def generate_image_google(
+     api_key: str,
+     google_image_model: str,
+     prompt: str,
+     n_images: int,
+     seed: Optional[int],
+ ) -> List[Image.Image]:
+     """
+     NOTE: Model & output handling may need adjustment depending on
+     the exact Nano-Banana / Nano-Banana-Pro API you use in Google AI Studio.
+
+     This assumes a GenerativeModel that returns inline image data.
+     """
+     genai = _configure_google(api_key)
+     model = genai.GenerativeModel(google_image_model)
+
+     images: List[Image.Image] = []
+
+     for i in range(n_images):
+         # Some image models support generation_config with a seed;
+         # here we pass it if present.
+         generation_config = {}
+         if seed is not None:
+             generation_config["seed"] = seed + i
+
+         resp = model.generate_content(
+             prompt,
+             generation_config=generation_config or None,
+         )
+
+         # Try to pull image bytes from response parts
+         for cand in resp.candidates:
+             for part in cand.content.parts:
+                 if hasattr(part, "inline_data") and getattr(part.inline_data, "data", None):
+                     data = part.inline_data.data
+                     # Depending on the SDK version, inline_data.data may be raw
+                     # bytes or a base64-encoded string; handle both.
+                     if isinstance(data, str):
+                         data = base64.b64decode(data)
+                     img = Image.open(io.BytesIO(data)).convert("RGB")
+                     images.append(img)
+
+     return images
+
+
+ # -----------------------
+ # Core Gradio callback
+ # -----------------------
+
+ def run_generation(
+     openai_key: str,
+     google_key: str,
+     task_type: str,
+     provider: str,
+     base_prompt: str,
+     negative_prompt: str,
+     preset: str,
+     style: str,
+     size: str,
+     quality: str,
+     n_images: int,
+     seed: int,
+     use_seed: bool,
+     google_image_model: str,
+     google_text_model_hint: str,
+ ) -> Tuple[str, List[Image.Image], str]:
+     """
+     Returns: (text_output, images, debug_info)
+     """
+     text_output = ""
+     images: List[Image.Image] = []
+     debug_lines = []
+
+     if not base_prompt.strip():
+         return "Please enter a prompt.", [], "No prompt provided."
+
+     # Build full prompt for images
+     content_type = "Image" if task_type == "Image" else task_type
+     full_prompt = apply_preset_to_prompt(
+         base_prompt=base_prompt,
+         preset=preset,
+         style=style,
+         content_type=content_type,
+     )
+
+     if negative_prompt.strip():
+         full_prompt += f". Avoid: {negative_prompt.strip()}"
+
+     debug_lines.append(f"Task: {task_type}")
+     debug_lines.append(f"Provider: {provider}")
+     debug_lines.append(f"Preset: {preset}, Style: {style}")
+     debug_lines.append(f"OpenAI image size: {size}, quality: {quality}")
+     debug_lines.append(f"Google image model: {google_image_model}")
+     debug_lines.append(f"Google text model hint: {google_text_model_hint}")
+     debug_lines.append(f"Seed enabled: {use_seed}, seed: {seed if use_seed else 'None'}")
+
+     seed_val: Optional[int] = seed if use_seed else None
+
+     try:
+         # TEXT or INFOGRAPHIC
+         if task_type in ["Text", "Infographic Spec"]:
+             if provider == "OpenAI":
+                 if not openai_key.strip():
+                     return "Missing OpenAI API key.", [], "OpenAI key not provided."
+                 text_output = generate_text_openai(
+                     api_key=openai_key.strip(),
+                     prompt=full_prompt,
+                     mode=task_type,
+                 )
+             else:
+                 if not google_key.strip():
+                     return "Missing Google API key.", [], "Google key not provided."
+                 text_output = generate_text_google(
+                     api_key=google_key.strip(),
+                     prompt=full_prompt,
+                     mode=task_type,
+                 )
+
+         # IMAGE
+         if task_type == "Image":
+             if provider == "OpenAI":
+                 if not openai_key.strip():
+                     return "Missing OpenAI API key.", [], "OpenAI key not provided."
+                 # No separate model dropdown is exposed in the UI: default to
+                 # GPT-Image-1, and switch to DALL·E 3 for the Palantir/Anduril
+                 # preset. Edit this mapping in code to change the behavior.
+                 image_model = "gpt-image-1"
+                 if "Palantir" in preset:
+                     image_model = "dall-e-3"
+
+                 images = generate_image_openai(
+                     api_key=openai_key.strip(),
+                     model=image_model,
+                     prompt=full_prompt,
+                     size=size,
+                     quality=quality,
+                     n_images=n_images,
+                     seed=seed_val,
+                 )
+                 debug_lines.append(f"OpenAI image model: {image_model}")
+             else:
+                 if not google_key.strip():
+                     return "Missing Google API key.", [], "Google key not provided."
+                 images = generate_image_google(
+                     api_key=google_key.strip(),
+                     google_image_model=google_image_model.strip(),
+                     prompt=full_prompt,
+                     n_images=n_images,
+                     seed=seed_val,
+                 )
+
+         if not text_output and task_type == "Image":
+             text_output = (
+                 "Image(s) generated successfully. Switch to 'Text' or 'Infographic Spec' mode "
+                 "to generate descriptive copy or specs."
+             )
+
+         if not images and task_type == "Image":
+             debug_lines.append("No images returned from provider.")
+
+         return text_output, images, "\n".join(debug_lines)
+
+     except Exception as e:
+         return f"Error: {e}", [], "\n".join(debug_lines + [f"Exception: {e}"])
+
+
+ # -----------------------
+ # UI
+ # -----------------------
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown(
+         """
+         # 🧬 ZEN Omni Studio — Text • Images • Infographics
+
+         A unified creator for the ZEN ecosystem.
+
+         - 🔑 **Bring your own keys** for OpenAI + Google (Gemini / Nano-Banana).
+         - 🧠 Generate **text** and **infographic specs** for ZEN, AI literacy, dashboards, and more.
+         - 🎨 Generate **high-quality images** with advanced controls and presets.
+         """
+     )
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown("### 🔐 API Keys (kept only in your browser session)")
+             openai_key = gr.Textbox(
+                 label="OPENAI_API_KEY",
+                 type="password",
+                 placeholder="sk-...",
+             )
+             google_key = gr.Textbox(
+                 label="GOOGLE_API_KEY (Gemini / Nano-Banana)",
+                 type="password",
+                 placeholder="AIza...",
+             )
+
+             gr.Markdown("### 🎯 Task & Provider")
+             task_type = gr.Radio(
+                 ["Image", "Text", "Infographic Spec"],
+                 value="Image",
+                 label="Task Type",
+             )
+             provider = gr.Radio(
+                 ["Google (Nano-Banana / Gemini)", "OpenAI"],
+                 value="Google (Nano-Banana / Gemini)",
+                 label="Primary Provider",
+             )
+
+             # Prompt region
+             base_prompt = gr.Textbox(
+                 label="Main Prompt",
+                 lines=5,
+                 placeholder="Describe what you want to create for ZEN (image, copy, infographic, etc.)",
+             )
+             negative_prompt = gr.Textbox(
+                 label="Negative Prompt (optional)",
+                 lines=2,
+                 placeholder="Things to avoid: low-res, cluttered, distorted text, etc.",
+             )
+
+             with gr.Row():
+                 preset = gr.Dropdown(
+                     [
+                         "None",
+                         "ZEN Glass Dashboard",
+                         "Palantir / Anduril Infographic",
+                         "Youth AI Literacy Poster",
+                         "ZEN AI Arena Card",
+                         "Blueprint / Systems Diagram",
+                     ],
+                     value="ZEN Glass Dashboard",
+                     label="Visual Preset",
+                 )
+                 style = gr.Dropdown(
+                     [
+                         "Default",
+                         "Photoreal",
+                         "Illustration",
+                         "Futuristic UI",
+                         "Blueprint",
+                         "Cinematic",
+                     ],
+                     value="Futuristic UI",
+                     label="Style Accent",
+                 )
+
+             # OpenAI image options
+             gr.Markdown("### 🎛 OpenAI Image Controls (DALL·E / GPT-Image)")
+             with gr.Row():
+                 size = gr.Dropdown(
+                     [
+                         "Square (1024x1024)",
+                         "Portrait (1024x1792)",
+                         "Landscape (1792x1024)",
+                     ],
+                     value="Square (1024x1024)",
+                     label="Aspect Ratio / Size",
+                 )
+                 quality = gr.Dropdown(
+                     ["standard", "hd"],
+                     value="hd",
+                     label="Quality",
+                 )
+                 n_images = gr.Slider(
+                     minimum=1,
+                     maximum=4,
+                     value=1,
+                     step=1,
+                     label="Number of Images",
+                 )
+
+             with gr.Row():
+                 use_seed = gr.Checkbox(
+                     value=False,
+                     label="Lock Seed (repeatable outputs)",
+                 )
+                 seed = gr.Slider(
+                     minimum=1,
+                     maximum=2**31 - 1,
+                     value=12345,
+                     step=1,
+                     label="Seed",
+                 )
+
+             gr.Markdown("### 🧪 Google Image / Text Model Hints")
+             google_image_model = gr.Textbox(
+                 label="Google Image Model (default: Nano-Banana-Pro)",
+                 value="nano-banana-pro",
+                 placeholder="e.g. nano-banana-pro (adjust to your actual model id in Google AI Studio)",
+             )
+             google_text_model_hint = gr.Textbox(
+                 label="Google Text Model Hint (for future tweaking)",
+                 value="gemini-1.5-pro",
+                 placeholder="Used in code as default Gemini text model.",
+             )
+
+             generate_btn = gr.Button("🚀 Generate", variant="primary")
+
+         with gr.Column():
+             gr.Markdown("### 📜 Text / Spec Output")
+             text_output = gr.Markdown()
+
+             gr.Markdown("### 🖼 Image Output")
+             image_gallery = gr.Gallery(
+                 label="Generated Images",
+                 show_label=False,
+                 columns=2,
+                 height=500,
+             )
+
+             gr.Markdown("### 🧾 Debug / Logs (for you, not end users)")
+             debug_output = gr.Textbox(
+                 label="Debug Info",
+                 lines=10,
+             )
+
+     # Wire up callback
+     generate_btn.click(
+         fn=run_generation,
+         inputs=[
+             openai_key,
+             google_key,
+             task_type,
+             provider,
+             base_prompt,
+             negative_prompt,
+             preset,
+             style,
+             size,
+             quality,
+             n_images,
+             seed,
+             use_seed,
+             google_image_model,
+             google_text_model_hint,
+         ],
+         outputs=[text_output, image_gallery, debug_output],
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
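
A quick local sanity check of the prompt-composition helper, as a minimal sketch: it assumes app.py sits in the current directory and that gradio and pillow are installed so the module imports cleanly; no API keys are needed for this part, since apply_preset_to_prompt is a pure string function.

    from app import apply_preset_to_prompt

    composed = apply_preset_to_prompt(
        base_prompt="ZEN Arena launch banner",
        preset="ZEN Glass Dashboard",
        style="Futuristic UI",
        content_type="Image",
    )
    print(composed)  # base prompt followed by the preset/style addon language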