"""ZEN Omni Studio — a Gradio app that generates text, infographic specs,
and images via OpenAI (Chat Completions / Images) and Google Gemini.

Users supply their own API keys at runtime; provider SDKs are imported
lazily so the app still loads when one of them is missing.
"""

import base64
import io
import os
from typing import List, Tuple, Optional

import gradio as gr
from PIL import Image


# -----------------------
# Optional imports guarded at call-time
# -----------------------
def _get_openai_client(api_key: str):
    """Return an OpenAI client for *api_key* (SDK imported lazily)."""
    from openai import OpenAI  # imported here to avoid issues if lib missing
    return OpenAI(api_key=api_key)


def _configure_google(api_key: str):
    """Configure google.generativeai with *api_key* and return the module."""
    import google.generativeai as genai
    genai.configure(api_key=api_key)
    return genai


# -----------------------
# Helper: apply visual presets
# -----------------------
def apply_preset_to_prompt(
    base_prompt: str,
    preset: str,
    style: str,
    content_type: str,
) -> str:
    """Augment the prompt with preset & style language.

    Unknown preset/style names fall back to empty add-ons; an empty
    base prompt still yields a usable default prompt.
    """
    base_prompt = base_prompt.strip()

    preset_addons = {
        "None": "",
        "ZEN Glass Dashboard": (
            " ultra-detailed UI, glassmorphism, prismatic alloy panels, "
            "neon cyan and magenta HUD overlays, high-end enterprise dashboard"
        ),
        "Palantir / Anduril Infographic": (
            " dark enterprise command-center aesthetic, clean vector infographics, "
            "military-grade analytics overlays, sharp typography, high contrast, "
            "minimal but dense information layout"
        ),
        "Youth AI Literacy Poster": (
            " vibrant educational poster for teens, clean icons, diverse students, "
            "friendly but serious tone, clear typography, classroom-ready layout"
        ),
        "ZEN AI Arena Card": (
            " holographic trading card style, quantum glass edges, subtle glow, "
            "sharp logo lockup, futuristic typography, dramatic lighting"
        ),
        "Blueprint / Systems Diagram": (
            " technical blueprint, white lines on deep navy background, callout labels, "
            "flow arrows, system nodes, engineering drawing style"
        ),
    }

    style_addons = {
        "Default": "",
        "Photoreal": " hyper-realistic photography, physically based lighting",
        "Illustration": " clean vector illustration style, flat colors, crisp lines",
        "Futuristic UI": " futuristic interface design, HUD, holographic widgets",
        "Blueprint": " blueprint drawing, schematic lines, engineering grid",
        "Cinematic": " cinematic lighting, dramatic composition, filmic contrast",
    }

    ct_addon = ""
    if content_type == "Image":
        ct_addon = " high-resolution concept art,"
    elif content_type == "Infographic Spec":
        ct_addon = (
            " detailed infographic design specification, including layout regions, "
            "sections, labels, and visual hierarchy,"
        )

    extra = " ".join(
        x
        for x in [
            ct_addon,
            preset_addons.get(preset, ""),
            style_addons.get(style, ""),
        ]
        if x
    )

    if extra:
        if base_prompt:
            return f"{base_prompt}, {extra}"
        else:
            return extra.strip()
    return base_prompt or "high quality image"


# -----------------------
# OpenAI: Text + Image
# -----------------------
def generate_text_openai(
    api_key: str,
    prompt: str,
    mode: str,
) -> str:
    """Generate text (or an infographic spec when *mode* says so) via OpenAI."""
    client = _get_openai_client(api_key)

    system_msg = (
        "You are an expert creator for the ZEN AI ecosystem. "
        "Write clear, concise, high-leverage content. "
        "If mode is 'Infographic Spec', output a structured outline with sections, "
        "titles, short captions, and suggested visual elements."
    )

    if mode == "Infographic Spec":
        user_prompt = (
            f"Create a Palantir/Anduril-level infographic specification based on:\n\n{prompt}\n\n"
            "Return:\n"
            "1) Title options\n"
            "2) 3–5 main sections\n"
            "3) Bullet points for each section\n"
            "4) Suggested charts/visuals\n"
            "5) Color and typography recommendations."
        )
    else:
        user_prompt = prompt

    # Using Chat Completions interface
    resp = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
    )
    return resp.choices[0].message.content


def decode_b64_images(b64_list: List[str]) -> List[Image.Image]:
    """Decode base64-encoded image payloads into RGB PIL images.

    None/empty entries are skipped so a provider returning a partial
    batch does not crash the whole request.
    """
    images: List[Image.Image] = []
    for b64 in b64_list:
        if not b64:
            continue
        raw = base64.b64decode(b64)
        img = Image.open(io.BytesIO(raw)).convert("RGB")
        images.append(img)
    return images


def generate_image_openai(
    api_key: str,
    model: str,
    prompt: str,
    size: str,
    quality: str,
    n_images: int,
    seed: Optional[int],
) -> List[Image.Image]:
    """Generate images with an OpenAI image model.

    Handles the API differences between model families:
    - gpt-image-1 uses quality low/medium/high/auto and sizes
      1024x1024 / 1024x1536 / 1536x1024; it always returns base64.
    - dall-e-3 uses quality standard/hd and sizes 1024x1024 /
      1024x1792 / 1792x1024, supports only n=1 per request, and
      returns URLs unless response_format="b64_json" is requested.

    NOTE: the OpenAI Images API has no seed parameter, so *seed* is
    accepted for interface symmetry but intentionally ignored here.
    """
    client = _get_openai_client(api_key)

    is_gpt_image = model.startswith("gpt-image")

    # Map UI size choices to the sizes each model family supports.
    if is_gpt_image:
        size_map = {
            "Square (1024x1024)": "1024x1024",
            "Portrait (1024x1792)": "1024x1536",
            "Landscape (1792x1024)": "1536x1024",
        }
        # UI exposes dall-e-3 vocabulary; translate for gpt-image-1.
        quality_param = {"standard": "medium", "hd": "high"}.get(quality, "auto")
    else:
        size_map = {
            "Square (1024x1024)": "1024x1024",
            "Portrait (1024x1792)": "1024x1792",
            "Landscape (1792x1024)": "1792x1024",
        }
        quality_param = quality
    size_param = size_map.get(size, "1024x1024")

    b64_list: List[str] = []
    if is_gpt_image:
        # gpt-image-1 supports n>1 and returns base64 by default
        # (it rejects the response_format parameter).
        resp = client.images.generate(
            model=model,
            prompt=prompt,
            size=size_param,
            quality=quality_param,
            n=n_images,
        )
        b64_list.extend(d.b64_json for d in resp.data)
    else:
        # dall-e-3 only allows n=1 per request; loop to honor n_images,
        # and explicitly ask for base64 (default is URL).
        for _ in range(max(1, n_images)):
            resp = client.images.generate(
                model=model,
                prompt=prompt,
                size=size_param,
                quality=quality_param,
                n=1,
                response_format="b64_json",
            )
            b64_list.extend(d.b64_json for d in resp.data)

    return decode_b64_images(b64_list)


# -----------------------
# Google (Gemini / Nano-Banana)
# -----------------------
def generate_text_google(
    api_key: str,
    prompt: str,
    mode: str,
) -> str:
    """Generate text (or an infographic spec) via Google Gemini."""
    genai = _configure_google(api_key)

    # Default to a strong text model
    model = genai.GenerativeModel("gemini-1.5-pro")

    if mode == "Infographic Spec":
        content = (
            "You are an expert enterprise communicator. "
            "Create a Palantir/Anduril-grade infographic spec.\n\n"
            f"Topic / prompt:\n{prompt}\n\n"
            "Return:\n"
            "1) Title options\n"
            "2) Main sections with bullet points\n"
            "3) Visual layout ideas\n"
            "4) Chart/visualization suggestions\n"
            "5) Palette & typography notes."
        )
    else:
        content = prompt

    resp = model.generate_content(content)
    return resp.text


def generate_image_google(
    api_key: str,
    google_image_model: str,
    prompt: str,
    n_images: int,
    seed: Optional[int],
) -> List[Image.Image]:
    """
    NOTE: Model & output handling may need adjustment depending on the exact
    Nano-Banana / Nano-Banana-Pro API you use in Google AI Studio.
    This assumes a GenerativeModel that returns inline image data.
    """
    genai = _configure_google(api_key)
    model = genai.GenerativeModel(google_image_model)

    images: List[Image.Image] = []
    for i in range(n_images):
        # Some image models support generation_config with a seed;
        # here we pass it if present (offset per image so each differs).
        generation_config = {}
        if seed is not None:
            generation_config["seed"] = seed + i

        resp = model.generate_content(
            prompt,
            generation_config=generation_config or None,
        )

        # Try to pull image bytes from response parts
        for cand in resp.candidates:
            for part in cand.content.parts:
                if hasattr(part, "inline_data") and getattr(part.inline_data, "data", None):
                    raw = base64.b64decode(part.inline_data.data)
                    img = Image.open(io.BytesIO(raw)).convert("RGB")
                    images.append(img)

    return images


# -----------------------
# Core Gradio callback
# -----------------------
def run_generation(
    openai_key: str,
    google_key: str,
    task_type: str,
    provider: str,
    base_prompt: str,
    negative_prompt: str,
    preset: str,
    style: str,
    size: str,
    quality: str,
    n_images: int,
    seed: int,
    use_seed: bool,
    google_image_model: str,
    google_text_model_hint: str,
) -> Tuple[str, List[Image.Image], str]:
    """
    Returns: (text_output, images, debug_info)
    """
    text_output = ""
    images: List[Image.Image] = []
    debug_lines = []

    if not base_prompt.strip():
        return "Please enter a prompt.", [], "No prompt provided."

    # Gradio sliders may deliver floats; normalize before passing on.
    n_images = int(n_images)
    seed = int(seed)

    # Build full prompt for images
    content_type = "Image" if task_type == "Image" else task_type
    full_prompt = apply_preset_to_prompt(
        base_prompt=base_prompt,
        preset=preset,
        style=style,
        content_type=content_type,
    )

    if negative_prompt.strip():
        full_prompt += f". Avoid: {negative_prompt.strip()}"

    debug_lines.append(f"Task: {task_type}")
    debug_lines.append(f"Provider: {provider}")
    debug_lines.append(f"Preset: {preset}, Style: {style}")
    debug_lines.append(f"OpenAI model size: {size}, quality: {quality}")
    debug_lines.append(f"Google image model: {google_image_model}")
    debug_lines.append(f"Google text model hint: {google_text_model_hint}")
    debug_lines.append(f"Seed enabled: {use_seed}, seed: {seed if use_seed else 'None'}")

    seed_val: Optional[int] = seed if use_seed else None

    try:
        # TEXT or INFOGRAPHIC
        if task_type in ["Text", "Infographic Spec"]:
            if provider == "OpenAI":
                if not openai_key.strip():
                    return "Missing OpenAI API key.", [], "OpenAI key not provided."
                text_output = generate_text_openai(
                    api_key=openai_key.strip(),
                    prompt=full_prompt,
                    mode=task_type,
                )
            else:
                if not google_key.strip():
                    return "Missing Google API key.", [], "Google key not provided."
                text_output = generate_text_google(
                    api_key=google_key.strip(),
                    prompt=full_prompt,
                    mode=task_type,
                )

        # IMAGE
        if task_type == "Image":
            if provider == "OpenAI":
                if not openai_key.strip():
                    return "Missing OpenAI API key.", [], "OpenAI key not provided."

                # Decide OpenAI image model based on preset selection or UI (we can infer)
                # We'll expose choice via size/style; model stays fixed to user-facing dropdown externally
                # but here we assume they want GPT-Image-1 by default
                image_model = "gpt-image-1"
                # For Palantir/Anduril preset, sometimes DALL·E 3 is good – user can switch later by editing code.
                if "Palantir" in preset:
                    image_model = "dall-e-3"

                images = generate_image_openai(
                    api_key=openai_key.strip(),
                    model=image_model,
                    prompt=full_prompt,
                    size=size,
                    quality=quality,
                    n_images=n_images,
                    seed=seed_val,
                )
                debug_lines.append(f"OpenAI image model: {image_model}")
            else:
                if not google_key.strip():
                    return "Missing Google API key.", [], "Google key not provided."
                images = generate_image_google(
                    api_key=google_key.strip(),
                    google_image_model=google_image_model.strip(),
                    prompt=full_prompt,
                    n_images=n_images,
                    seed=seed_val,
                )

        if not text_output and task_type == "Image":
            text_output = (
                "Image(s) generated successfully. Add 'Text' or 'Infographic Spec' mode "
                "to generate descriptive copy or specs."
            )

        if not images and task_type == "Image":
            debug_lines.append("No images returned from provider.")

        return text_output, images, "\n".join(debug_lines)

    except Exception as e:
        return f"Error: {e}", [], "\n".join(debug_lines + [f"Exception: {e}"])


# -----------------------
# UI
# -----------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# 🧬 ZEN Omni Studio — Text • Images • Infographics

A unified creator for the ZEN ecosystem.

- 🔑 **Bring your own keys** for OpenAI + Google (Gemini / Nano-Banana).
- 🧠 Generate **text** and **infographic specs** for ZEN, AI literacy, dashboards, and more.
- 🎨 Generate **high-quality images** with advanced controls and presets.
"""
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🔐 API Keys (kept only in your browser session)")
            openai_key = gr.Textbox(
                label="OPENAI_API_KEY",
                type="password",
                placeholder="sk-...",
            )
            google_key = gr.Textbox(
                label="GOOGLE_API_KEY (Gemini / Nano-Banana)",
                type="password",
                placeholder="AIza...",
            )

            gr.Markdown("### 🎯 Task & Provider")
            task_type = gr.Radio(
                ["Image", "Text", "Infographic Spec"],
                value="Image",
                label="Task Type",
            )
            provider = gr.Radio(
                ["Google (Nano-Banana / Gemini)", "OpenAI"],
                value="Google (Nano-Banana / Gemini)",
                label="Primary Provider",
            )

            # Prompt region
            base_prompt = gr.Textbox(
                label="Main Prompt",
                lines=5,
                placeholder="Describe what you want to create for ZEN (image, copy, infographic, etc.)",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt (optional)",
                lines=2,
                placeholder="Things to avoid: low-res, cluttered, distorted text, etc.",
            )

            with gr.Row():
                preset = gr.Dropdown(
                    [
                        "None",
                        "ZEN Glass Dashboard",
                        "Palantir / Anduril Infographic",
                        "Youth AI Literacy Poster",
                        "ZEN AI Arena Card",
                        "Blueprint / Systems Diagram",
                    ],
                    value="ZEN Glass Dashboard",
                    label="Visual Preset",
                )
                style = gr.Dropdown(
                    [
                        "Default",
                        "Photoreal",
                        "Illustration",
                        "Futuristic UI",
                        "Blueprint",
                        "Cinematic",
                    ],
                    value="Futuristic UI",
                    label="Style Accent",
                )

            # OpenAI image options
            gr.Markdown("### 🎛 OpenAI Image Controls (DALL·E / GPT-Image)")
            with gr.Row():
                size = gr.Dropdown(
                    [
                        "Square (1024x1024)",
                        "Portrait (1024x1792)",
                        "Landscape (1792x1024)",
                    ],
                    value="Square (1024x1024)",
                    label="Aspect Ratio / Size",
                )
                quality = gr.Dropdown(
                    ["standard", "hd"],
                    value="hd",
                    label="Quality",
                )

            n_images = gr.Slider(
                minimum=1,
                maximum=4,
                value=1,
                step=1,
                label="Number of Images",
            )

            with gr.Row():
                use_seed = gr.Checkbox(
                    value=False,
                    label="Lock Seed (repeatable outputs)",
                )
                seed = gr.Slider(
                    minimum=1,
                    maximum=2**31 - 1,
                    value=12345,
                    step=1,
                    label="Seed",
                )

            gr.Markdown("### 🧪 Google Image / Text Model Hints")
            google_image_model = gr.Textbox(
                label="Google Image Model (default: Nano-Banana-Pro)",
                value="nano-banana-pro",
                placeholder="e.g. nano-banana-pro (adjust to your actual model id in Google AI Studio)",
            )
            google_text_model_hint = gr.Textbox(
                label="Google Text Model Hint (for future tweaking)",
                value="gemini-1.5-pro",
                placeholder="Used in code as default Gemini text model.",
            )

            generate_btn = gr.Button("🚀 Generate", variant="primary")

        with gr.Column():
            gr.Markdown("### 📜 Text / Spec Output")
            text_output = gr.Markdown()

            gr.Markdown("### 🖼 Image Output")
            image_gallery = gr.Gallery(
                label="Generated Images",
                show_label=False,
                columns=2,
                height=500,
            )

            gr.Markdown("### 🧾 Debug / Logs (for you, not end users)")
            debug_output = gr.Textbox(
                label="Debug Info",
                lines=10,
            )

    # Wire up callback
    generate_btn.click(
        fn=run_generation,
        inputs=[
            openai_key,
            google_key,
            task_type,
            provider,
            base_prompt,
            negative_prompt,
            preset,
            style,
            size,
            quality,
            n_images,
            seed,
            use_seed,
            google_image_model,
            google_text_model_hint,
        ],
        outputs=[text_output, image_gallery, debug_output],
    )


if __name__ == "__main__":
    demo.launch()