Spaces:
Build error
Build error
| from PIL import Image | |
| from transformers import VisionEncoderDecoderModel , ViTFeatureExtractor , PreTrainedTokenizerFast | |
| import gradio as gr | |
# Hub identifiers of the pretrained components used for captioning.
CAPTION_MODEL_ID = "ydshieh/vit-gpt2-coco-en"
FEATURE_EXTRACTOR_ID = "google/vit-base-patch32-224-in21k"
TOKENIZER_ID = "distilgpt2"

# Everything is loaded once at module level so that caption_images() can
# reuse the same objects on every call.
# NOTE(review): the feature extractor and tokenizer come from different
# checkpoints than the captioning model — confirm they match what the
# model was trained with.
model = VisionEncoderDecoderModel.from_pretrained(CAPTION_MODEL_ID)
vit_feature_extractor = ViTFeatureExtractor.from_pretrained(FEATURE_EXTRACTOR_ID)
tokenizer = PreTrainedTokenizerFast.from_pretrained(TOKENIZER_ID)
def caption_images(image):
    """Generate a caption for a single image.

    Args:
        image: The image to caption. It is handed unchanged to the
            module-level ``vit_feature_extractor``; the Gradio input
            defined in this file delivers it as a PIL image. ``None``
            (no image supplied) is accepted.

    Returns:
        str: The first decoded caption with surrounding whitespace
        stripped, or an empty string when ``image`` is ``None``.
    """
    # The Gradio Image input yields None when the form is submitted
    # without an upload; return an empty caption rather than failing
    # inside the feature extractor.
    if image is None:
        return ""

    features = vit_feature_extractor(images=image, return_tensors="pt")
    # generate() returns token ids (beam search, 5 beams), not text.
    generated_ids = model.generate(features.pixel_values.to("cpu"), num_beams=5)
    captions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0].strip()
# One image input, delivered to the callback as a PIL image.
inputs = [gr.components.Image(type="pil", label="Original Image")]

# One text output that displays the generated caption.
outputs = [gr.components.Textbox(label="Caption")]

title = "Simple Image captioning Application"
description = "Upload an image to see the caption generated"

# Example image offered in the UI.
# NOTE(review): relative path — presumably the file sits next to this
# script; confirm it is present in the deployment.
example = ["messi.jpg"]

# Build the web UI around caption_images and start serving it.
demo = gr.Interface(
    fn=caption_images,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=example,
)
demo.launch(debug=True)