commit 7210933a15b089e77894aed3e11815797fe81887
Author: Guilherme Werner
Date:   Fri Oct 6 19:45:45 2023 -0300

    Initial commit

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..88df879
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,12 @@
+root = true
+
+[*]
+end_of_line = lf
+indent_style = space
+indent_size = 4
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+[*.md]
+trim_trailing_whitespace = false
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..24aa287
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# ImageCaptionAPI
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..d11fec5
--- /dev/null
+++ b/main.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""Flask API that generates a caption for an image fetched from a URL."""
+
+from io import BytesIO
+import os
+import urllib.parse as parse
+
+from flask import Flask, request, jsonify
+import requests
+import torch
+from PIL import Image
+from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel
+from tqdm import tqdm
+
+app = Flask(__name__)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(device)
+
+# Seconds to wait for the remote image host before giving up.
+REQUEST_TIMEOUT = 10
+
+# Load the model, tokenizer and image processor once at startup
+finetuned_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning").to(device)
+finetuned_tokenizer = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+finetuned_image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+
+
+def load_image(image_path):
+    """Download the image at URL ``image_path`` and return it as an RGB PIL image.
+
+    Raises ``requests.RequestException`` on network or HTTP errors and
+    ``OSError`` when the response body is not a decodable image.
+    """
+    response = requests.get(image_path, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+    # Decode the fully downloaded body: reading ``response.raw`` directly
+    # bypasses content decoding. Convert so every input mode becomes RGB.
+    return Image.open(BytesIO(response.content)).convert("RGB")
+
+
+def get_caption(model, image_processor, tokenizer, image_path):
+    """Return the caption ``model`` generates for the image at URL ``image_path``."""
+    image = load_image(image_path)
+    img = image_processor(image, return_tensors="pt").to(device)
+    output = model.generate(**img)
+    caption = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+    return caption
+
+
+@app.route('/caption', methods=['POST'])
+def caption_image():
+    """Handle ``POST /caption``: JSON ``{"image_url": ...}`` -> ``{"caption": ...}``.
+
+    Responds with HTTP 400 when the body is not a JSON object containing
+    ``image_url`` or when the image cannot be downloaded or decoded.
+    """
+    # silent=True yields None for a missing/invalid JSON body instead of raising
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or 'image_url' not in data:
+        return jsonify({"error": "Missing 'image_url'"}), 400
+
+    try:
+        caption = get_caption(finetuned_model, finetuned_image_processor, finetuned_tokenizer, data['image_url'])
+    except (requests.RequestException, OSError):
+        # Invalid URL, network/HTTP failure, or a body that is not an image
+        return jsonify({"error": "Could not load image from 'image_url'"}), 400
+    return jsonify({"caption": caption})
+
+
+if __name__ == '__main__':
+    # The Werkzeug debugger can execute arbitrary code, so debug mode is
+    # opt-in (FLASK_DEBUG=1) instead of always on.
+    app.run(debug=os.environ.get("FLASK_DEBUG") == "1")
diff --git a/setup.sh b/setup.sh
new file mode 100644
index 0000000..0e786c7
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+
+pip3 install transformers rouge_score evaluate datasets flask
+pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
diff --git a/test.rest b/test.rest
new file mode 100644
index 0000000..5cdf89a
--- /dev/null
+++ b/test.rest
@@ -0,0 +1,19 @@
+###
+
+POST http://localhost:5000/caption
+Content-Type: application/json
+
+{
+    "image_url": "http://images.cocodataset.org/test-stuff2017/000000009384.jpg"
+}
+
+###
+
+POST http://localhost:5000/caption
+Content-Type: application/json
+
+{
+    "image_url": "https://static.todamateria.com.br/upload/ur/so/ursopolarreproducao-cke.jpg?auto_optimize=low"
+}
+
+###