From 7210933a15b089e77894aed3e11815797fe81887 Mon Sep 17 00:00:00 2001
From: Guilherme Werner
Date: Fri, 6 Oct 2023 19:45:45 -0300
Subject: [PATCH] Initial commit

---
 .editorconfig | 12 ++++++++++++
 README.md     |  1 +
 main.py       | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 setup.sh      |  4 ++++
 test.rest     | 19 +++++++++++++++++++
 5 files changed, 83 insertions(+)
 create mode 100644 .editorconfig
 create mode 100644 README.md
 create mode 100644 main.py
 create mode 100644 setup.sh
 create mode 100644 test.rest

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..88df879
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,12 @@
+root = true
+
+[*]
+end_of_line = lf
+indent_style = space
+indent_size = 4
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+[*.md]
+trim_trailing_whitespace = false
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..24aa287
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# ImageCaptionAPI
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..d11fec5
--- /dev/null
+++ b/main.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+from flask import Flask, request, jsonify
+import requests
+import torch
+from PIL import Image
+from transformers import VisionEncoderDecoderModel, GPT2TokenizerFast, ViTImageProcessor
+from tqdm import tqdm
+import urllib.parse as parse
+import os
+
+app = Flask(__name__)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(device)
+
+# Load the model, tokenizer, and image processor
+finetuned_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning").to(device)
+finetuned_tokenizer = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+finetuned_image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+
+# Download an image from a URL and open it with PIL
+def load_image(image_path):
+    return Image.open(requests.get(image_path, stream=True).raw)
+
+# Generate a caption for the image at the given URL
+def get_caption(model, image_processor, tokenizer, image_path):
+    image = load_image(image_path)
+    img = image_processor(image, return_tensors="pt").to(device)
+    output = model.generate(**img)
+    caption = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+    return caption
+
+# API route that returns the caption for an image
+@app.route('/caption', methods=['POST'])
+def caption_image():
+    data = request.get_json(silent=True) or {}
+    if 'image_url' in data:
+        image_url = data['image_url']
+        caption = get_caption(finetuned_model, finetuned_image_processor, finetuned_tokenizer, image_url)
+        response = {"caption": caption}
+        return jsonify(response)
+    else:
+        return jsonify({"error": "Missing 'image_url'"}), 400
+
+if __name__ == '__main__':
+    app.run(debug=True)
diff --git a/setup.sh b/setup.sh
new file mode 100644
index 0000000..0e786c7
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+
+pip3 install transformers rouge_score evaluate datasets flask
+pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
diff --git a/test.rest b/test.rest
new file mode 100644
index 0000000..5cdf89a
--- /dev/null
+++ b/test.rest
@@ -0,0 +1,19 @@
+###
+
+POST http://localhost:5000/caption
+Content-Type: application/json
+
+{
+    "image_url": "http://images.cocodataset.org/test-stuff2017/000000009384.jpg"
+}
+
+###
+
+POST http://localhost:5000/caption
+Content-Type: application/json
+
+{
+    "image_url": "https://static.todamateria.com.br/upload/ur/so/ursopolarreproducao-cke.jpg?auto_optimize=low"
+}
+
+###
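
Usage note (not part of the patch): a minimal Python client sketch, assuming the Flask server from main.py is running locally on port 5000 as in test.rest, and using the first test image URL from that file.

    #!/usr/bin/env python3
    # Hypothetical client for the /caption endpoint defined in main.py.
    # Assumes the server is reachable at http://localhost:5000 (Flask default).
    import requests

    resp = requests.post(
        "http://localhost:5000/caption",
        json={"image_url": "http://images.cocodataset.org/test-stuff2017/000000009384.jpg"},
    )
    resp.raise_for_status()
    print(resp.json()["caption"])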