From 7210933a15b089e77894aed3e11815797fe81887 Mon Sep 17 00:00:00 2001
From: Guilherme Werner
Date: Fri, 6 Oct 2023 19:45:45 -0300
Subject: [PATCH] Initial commit

---
 .editorconfig | 12 ++++++++++++
 README.md     |  1 +
 main.py       | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 setup.sh      |  4 ++++
 test.rest     | 19 +++++++++++++++++++
 5 files changed, 83 insertions(+)
 create mode 100644 .editorconfig
 create mode 100644 README.md
 create mode 100644 main.py
 create mode 100644 setup.sh
 create mode 100644 test.rest

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..88df879
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,12 @@
+root = true
+
+[*]
+end_of_line = lf
+indent_style = space
+indent_size = 4
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+[*.md]
+trim_trailing_whitespace = false
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..24aa287
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# ImageCaptionAPI
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..d11fec5
--- /dev/null
+++ b/main.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+from flask import Flask, request, jsonify
+import requests
+import torch
+from PIL import Image
+from transformers import VisionEncoderDecoderModel, GPT2TokenizerFast, ViTImageProcessor
+from tqdm import tqdm
+import urllib.parse as parse
+import os
+
+app = Flask(__name__)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(device)
+
+# Load the model, tokenizer, and image processor
+finetuned_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning").to(device)
+finetuned_tokenizer = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+finetuned_image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+
+# Download an image from a URL and open it with PIL
+def load_image(image_path):
+    return Image.open(requests.get(image_path, stream=True).raw)
+
+# Generate a caption for the image at the given URL
+def get_caption(model, image_processor, tokenizer, image_path):
+    image = load_image(image_path)
+    img = image_processor(image, return_tensors="pt").to(device)
+    output = model.generate(**img)
+    caption = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+    return caption
+
+# API route that returns the caption for an image
+@app.route('/caption', methods=['POST'])
+def caption_image():
+    data = request.get_json(silent=True) or {}
+    if 'image_url' in data:
+        image_url = data['image_url']
+        caption = get_caption(finetuned_model, finetuned_image_processor, finetuned_tokenizer, image_url)
+        response = {"caption": caption}
+        return jsonify(response)
+    else:
+        return jsonify({"error": "Missing 'image_url'"}), 400
+
+if __name__ == '__main__':
+    app.run(debug=True)
diff --git a/setup.sh b/setup.sh
new file mode 100644
index 0000000..0e786c7
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+
+pip3 install transformers rouge_score evaluate datasets flask
+pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
diff --git a/test.rest b/test.rest
new file mode 100644
index 0000000..5cdf89a
--- /dev/null
+++ b/test.rest
@@ -0,0 +1,19 @@
+###
+
+POST http://localhost:5000/caption
+Content-Type: application/json
+
+{
+    "image_url": "http://images.cocodataset.org/test-stuff2017/000000009384.jpg"
+}
+
+###
+
+POST http://localhost:5000/caption
+Content-Type: application/json
+
+{
+    "image_url": "https://static.todamateria.com.br/upload/ur/so/ursopolarreproducao-cke.jpg?auto_optimize=low"
+}
+
+###
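
Usage note (not part of the patch): a minimal Python client sketch, assuming the Flask server from main.py is running locally on port 5000 as in test.rest, and using the first test image URL from that file.

    #!/usr/bin/env python3
    # Hypothetical client for the /caption endpoint defined in main.py.
    # Assumes the server is reachable at http://localhost:5000 (Flask default).
    import requests

    resp = requests.post(
        "http://localhost:5000/caption",
        json={"image_url": "http://images.cocodataset.org/test-stuff2017/000000009384.jpg"},
    )
    resp.raise_for_status()
    print(resp.json()["caption"])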