3 *
4 * This application uses Google's Gemini 1.5 Pro AI model to perform object detection
5 * on user-uploaded images and visualize the results by drawing bounding boxes.
6 *
7 * Key features:
8 * - Integrates with Google's Generative AI API
9 * - Processes user-uploaded images for object detection
10 * - Visualizes detected objects with bounding boxes
11 * - Allows customization of detection parameters (grid, contrast)
27<h1>Gemini AI Bounding Box Demo</h1>
28
29<p>This application visualizes object detection results by drawing bounding boxes on images using <a href="https://ai.google.dev/">Google's Gemini 1.5 Pro AI model</a>.</p>
30
31<p>API keys are only stored in your browser's local storage.</p>
32<p>Images are only sent to Google's Gemini API servers for processing.</p>
33
34<p>Try the following images (click the links to add them):</p>
35<ul>
36 <li><a href="https://animalcafes.com/pix/800-izu-anikin9.jpg" onclick="handleImageUrlClick(event)">Bunny and a capybara</a></li>
37 <li><a href="https://img.itch.zone/aW1nLzEyMTE0NDk5LnBuZw==/315x250%23c/kQQauY.png" onclick="handleImageUrlClick(event)">A grid of Stardew Valley foods (slice it up with the bounding boxes)</a></li>
38 <li><a href="https://i.pinimg.com/736x/6b/83/d7/6b83d7cd7009a1267be71ba643170590.jpg" onclick="handleImageUrlClick(event)">Uniqlo shopping store website</a></li>
39 <li><a href="https://media.istockphoto.com/id/1363713248/photo/eclectus-parrot.jpg?s=612x612&w=0&k=20&c=dtLxU9HMWv3Z-Cz4npZgYDe0lGaJcgKGSsOudDlQx3s=" onclick="handleImageUrlClick(event)">Two parrots</a></li>
40</ul>
41
62 }
63
// Combined function to fetch and handle images.
// Fetches `url` and returns either an object URL (for on-page previews) or a
// File wrapper (for uploading to the Gemini API), depending on options.
// @param {string} url - image URL to fetch; falsy input short-circuits to null.
// @param {Object} [options] - { returnType: 'blob' | 'preview' } (default 'blob').
// @returns {Promise<string|File|null>} object URL when returnType === 'preview',
//          otherwise a File named 'image.jpg'; null when url is falsy.
// @throws {Error} when both the direct fetch and the CORS-proxy fallback fail.
async function fetchImage(url, options = {}) {
  const { returnType = 'blob' } = options;

  console.log("fetchImage url:", url);
  if (!url) return null;

    // NOTE(review): 'no-cors' produces an opaque response for cross-origin
    // URLs, whose `ok` is always false — so this direct branch likely never
    // succeeds for remote images and control falls through to the proxy
    // below. Confirm whether 'no-cors' is intentional here.
    const response = await fetch(url, {
      mode: 'no-cors',
      headers: { 'Accept': 'image/*' }
    });

    console.log("fetchImage response:", response)

    if (response.ok) {
      const blob = await response.blob();
      return returnType === 'preview' ? URL.createObjectURL(blob) : new File([blob], 'image.jpg', { type: blob.type });
    }

    // Fallback path: proxyResponse comes from a CORS-proxy fetch performed in
    // code not visible in this excerpt.
    if (!proxyResponse.ok) {
      throw new Error('Failed to fetch image through CORS proxy');
    }

    const proxyBlob = await proxyResponse.blob();
    return returnType === 'preview' ? URL.createObjectURL(proxyBlob) : new File([proxyBlob], 'image.jpg', { type: proxyBlob.type });

  } catch (error) {
    // Wrap and rethrow so callers get a uniform error message.
    console.error('Error fetching image:', error);
    throw new Error('Failed to fetch image: ' + error.message);
  }
}
103
// Update the image preview function to use the combined fetchImage.
// Shows a loading spinner in #imageContainer, fetches the image as an object
// URL, draws it onto a freshly recreated #canvas, and swaps in an error panel
// on any failure. Returns nothing; all results are rendered into the DOM.
// @param {string} url - image URL typed or clicked by the user.
async function updateImagePreview(url) {
  const imageContainer = document.getElementById('imageContainer');
  const canvas = document.getElementById('canvas');

  // Show loading spinner
  canvas.style.display = 'none';
  imageContainer.innerHTML = '<div class="w-full h-64 rounded-md mb-4 bg-accent border border-app-accent flex items-center justify-center"><div class="animate-spin rounded-full h-12 w-12 border-4 border-button border-t-transparent"></div></div>';

  try {
    // 'preview' returnType yields an object URL rather than a File.
    const objectUrl = await fetchImage(url, { returnType: 'preview' });
    if (!objectUrl) {
      throw new Error('Failed to load image');
    }

    const img = new Image();
    img.onload = function() {
      // Remove spinner and restore container (the old canvas node was wiped
      // by the spinner innerHTML above, so it must be recreated and re-fetched).
      imageContainer.innerHTML = '<canvas id="canvas"></canvas>';
      const canvas = document.getElementById('canvas');
      canvas.style.display = 'block';
      // NOTE(review): only canvas.height is assigned in the visible code;
      // confirm canvas.width = img.width happens in an elided line.
      canvas.height = img.height;
      const ctx = canvas.getContext('2d');
      ctx.drawImage(img, 0, 0);
      // Release the blob URL once the image has been drawn.
      URL.revokeObjectURL(objectUrl);
    };
    img.onerror = () => {
      imageContainer.innerHTML = '<div class="w-full h-64 rounded-md mb-4 bg-red-100 border border-red-300 flex items-center justify-center text-red-700">Failed to load image</div>';
      URL.revokeObjectURL(objectUrl);
    };
  } catch (error) {
    console.error('Preview error:', error);
    imageContainer.innerHTML = '<div class="w-full h-64 rounded-md mb-4 bg-red-100 border border-red-300 flex items-center justify-center text-red-700">Error loading image</div>';
  }
}
153 }
154
// Debounced version of updateImagePreview: waits 500 ms after the last call
// before actually fetching, so typing in the URL field doesn't spam requests.
// (The debounce helper is defined elsewhere in this file, not shown here.)
const debouncedUpdatePreview = debounce(updateImagePreview, 500);
157
// Setup URL input listener: once the DOM is ready, re-render the preview
// (debounced) every time the user edits the image-URL text field.
window.addEventListener('DOMContentLoaded', () => {
  const urlInput = document.getElementById('imageUrlInput');
  urlInput.addEventListener('input', (e) => {
    debouncedUpdatePreview(e.target.value);
  });
165
/**
 * Click handler for the sample-image links in the intro text.
 * Prevents navigation, copies the link's URL into the URL input field,
 * and refreshes the preview immediately (bypassing the 500 ms debounce
 * used for manual typing).
 * @param {MouseEvent} event - click event; event.target is the <a> element.
 */
function handleImageUrlClick(event) {
  event.preventDefault();
  const url = event.target.href;
  document.getElementById('imageUrlInput').value = url;
  updateImagePreview(url); // Immediate preview for clicks
}

// Expose globally so the inline onclick="handleImageUrlClick(event)"
// attributes in the intro HTML can reach it from outside this closure.
window.handleImageUrlClick = handleImageUrlClick;
175
176 // Retrieves the API key from local storage or prompts the user to enter it
207 }
208
// Applies a high contrast filter to the image on a canvas.
// Reads the canvas pixels, transforms them in place (the per-pixel loop is in
// lines not visible in this excerpt), and writes the result back.
// @param {HTMLCanvasElement} canvas - canvas whose current contents are modified.
function applyHighContrast(canvas) {
  const ctx = canvas.getContext('2d');
  const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
  const data = imageData.data;

  // Threshold below which pixels are treated as "dark", taken live from the
  // settings UI. NOTE(review): parseInt without a radix — prefer
  // parseInt(value, 10).
  const darkThreshold = parseInt(document.getElementById('darkThresholdInput').value);
  }

  // Commit the transformed pixels back onto the canvas.
  ctx.putImageData(imageData, 0, 0);
}
233
// Resizes and compresses an image file, optionally applying high contrast.
// Decodes `file` via FileReader + Image, scales it on an offscreen canvas
// (the width/height computation is in lines not visible here), and re-encodes
// it as a JPEG at quality 0.7.
// @param {File|Blob} file - source image.
// @param {number} [maxWidth=1000] - maximum output width in pixels.
// @returns {Promise<File>} resolves with "processed_image.jpg" (image/jpeg).
//          NOTE(review): the promise never rejects — a FileReader or decode
//          error would leave it pending forever; confirm that's acceptable.
function resizeAndCompressImage(file, maxWidth = 1000) {
  return new Promise((resolve) => {
    const reader = new FileReader();
    reader.onload = function(event) {
      const img = new Image();
      img.onload = function() {
        const canvas = document.createElement('canvas');
        canvas.height = height;

        ctx.drawImage(img, 0, 0, width, height);

        // Apply high contrast only if the checkbox is checked

        // Re-encode the scaled canvas as a JPEG File.
        canvas.toBlob((blob) => {
          resolve(new File([blob], "processed_image.jpg", { type: "image/jpeg" }));
        }, 'image/jpeg', 0.7);
      };
      // Kick off decoding from the data URL produced by the reader.
      img.src = event.target.result;
}
271
// Splits an image into a grid of smaller tiles.
// Decodes `file`, slices it into rows x cols tiles of floor(w/cols) by
// floor(h/rows) pixels (the x/y loops and per-tile canvas setup are in lines
// not visible here), and encodes each tile as a JPEG at quality 0.7.
// @param {File|Blob} file - source image.
// @param {number} rows - number of tile rows.
// @param {number} cols - number of tile columns.
// @returns {Promise} resolves with the collected tile descriptors; each tile
//          carries { file, x, y, ..., originalWidth } where x/y are the tile's
//          pixel offsets in the original image.
function splitImage(file, rows, cols) {
  return new Promise((resolve) => {
    const reader = new FileReader();
    reader.onload = function(event) {
      const img = new Image();
      img.onload = function() {
        const tileWidth = Math.floor(img.width / cols);
        const ctx = canvas.getContext('2d');

        // Copy one tile-sized region of the source image onto the tile canvas.
        ctx.drawImage(img, x * tileWidth, y * tileHeight, tileWidth, tileHeight, 0, 0, tileWidth, tileHeight);

        // toBlob is async, so each tile becomes a Promise gathered into `tiles`.
        tiles.push(new Promise((resolve) => {
          canvas.toBlob((blob) => {
            resolve({
              file: new File([blob], 'tile_' + y + '_' + x + '.jpg', { type: 'image/jpeg' }),
              x: x * tileWidth,
              y: y * tileHeight,
              originalWidth: img.width // Store original width
            });
          }, 'image/jpeg', 0.7);
        }));
      }
323 for (let attempt = 1; attempt <= maxRetries; attempt++) {
324 try {
325 const imagePart = await fileToGenerativePart(tile.file);
326 const result = await model.generateContent([prompt, imagePart]);
327 const response = await result.response;
328 const text = response.text();
344 }
345
// Main function to process the image and prompt, coordinating the entire
// detection process: resolve the input image (file upload or URL), resize and
// compress it, split it into a grid of tiles, run the Gemini model per tile
// (loop elided from this excerpt), and draw the detected bounding boxes.
// Renders errors into #result rather than throwing.
async function processImageAndPrompt() {
  const fileInput = document.getElementById('imageInput');
  const urlInput = document.getElementById('imageUrlInput');
  const promptInput = document.getElementById('promptInput');
  const resultDiv = document.getElementById('result');
  // Suffix appended to the user prompt to coax the model into valid JSON.
  const promptInputAdd = ' RETURN PROPER JSON ARRAY';

  // Uploaded file takes priority over the URL field.
  let imageFile;
  if (fileInput.files[0]) {
    imageFile = fileInput.files[0];
  } else if (urlInput.value) {
    try {
      imageFile = await fetchImage(urlInput.value);
    } catch (error) {
      alert('Error loading image from URL: ' + error.message);
      return;
    }
  } else {
    alert('Please select an image file or enter an image URL.');
    return;
  }
  try {
    const model = await getGenerativeModel({ model: "gemini-1.5-pro" });
    const processedImage = await resizeAndCompressImage(imageFile, 2000);
    // rowsInput/colsInput/delayInput are looked up in lines not shown here.
    // NOTE(review): parseInt without radix throughout — prefer parseInt(v, 10).
    const rows = parseInt(rowsInput.value);
    const cols = parseInt(colsInput.value);
    const delayMs = parseInt(delayInput.value);
    const tiles = await splitImage(processedImage, rows, cols);

    let allCoordinates = [];
    // NOTE(review): flattenedCoordinates is computed but not used in the
    // visible code (displayImageWithBoundingBoxes receives allCoordinates) —
    // confirm whether it is dead code.
    const flattenedCoordinates = allCoordinates.reduce((acc, tile) => acc.concat(tile.coordinates), []);

    displayImageWithBoundingBoxes(processedImage, allCoordinates, rows, cols);
  } catch (error) {
    // Surface any pipeline failure in the results panel.
    resultDiv.innerHTML = 'Error: ' + error.message;
459
// Displays the full image with bounding boxes drawn on a canvas.
// Decodes `file`, sizes #canvas to the image, draws the image, then overlays
// the per-tile bounding boxes (the box-drawing loop is in lines not visible
// in this excerpt).
// @param {File|Blob} file - processed image to display.
// @param {Array} coordinates - per-tile detection results.
// @param {number} rows - tile grid rows (used to derive tile height).
// @param {number} cols - tile grid columns (used to derive tile width).
function displayImageWithBoundingBoxes(file, coordinates, rows, cols) {
  const reader = new FileReader();
  reader.onload = function(event) {
    const image = new Image();
    image.onload = function() {
      const canvas = document.getElementById('canvas');
      const ctx = canvas.getContext('2d');
      canvas.width = image.width;
      canvas.height = image.height;

      // Clear the canvas before drawing
      ctx.clearRect(0, 0, canvas.width, canvas.height);

      // Draw the image
      ctx.drawImage(image, 0, 0);

      // Tile geometry used to map tile-local box coordinates back onto the
      // full image; cycle through `colors` to distinguish boxes.
      const tileWidth = image.width / cols;
      const tileHeight = image.height / rows;
      const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];

      console.log("displayImageWithBoundingBoxes coord:", coordinates);

      // Draw bounding boxes
      // ctx.fillText('Total # of Boundaries: ' + totalBoundaries, 10, 30);
    };
    image.src = event.target.result;
  };
  reader.readAsDataURL(file);
518 const reader = new FileReader();
519 reader.onload = function(event) {
520 const image = new Image();
521 image.onload = function() {
522 const padding = 80;
523 const labelPadding = 40;
533 canvasWrapper.className = 'canvas-wrapper';
534
535 // Create and append the original image canvas
536 const originalCanvas = document.createElement('canvas');
537 originalCanvas.width = tile.width;
538 originalCanvas.height = tile.height;
539 const originalCtx = originalCanvas.getContext('2d');
540 originalCtx.drawImage(image, 0, 0);
541 canvasWrapper.appendChild(originalCanvas);
542
547 const ctx = annotatedCanvas.getContext('2d');
548
549 // Draw the tile image on the annotated canvas
550 ctx.drawImage(image, padding, padding);
551
552 console.log('Tile coordinates:', coordinates);
657 tilesContainer.appendChild(tileContainer);
658 };
659 image.src = event.target.result;
660 };
661 reader.readAsDataURL(tile.file);
662 }
663
// Generates a description of the image using the AI model.
// Resolves the input image (file upload or URL), resizes/compresses it, and
// asks Gemini to describe it using the prompt from #descriptionPromptInput.
// The result rendering and error handling continue in lines not visible in
// this excerpt.
async function getImageDescription() {
  const fileInput = document.getElementById('imageInput');
  const urlInput = document.getElementById('imageUrlInput');
  const descriptionPromptInput = document.getElementById('descriptionPromptInput');
  const descriptionResultDiv = document.getElementById('descriptionResult');
  const descriptionBtn = document.getElementById('descriptionBtn');

  // Same input-resolution logic as processImageAndPrompt: uploaded file
  // first, then the URL field, else bail with an alert.
  let imageFile;
  if (fileInput.files[0]) {
    imageFile = fileInput.files[0];
  } else if (urlInput.value) {
    try {
      imageFile = await fetchImage(urlInput.value);
    } catch (error) {
      alert('Error loading image from URL: ' + error.message);
      return;
    }
  } else {
    alert('Please select an image file or enter an image URL.');
    return;
  }
  try {
    const model = await getGenerativeModel({ model: "gemini-1.5-pro" });
    const processedImage = await resizeAndCompressImage(imageFile, 2000);
    // fileToGenerativePart (defined elsewhere in this file) converts the File
    // into the inline-data part format the Gemini SDK expects.
    const imagePart = await fileToGenerativePart(processedImage);

    const result = await model.generateContent([descriptionPromptInput.value, imagePart]);
    const response = await result.response;
    const text = response.text();
716
// Attach event listeners: wire the two action buttons to the detection and
// description pipelines defined above.
document.getElementById('submitBtn').addEventListener('click', processImageAndPrompt);
document.getElementById('descriptionBtn').addEventListener('click', getImageDescription);
720</script>
721
730 <meta charset="UTF-8">
731 <meta name="viewport" content="width=device-width, initial-scale=1.0">
732 <link rel="icon" type="image/png" href="https://labspace.ai/ls2-circle.png" />
733 <title>Gemini AI Bounding Box Demo</title>
734 <meta property="og:title" content="Gemini AI Bounding Box Demo" />
735 <meta property="og:description" content="Visualize object detection results using Google's Gemini 1.5 Pro AI model" />
736 <meta property="og:image" content="https://yawnxyz-og.web.val.run/img?link=https://geminibbox.labspace.ai&title=Gemini+AI+Bounding+Box&subtitle=Visualize+object+detection+with+bounding+boxes" />
737 <meta property="og:url" content="https://geminibbox.labspace.ai" />
738 <meta property="og:type" content="website" />
739 <meta name="twitter:card" content="summary_large_image" />
740 <meta name="twitter:title" content="Gemini AI Bounding Box Demo" />
741 <meta name="twitter:description" content="Visualize object detection results using Google's Gemini 1.5 Pro AI model" />
742 <meta name="twitter:image" content="https://yawnxyz-og.web.val.run/img?link=https://geminibbox.labspace.ai&title=Gemini+AI+Bounding+Box&subtitle=Visualize+object+detection+with+bounding+boxes" />
743 <script src="https://cdn.tailwindcss.com"></script>
744 <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
865 <div class="AppIntro break-words">${intro}</div>
866 <div class="mt-4">
867 <label for="imageInput">Upload an image</label>
868 <input type="file" id="imageInput" accept="image/*" onchange="loadImage(event)">
869 </div>
870 <div class="mt-4">
871 <label for="imageUrlInput">Or enter an image URL:</label>
872 <input
873 type="text"
874 id="imageUrlInput"
875 placeholder="Image URL"
876 class="mt-2"
877 >
878 </div>
879 <script>
// Handler for the file <input> (wired via onchange="loadImage(event)"):
// reads the chosen file and draws it full-size onto the preview canvas.
// @param {Event} event - change event from the file input.
function loadImage(event) {
  const file = event.target.files[0];
  const reader = new FileReader();
      // NOTE(review): the reader.onload wrapper and the canvas lookup are in
      // lines not visible here; `e` below is presumably the FileReader load
      // event and `canvas` the #canvas element — confirm against full source.
      canvas.style.display = 'block';
      const ctx = canvas.getContext('2d');
      const img = new Image();
      img.onload = function() {
        // Match the canvas to the image's natural size before drawing.
        canvas.width = img.width;
        canvas.height = img.height;
        ctx.drawImage(img, 0, 0);
      }
      img.src = e.target.result;
897 </script>
898
899 <div id="imageContainer" class="my-4">
900 <canvas id="canvas" style="display: none;"></canvas>
901 </div>
902
903 <div class="my-4 border border-app-secondary rounded-md p-4">
904 <h3 class="text-md font-semibold mb-2">Get an image description</h3>
905 <div class="mt-2">
906 <p>Sometimes getting a description of the image helps you understand what the AI sees. This can help you better set up the prompt and settings.</p>
907 <textarea id="descriptionPromptInput">Describe what you see in this image in detail.</textarea>
908 <button id="descriptionBtn">Get Description</button>
909 <div id="descriptionResult" style="display: none;"></div>
914 <h3 class="text-md font-semibold mb-2">Detect Bounding Boxes</h3>
915 <div class="mt-2">
916 <p>Use this to detect bounding boxes of objects in an image. Use the image description to help with setting up the prompt. Rows and columns determine how many tiles the image will be divided into — for simple images, 1x1 is best.</p>
917 <p>For the free API you might get the "Resource has been exhausted" error if you make too many requests too quickly.</p>
918 <textarea id="promptInput" rows=6>Identify and return bounding boxes of the (MAIN SUBJECT) \n[ymin, xmin, ymax, xmax]</textarea>
991 const response = await fetch(url, {
992 headers: {
993 'Accept': 'image/*',
994 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
995 }