Merge pull request #98 from roboflow/release/0.9.2
Release/0.9.2
paulguerrie committed Oct 13, 2023
2 parents 03003cd + e93cc9d commit 22da70a
Showing 83 changed files with 7,288 additions and 55 deletions.
34 changes: 34 additions & 0 deletions .dockerignore
@@ -0,0 +1,34 @@
inference/landing/*
!inference/landing/out/

# NodeJS (landing page)
**/node_modules/
**/dist
.git
npm-debug.log
.coverage
.coverage.*
.env
.aws

# Python (inference)
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env
pip-log.txt
pip-delete-this-directory.txt
.tox
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.log
.git
.mypy_cache
.pytest_cache
.hypothesis
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -45,7 +45,7 @@ jobs:
PORT=9101 INFERENCE_SERVER_REPO=roboflow-inference-server-cpu DOCKERFILE=Dockerfile.onnx.cpu make start_test_docker
- name: 🧪 Regression Tests
run: |
PORT=9101 API_KEY=${{ secrets.API_KEY }} asl_instance_segmentation_API_KEY=${{ secrets.ASL_INSTANCE_SEGMENTATION_API_KEY }} asl_poly_instance_seg_API_KEY=${{ secrets.ASL_POLY_INSTANCE_SEG_API_KEY }} bccd_favz3_API_KEY=${{ secrets.BCCD_FAVZ3_API_KEY }} bccd_i4nym_API_KEY=${{ secrets.BCCD_I4NYM_API_KEY }} cats_and_dogs_smnpl_API_KEY=${{ secrets.CATS_AND_DOGS_SMNPL_API_KEY }} coins_xaz9i_API_KEY=${{ secrets.COINS_XAZ9I_API_KEY }} python -m pytest tests/inference
PORT=9101 API_KEY=${{ secrets.API_KEY }} asl_instance_segmentation_API_KEY=${{ secrets.ASL_INSTANCE_SEGMENTATION_API_KEY }} asl_poly_instance_seg_API_KEY=${{ secrets.ASL_POLY_INSTANCE_SEG_API_KEY }} bccd_favz3_API_KEY=${{ secrets.BCCD_FAVZ3_API_KEY }} bccd_i4nym_API_KEY=${{ secrets.BCCD_I4NYM_API_KEY }} cats_and_dogs_smnpl_API_KEY=${{ secrets.CATS_AND_DOGS_SMNPL_API_KEY }} coins_xaz9i_API_KEY=${{ secrets.COINS_XAZ9I_API_KEY }} melee_API_KEY=${{ secrets.MELEE_API_KEY }} python -m pytest tests/inference
- name: 🧪 Unit Tests of clients
run: |
pip install -r requirements/requirements.sdk.http.txt
3 changes: 3 additions & 0 deletions .gitignore
@@ -139,6 +139,7 @@ profile/
*.pstats
*.jpeg
*.png
!icon.png
*.mp4
*.roboflow.txt
dev_tools/
@@ -150,3 +151,5 @@ _annotations*
export_api_keys.sh
inference_cli/version.py
inference_sdk/version.py

**/.DS_Store
2 changes: 1 addition & 1 deletion Makefile
@@ -16,7 +16,7 @@ check_code_quality:
flake8 $(check_dirs) --count --max-line-length=88 --exit-zero --ignore=D --extend-ignore=E203,E501,W503 --statistics

start_test_docker:
docker run -d --rm -p $(PORT):$(PORT) -e PORT=$(PORT) --name inference-test roboflow/roboflow-inference-server-cpu:test
docker run -d --rm -p $(PORT):$(PORT) -e PORT=$(PORT) -e MAX_BATCH_SIZE=17 --name inference-test roboflow/roboflow-inference-server-cpu:test

create_wheels:
rm -f dist/*
6 changes: 5 additions & 1 deletion inference/core/env.py
@@ -184,7 +184,11 @@ def required_providers_env(val):
MAX_ACTIVE_MODELS = int(os.getenv("MAX_ACTIVE_MODELS", 8))

# Maximum batch size, default is 8
MAX_BATCH_SIZE = int(os.getenv("MAX_BATCH_SIZE", 8))
MAX_BATCH_SIZE = os.getenv("MAX_BATCH_SIZE", None)
if MAX_BATCH_SIZE is not None:
MAX_BATCH_SIZE = int(MAX_BATCH_SIZE)
else:
MAX_BATCH_SIZE = float("inf")

# Maximum number of candidates, default is 3000
MAX_CANDIDATES = int(os.getenv("MAX_CANDIDATES", 3000))
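With this change an unset `MAX_BATCH_SIZE` now means "no limit" (`float("inf")`) rather than the old hard-coded default of 8, and the Makefile change above sets `MAX_BATCH_SIZE=17` in the test container so the capped path gets exercised. A minimal standalone sketch of the same parsing logic (not the repository's `env.py` itself):

```python
import os

# Unset MAX_BATCH_SIZE means "no limit"; representing that as float("inf")
# keeps later numeric comparisons (e.g. batch_size > MAX_BATCH_SIZE) valid.
_raw = os.getenv("MAX_BATCH_SIZE", None)
MAX_BATCH_SIZE = int(_raw) if _raw is not None else float("inf")


def batch_allowed(batch_size: int) -> bool:
    """True when a request's batch fits under the configured limit."""
    return batch_size <= MAX_BATCH_SIZE


if __name__ == "__main__":
    # With MAX_BATCH_SIZE unset this prints True; with MAX_BATCH_SIZE=17
    # (as in the updated start_test_docker target) an 18-image batch fails.
    print(batch_allowed(18))
```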
36 changes: 17 additions & 19 deletions inference/core/interfaces/http/http_api.py
@@ -5,7 +5,8 @@
import uvicorn
from fastapi import Body, FastAPI, Path, Query, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, Response
from fastapi.responses import FileResponse, JSONResponse, RedirectResponse, Response
from fastapi.staticfiles import StaticFiles
from fastapi_cprofile.profiler import CProfileMiddleware

from inference.core import data_models as M
@@ -275,23 +276,13 @@ def load_core_model(
The GAZE model ID.
"""

@app.get(
"/",
response_model=M.ServerVersionInfo,
summary="Root",
description="Get the server name and version number",
)
async def root():
"""Endpoint to get the server name and version number.
# @app.get("/")
# async def index():
# return FileResponse("./inference/landing/out/index.html")

Returns:
M.ServerVersionInfo: The server version information.
"""
return M.ServerVersionInfo(
name="Roboflow Inference Server",
version=__version__,
uuid=GLOBAL_INFERENCE_SERVER_ID,
)
# @app.get("/")
# async def read_root():
# return RedirectResponse(url="/app")

@app.get(
"/info",
@@ -868,8 +859,9 @@ async def legacy_infer_from_request(
)
else:
request_model_id = model_id

self.model_manager.add_model(request_model_id, api_key)
self.model_manager.add_model(
request_model_id, api_key, model_id_alias=model_id
)

task_type = self.model_manager.get_task_type(request_model_id)
inference_request_type = M.ObjectDetectionInferenceRequest
@@ -952,5 +944,11 @@ async def model_add(dataset_id: str, version_id: str, api_key: str = None):
}
)

app.mount(
"/",
StaticFiles(directory="./inference/landing/out", html=True),
name="static",
)

def run(self):
uvicorn.run(self.app, host="127.0.0.1", port=8080)
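The root endpoint that returned `ServerVersionInfo` is commented out and replaced by a `StaticFiles` mount that serves the new landing page; because the mount is added after the API routes, those routes still take precedence. A minimal sketch of that ordering pattern (the `static_dir` name and `/info` handler here are illustrative, not the server's actual code):

```python
import os

from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles

app = FastAPI()


# API routes are registered first, so they win over the catch-all mount below.
@app.get("/info")
async def info():
    return {"name": "example-server", "version": "0.0.0"}


# Stand-in for ./inference/landing/out; StaticFiles requires the directory
# to exist when the app is constructed.
os.makedirs("static_dir", exist_ok=True)

# html=True makes the mount serve static_dir/index.html for "/".
app.mount("/", StaticFiles(directory="static_dir", html=True), name="static")
```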
10 changes: 7 additions & 3 deletions inference/core/managers/base.py
@@ -1,6 +1,6 @@
import time
from dataclasses import dataclass, field
from typing import Dict, List, Tuple
from typing import Dict, List, Optional, Tuple

import numpy as np
from fastapi.encoders import jsonable_encoder
@@ -36,7 +36,9 @@ def init_pingback(self):
self.pingback = PingbackInfo(self)
self.pingback.start()

def add_model(self, model_id: str, api_key: str) -> None:
def add_model(
self, model_id: str, api_key: str, model_id_alias: Optional[str] = None
) -> None:
"""Adds a new model to the manager.
Args:
@@ -45,7 +47,9 @@ def add_model(self, model_id: str, api_key: str) -> None:
"""
if model_id in self._models:
return
model = self.model_registry.get_model(model_id, api_key)(
model = self.model_registry.get_model(
model_id if model_id_alias is None else model_id_alias, api_key
)(
model_id=model_id,
api_key=api_key,
)
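`add_model` gains an optional `model_id_alias`: when it is provided, the registry class lookup uses the alias while the instantiated model keeps the `model_id` that was passed in. A toy sketch of that resolution step, with a plain dict standing in for the real model registry (all names here are illustrative, not the repository's):

```python
from typing import Dict, Optional


class StubModel:
    """Placeholder for a Roboflow model class."""

    def __init__(self, model_id: str, api_key: str):
        self.model_id = model_id
        self.api_key = api_key


# Registry keyed by the id (or alias) used to pick the model class.
REGISTRY: Dict[str, type] = {"coco/3": StubModel}
_models: Dict[str, StubModel] = {}


def add_model(model_id: str, api_key: str, model_id_alias: Optional[str] = None) -> None:
    if model_id in _models:
        return
    # Class lookup uses the alias when one is given; the instance keeps model_id.
    lookup_id = model_id if model_id_alias is None else model_id_alias
    _models[model_id] = REGISTRY[lookup_id](model_id=model_id, api_key=api_key)


add_model("my-dataset/1", api_key="fake-key", model_id_alias="coco/3")
print(_models["my-dataset/1"].model_id)  # -> my-dataset/1
```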
12 changes: 9 additions & 3 deletions inference/core/managers/decorators/base.py
@@ -1,8 +1,10 @@
from typing import Optional

from inference.core.data_models import InferenceRequest, InferenceResponse
from inference.core.managers.base import Model, ModelManager


class ModelManagerDecorator(ModelManager):
class ModelManagerDecorator:
"""Basic decorator, it acts like a `ModelManager` and contains a `ModelManager`.
Args:
@@ -26,14 +28,18 @@ def __init__(self, model_manager: ModelManager):
"""Initializes the decorator with an instance of a ModelManager."""
self.model_manager = model_manager

def add_model(self, model_id: str, api_key: str):
def add_model(
self, model_id: str, api_key: str, model_id_alias: Optional[str] = None
):
"""Adds a model to the manager.
Args:
model_id (str): The identifier of the model.
model (Model): The model instance.
"""
self.model_manager.add_model(model_id, api_key)
if model_id in self:
return
self.model_manager.add_model(model_id, api_key, model_id_alias=model_id_alias)

def infer_from_request(
self, model_id: str, request: InferenceRequest
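`ModelManagerDecorator` no longer subclasses `ModelManager`; it wraps one, forwards calls, and now short-circuits `add_model` when the model is already present. A compact sketch of that wrapper pattern with stand-in classes (not the repository's real types):

```python
from typing import Optional


class SimpleManager:
    def __init__(self):
        self._models = {}

    def add_model(self, model_id: str, api_key: str,
                  model_id_alias: Optional[str] = None) -> None:
        self._models[model_id] = object()  # placeholder for a loaded model

    def __contains__(self, model_id: str) -> bool:
        return model_id in self._models


class ManagerDecorator:
    """Wraps a manager and delegates to it instead of inheriting."""

    def __init__(self, manager: SimpleManager):
        self.manager = manager

    def add_model(self, model_id: str, api_key: str,
                  model_id_alias: Optional[str] = None) -> None:
        if model_id in self:  # already loaded: skip the delegate entirely
            return
        self.manager.add_model(model_id, api_key, model_id_alias=model_id_alias)

    def __contains__(self, model_id: str) -> bool:
        return model_id in self.manager


manager = ManagerDecorator(SimpleManager())
manager.add_model("cats/1", api_key="fake-key")
manager.add_model("cats/1", api_key="fake-key")  # second call is a no-op
```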
20 changes: 17 additions & 3 deletions inference/core/managers/decorators/fixed_size_cache.py
@@ -1,5 +1,5 @@
from collections import deque
from typing import List
from typing import List, Optional

from inference.core.data_models import InferenceRequest, InferenceResponse
from inference.core.managers.base import Model, ModelManager
@@ -19,20 +19,34 @@ def __init__(self, model_manager: ModelManager, max_size: int = 8):
self.max_size = max_size
self._key_queue = deque(self.model_manager.keys())

def add_model(self, model_id: str, api_key: str):
def add_model(
self, model_id: str, api_key: str, model_id_alias: Optional[str] = None
):
"""Adds a model to the manager and evicts the least recently used if the cache is full.
Args:
model_id (str): The identifier of the model.
model (Model): The model instance.
"""
if model_id in self:
self._key_queue.remove(model_id)
self._key_queue.append(model_id)
return

should_pop = len(self) == self.max_size
if should_pop:
to_remove_model_id = self._key_queue.popleft()
self.remove(to_remove_model_id)

self._key_queue.append(model_id)
return super().add_model(model_id, api_key)
return super().add_model(model_id, api_key, model_id_alias=model_id_alias)

def remove(self, model_id: str) -> Model:
try:
self._key_queue.remove(model_id)
except ValueError:
pass
return super().remove(model_id)

def infer_from_request(
self, model_id: str, request: InferenceRequest
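The fixed-size cache now behaves like a proper LRU: re-adding a cached model refreshes its position in the deque, a full cache evicts from the front, and `remove` tolerates ids that have already left the queue. A self-contained sketch of that eviction logic (simplified; no real models are loaded):

```python
from collections import deque


class FixedSizeModelCache:
    """Keeps at most max_size model ids, evicting the least recently used."""

    def __init__(self, max_size: int = 8):
        self.max_size = max_size
        self._key_queue = deque()
        self._models = {}

    def add_model(self, model_id: str) -> None:
        if model_id in self._models:
            # Already loaded: move it to the back instead of reloading.
            self._key_queue.remove(model_id)
            self._key_queue.append(model_id)
            return
        if len(self._models) == self.max_size:
            self.remove(self._key_queue.popleft())  # evict the oldest entry
        self._key_queue.append(model_id)
        self._models[model_id] = object()  # placeholder for a loaded model

    def remove(self, model_id: str) -> None:
        try:
            self._key_queue.remove(model_id)
        except ValueError:
            pass  # id was already popped during eviction
        self._models.pop(model_id, None)


cache = FixedSizeModelCache(max_size=2)
for mid in ["a/1", "b/1", "a/1", "c/1"]:
    cache.add_model(mid)
print(list(cache._key_queue))  # -> ['a/1', 'c/1']; 'b/1' was evicted
```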
8 changes: 6 additions & 2 deletions inference/core/managers/decorators/logger.py
@@ -1,3 +1,5 @@
from typing import Optional

from inference.core.data_models import InferenceRequest, InferenceResponse
from inference.core.logger import logger
from inference.core.managers.base import Model
@@ -7,7 +9,9 @@
class WithLogger(ModelManagerDecorator):
"""Logger Decorator, it logs what's going on inside the manager."""

def add_model(self, model_id: str, api_key: str):
def add_model(
self, model_id: str, api_key: str, model_id_alias: Optional[str] = None
):
"""Adds a model to the manager and logs the action.
Args:
@@ -18,7 +22,7 @@ def add_model(self, model_id: str, api_key: str):
The result of the add_model method from the superclass.
"""
logger.info(f"🤖 {model_id} added.")
return super().add_model(model_id, api_key)
return super().add_model(model_id, api_key, model_id_alias=model_id_alias)

def infer_from_request(
self, model_id: str, request: InferenceRequest
30 changes: 25 additions & 5 deletions inference/core/models/object_detection_base.py
@@ -8,6 +8,7 @@
ObjectDetectionPrediction,
)
from inference.core.env import FIX_BATCH_SIZE, MAX_BATCH_SIZE
from inference.core.logger import logger
from inference.core.models.roboflow import OnnxRoboflowInferenceModel
from inference.core.models.types import PreprocessReturnMetadata
from inference.core.nms import w_np_non_max_suppression
@@ -71,6 +72,10 @@ def infer(
raise ValueError(
f"Batching is not enabled for this model, but {batch_size} images were passed in the request"
)
if batch_size > MAX_BATCH_SIZE:
raise ValueError(
f"Request has {batch_size} images but MAX_BATCH_SIZE is set to {MAX_BATCH_SIZE}"
)
return super().infer(
image,
class_agnostic_nms=class_agnostic_nms,
@@ -109,6 +114,9 @@ def make_response(
if isinstance(img_dims, dict) and "img_dims" in img_dims:
img_dims = img_dims["img_dims"]

predictions = predictions[
: len(img_dims)
] # If the batch size was fixed we have empty preds at the end
responses = [
ObjectDetectionInferenceResponse(
predictions=[
@@ -218,11 +226,22 @@ def preprocess(
img_in /= 255.0

if self.batching_enabled:
batch_padding = (
MAX_BATCH_SIZE - img_in.shape[0]
if FIX_BATCH_SIZE or fix_batch_size
else 0
)
batch_padding = 0
if FIX_BATCH_SIZE or fix_batch_size:
if MAX_BATCH_SIZE == float("inf"):
logger.warn(
"Requested fix_batch_size but MAX_BATCH_SIZE is not set. Using dynamic batching."
)
batch_padding = 0
else:
batch_padding = MAX_BATCH_SIZE - img_in.shape[0]
if batch_padding < 0:
raise ValueError(
f"Requested fix_batch_size but passed in {img_in.shape[0]} images "
f"when the model's batch size is {MAX_BATCH_SIZE}\n"
f"Consider turning off fix_batch_size, changing `MAX_BATCH_SIZE` in"
f"your inference server config, or passing at most {MAX_BATCH_SIZE} images at a time"
)
width_remainder = img_in.shape[2] % 32
height_remainder = img_in.shape[3] % 32
if width_remainder > 0:
@@ -233,6 +252,7 @@
height_padding = 32 - (img_in.shape[3] % 32)
else:
height_padding = 0
print(width_padding, height_padding, batch_padding)
img_in = np.pad(
img_in,
((0, batch_padding), (0, 0), (0, width_padding), (0, height_padding)),
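Two related guards land in this file: `infer` rejects requests whose batch exceeds `MAX_BATCH_SIZE`, and `preprocess` only pads the batch up to a fixed size when a limit is actually configured (with `make_response` later trimming predictions back to the real image count). A simplified NumPy-only sketch of the padding arithmetic, leaving out the model's 32-pixel spatial padding (`MAX_BATCH_SIZE = 4` is an assumed value; the server reads it from the environment):

```python
import numpy as np

MAX_BATCH_SIZE = 4  # assumed here; the server derives this from the environment


def pad_batch(img_in: np.ndarray, fix_batch_size: bool) -> np.ndarray:
    """Pads an (N, C, H, W) batch with zero images up to MAX_BATCH_SIZE."""
    batch_padding = 0
    if fix_batch_size:
        if MAX_BATCH_SIZE == float("inf"):
            batch_padding = 0  # no configured limit, keep dynamic batching
        else:
            batch_padding = MAX_BATCH_SIZE - img_in.shape[0]
            if batch_padding < 0:
                raise ValueError(
                    f"Got {img_in.shape[0]} images but the fixed batch size is {MAX_BATCH_SIZE}"
                )
    return np.pad(img_in, ((0, batch_padding), (0, 0), (0, 0), (0, 0)))


batch = np.zeros((3, 3, 640, 640), dtype=np.float32)
padded = pad_batch(batch, fix_batch_size=True)
print(padded.shape[0])  # -> 4; make_response would slice back to 3 predictions
```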
18 changes: 4 additions & 14 deletions inference/core/utils/image_utils.py
@@ -127,20 +127,10 @@ def load_image_base64(value: str, cv_imread_flags=cv2.IMREAD_COLOR) -> np.ndarray
# New routes accept images via json body (str), legacy routes accept bytes which need to be decoded as strings
if not isinstance(value, str):
value = value.decode("utf-8")

try:
value = pybase64.b64decode(value)
image_np = np.frombuffer(value, np.uint8)
return cv2.imdecode(image_np, cv_imread_flags)
except Exception as e:
# The variable "pattern" isn't defined in the original function. Assuming it exists somewhere in your code.
# Sometimes base64 strings that were encoded by a browser are padded with extra characters, so we need to remove them
# print traceback
traceback.print_exc()
value = pattern.sub("", value)
value = pybase64.b64decode(value)
image_np = np.frombuffer(value, np.uint8)
return cv2.imdecode(image_np, cv_imread_flags)
value = pattern.sub("", value)
value = pybase64.b64decode(value)
image_np = np.frombuffer(value, np.uint8)
return cv2.imdecode(image_np, cv_imread_flags)


def load_image_multipart(value, cv_imread_flags=cv2.IMREAD_COLOR) -> np.ndarray:
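`load_image_base64` drops the try/except fallback and now always strips non-base64 characters (via the module's precompiled `pattern`) before decoding. A standalone sketch of the same decode path, using the standard library's `base64` and `re` in place of `pybase64` and the module-level `pattern` (whose exact regex is not shown in this diff):

```python
import base64
import re

import cv2
import numpy as np

# Stand-in for the module-level `pattern`: drop anything that is not a
# legal base64 character (data-URL prefixes, whitespace, etc.).
NON_BASE64 = re.compile(r"[^A-Za-z0-9+/=]")


def load_image_base64(value, cv_imread_flags=cv2.IMREAD_COLOR) -> np.ndarray:
    # Legacy routes pass bytes; newer routes pass a str from the JSON body.
    if not isinstance(value, str):
        value = value.decode("utf-8")
    value = NON_BASE64.sub("", value)          # sanitize before decoding
    buf = np.frombuffer(base64.b64decode(value), np.uint8)
    return cv2.imdecode(buf, cv_imread_flags)  # None if bytes are not an image


# Round-trip check with a tiny synthetic image.
_, png = cv2.imencode(".png", np.zeros((2, 2, 3), dtype=np.uint8))
decoded = load_image_base64(base64.b64encode(png.tobytes()))
print(decoded.shape)  # -> (2, 2, 3)
```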
2 changes: 1 addition & 1 deletion inference/core/version.py
@@ -1,4 +1,4 @@
__version__ = "0.9.1"
__version__ = "0.9.2"


if __name__ == "__main__":
3 changes: 3 additions & 0 deletions inference/landing/.eslintrc.json
@@ -0,0 +1,3 @@
{
"extends": "next/core-web-vitals"
}
(The remaining changed files in this commit are not shown in this view.)
