Merge pull request #629 from roboflow/feature/redesign_of_llm_and_vlm

Redesign of VLM and LLM blocks
roboflow · Sep 6, 2024 · 297d420 · 297d420
2 parents f45660b + d9a196d
commit 297d420
Show file tree

Hide file tree

Showing 36 changed files with 6,430 additions and 120 deletions.
diff --git a/development/docs/build_block_docs.py b/development/docs/build_block_docs.py
@@ -361,7 +361,7 @@ def format_block_connections(
     connections = [
         (
             f"[`{block_type2manifest_type_identifier[connection]}`]"
-            f"(/workflows/blocks/{camel_to_snake(block_type2manifest_type_identifier[connection])})"
+            f"(/workflows/blocks/{slugify_block_name(block_type2manifest_type_identifier[connection])})"
         )
         for connection in connections
     ]

diff --git a/docs/workflows/blocks.md b/docs/workflows/blocks.md
@@ -72,6 +72,11 @@ hide:
 <p class="card block-card" data-url="image_threshold" data-name="Image Threshold" data-desc="Apply a threshold to an image." data-labels="CLASSICAL_COMPUTER_VISION, APACHE-2.0" data-author="dummy"></p>
 <p class="card block-card" data-url="image_contours" data-name="Image Contours" data-desc="Find and count the contours on an image." data-labels="CLASSICAL_COMPUTER_VISION, APACHE-2.0" data-author="dummy"></p>
 <p class="card block-card" data-url="camera_focus" data-name="Camera Focus" data-desc="Helps focus a camera by providing a focus measure." data-labels="CLASSICAL_COMPUTER_VISION, APACHE-2.0" data-author="dummy"></p>
+<p class="card block-card" data-url="json_parser" data-name="JSON Parser" data-desc="Parses raw string into JSON." data-labels="FORMATTER, APACHE-2.0" data-author="dummy"></p>
+<p class="card block-card" data-url="vl_mas_classifier" data-name="VLM as Classifier" data-desc="Parses raw string into classification prediction." data-labels="FORMATTER, APACHE-2.0" data-author="dummy"></p>
+<p class="card block-card" data-url="google_gemini" data-name="Google Gemini" data-desc="Run Google's Gemini model with vision capabilities" data-labels="MODEL, APACHE-2.0" data-author="dummy"></p>
+<p class="card block-card" data-url="vl_mas_detector" data-name="VLM as Detector" data-desc="Parses raw string into object-detection prediction." data-labels="FORMATTER, APACHE-2.0" data-author="dummy"></p>
+<p class="card block-card" data-url="anthropic_claude" data-name="Anthropic Claude" data-desc="Run Anthropic Claude model with vision capabilities" data-labels="MODEL, APACHE-2.0" data-author="dummy"></p>
 <!--- AUTOGENERATED_BLOCKS_LIST -->
     </div>
   </div>

diff --git a/docs/workflows/create_workflow_block.md b/docs/workflows/create_workflow_block.md
@@ -1050,7 +1050,7 @@ def run(self, predictions: List[dict]) -> BlockResult:
     )
     from inference.core.workflows.execution_engine.entities.types import (
         StepOutputSelector,
-        BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+        OBJECT_DETECTION_PREDICTION_KIND,
     )
     from inference.core.workflows.prototypes.block import (
         BlockResult,
@@ -1063,7 +1063,7 @@ def run(self, predictions: List[dict]) -> BlockResult:
     class BlockManifest(WorkflowBlockManifest):
         type: Literal["my_plugin/fusion_of_predictions@v1"]
         name: str
-        predictions: List[StepOutputSelector(kind=[BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND])] = Field(
+        predictions: List[StepOutputSelector(kind=[OBJECT_DETECTION_PREDICTION_KIND])] = Field(
             description="Selectors to step outputs",
             examples=[["$steps.model_1.predictions", "$steps.model_2.predictions"]],
         )
@@ -1073,7 +1073,7 @@ def run(self, predictions: List[dict]) -> BlockResult:
             return [
               OutputDefinition(
                 name="predictions", 
-                kind=[BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND],
+                kind=[OBJECT_DETECTION_PREDICTION_KIND],
               )
             ]
 
@@ -1251,8 +1251,8 @@ the method signatures.
             ImageParentMetadata,
         )
         from inference.core.workflows.execution_engine.entities.types import (
-            BATCH_OF_IMAGES_KIND,
-            BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+            IMAGE_KIND,
+            OBJECT_DETECTION_PREDICTION_KIND,
             StepOutputImageSelector,
             StepOutputSelector,
             WorkflowImageSelector,
@@ -1267,7 +1267,7 @@ the method signatures.
             type: Literal["my_block/dynamic_crop@v1"]
             image: Union[WorkflowImageSelector, StepOutputImageSelector]
             predictions: StepOutputSelector(
-                kind=[BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND],
+                kind=[OBJECT_DETECTION_PREDICTION_KIND],
             )
         
             @classmethod
@@ -1277,7 +1277,7 @@ the method signatures.
             @classmethod
             def describe_outputs(cls) -> List[OutputDefinition]:
                 return [
-                    OutputDefinition(name="crops", kind=[BATCH_OF_IMAGES_KIND]),
+                    OutputDefinition(name="crops", kind=[IMAGE_KIND]),
                 ]
         
             @classmethod
@@ -1340,8 +1340,8 @@ the method signatures.
             WorkflowImageData,
         )
         from inference.core.workflows.execution_engine.entities.types import (
-            BATCH_OF_IMAGES_KIND,
-            BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+            IMAGE_KIND,
+            OBJECT_DETECTION_PREDICTION_KIND,
             StepOutputImageSelector,
             StepOutputSelector,
             WorkflowImageSelector,
@@ -1357,7 +1357,7 @@ the method signatures.
             type: Literal["my_plugin/tile_detections@v1"]
             crops: Union[WorkflowImageSelector, StepOutputImageSelector]
             crops_predictions: StepOutputSelector(
-                kind=[BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND]
+                kind=[OBJECT_DETECTION_PREDICTION_KIND]
             )
         
             @classmethod
@@ -1367,7 +1367,7 @@ the method signatures.
             @classmethod
             def describe_outputs(cls) -> List[OutputDefinition]:
                 return [
-                    OutputDefinition(name="visualisations", kind=[BATCH_OF_IMAGES_KIND]),
+                    OutputDefinition(name="visualisations", kind=[IMAGE_KIND]),
                 ]
         
         
@@ -1427,7 +1427,7 @@ the method signatures.
             WorkflowImageData,
         )
         from inference.core.workflows.execution_engine.entities.types import (
-            BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+            OBJECT_DETECTION_PREDICTION_KIND,
             StepOutputImageSelector,
             StepOutputSelector,
             WorkflowImageSelector,
@@ -1443,7 +1443,7 @@ the method signatures.
             type: Literal["my_plugin/stitch@v1"]
             image: Union[WorkflowImageSelector, StepOutputImageSelector]
             image_predictions: StepOutputSelector(
-                kind=[BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND],
+                kind=[OBJECT_DETECTION_PREDICTION_KIND],
             )
         
             @classmethod
@@ -1463,7 +1463,7 @@ the method signatures.
                     OutputDefinition(
                         name="predictions",
                         kind=[
-                            BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+                            OBJECT_DETECTION_PREDICTION_KIND,
                         ],
                     ),
                 ]
@@ -1526,8 +1526,8 @@ the method signatures.
             Batch,
         )
         from inference.core.workflows.execution_engine.entities.types import (
-            BATCH_OF_IMAGES_KIND,
-            BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+            IMAGE_KIND,
+            OBJECT_DETECTION_PREDICTION_KIND,
             StepOutputImageSelector,
             StepOutputSelector,
             WorkflowImageSelector,
@@ -1542,7 +1542,7 @@ the method signatures.
             type: Literal["my_block/dynamic_crop@v1"]
             image: Union[WorkflowImageSelector, StepOutputImageSelector]
             predictions: StepOutputSelector(
-                kind=[BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND],
+                kind=[OBJECT_DETECTION_PREDICTION_KIND],
             )
 
             @classmethod
@@ -1556,7 +1556,7 @@ the method signatures.
             @classmethod
             def describe_outputs(cls) -> List[OutputDefinition]:
                 return [
-                    OutputDefinition(name="crops", kind=[BATCH_OF_IMAGES_KIND]),
+                    OutputDefinition(name="crops", kind=[IMAGE_KIND]),
                 ]
         
             @classmethod
@@ -1629,8 +1629,8 @@ the method signatures.
             WorkflowImageData,
         )
         from inference.core.workflows.execution_engine.entities.types import (
-            BATCH_OF_IMAGES_KIND,
-            BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+            IMAGE_KIND,
+            OBJECT_DETECTION_PREDICTION_KIND,
             StepOutputImageSelector,
             StepOutputSelector,
             WorkflowImageSelector,
@@ -1646,7 +1646,7 @@ the method signatures.
             type: Literal["my_plugin/tile_detections@v1"]
             images_crops: Union[WorkflowImageSelector, StepOutputImageSelector]
             crops_predictions: StepOutputSelector(
-                kind=[BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND]
+                kind=[OBJECT_DETECTION_PREDICTION_KIND]
             )
 
             @classmethod
@@ -1660,7 +1660,7 @@ the method signatures.
             @classmethod
             def describe_outputs(cls) -> List[OutputDefinition]:
                 return [
-                    OutputDefinition(name="visualisations", kind=[BATCH_OF_IMAGES_KIND]),
+                    OutputDefinition(name="visualisations", kind=[IMAGE_KIND]),
                 ]
         
         
@@ -1726,7 +1726,7 @@ the method signatures.
             WorkflowImageData,
         )
         from inference.core.workflows.execution_engine.entities.types import (
-            BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+            OBJECT_DETECTION_PREDICTION_KIND,
             StepOutputImageSelector,
             StepOutputSelector,
             WorkflowImageSelector,
@@ -1742,7 +1742,7 @@ the method signatures.
             type: Literal["my_plugin/stitch@v1"]
             images: Union[WorkflowImageSelector, StepOutputImageSelector]
             images_predictions: StepOutputSelector(
-                kind=[BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND],
+                kind=[OBJECT_DETECTION_PREDICTION_KIND],
             )
 
             @classmethod
@@ -1766,7 +1766,7 @@ the method signatures.
                     OutputDefinition(
                         name="predictions",
                         kind=[
-                            BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
+                            OBJECT_DETECTION_PREDICTION_KIND,
                         ],
                     ),
                 ]

diff --git a/docs/workflows/gallery_index.md b/docs/workflows/gallery_index.md
@@ -7,6 +7,7 @@ Browse through the various categories to find inspiration and ideas for building
 	<li><a href="/workflows/gallery/workflows_with_multiple_models">Workflows with multiple models</a></li>
 	<li><a href="/workflows/gallery/workflows_enhanced_by_roboflow_platform">Workflows enhanced by Roboflow Platform</a></li>
 	<li><a href="/workflows/gallery/workflows_with_classical_computer_vision_methods">Workflows with classical Computer Vision methods</a></li>
+	<li><a href="/workflows/gallery/workflows_with_visual_language_models">Workflows with Visual Language Models</a></li>
 	<li><a href="/workflows/gallery/basic_workflows">Basic Workflows</a></li>
 	<li><a href="/workflows/gallery/workflows_with_dynamic_python_blocks">Workflows with dynamic Python Blocks</a></li>
 	<li><a href="/workflows/gallery/workflows_with_data_transformations">Workflows with data transformations</a></li>

diff --git a/docs/workflows/kinds.md b/docs/workflows/kinds.md
@@ -23,49 +23,50 @@ for the presence of a mask in the input.
 
 !!! Warning
 
-    The list presented below contains elements with `Batch[X]` markers - those will 
-    get soon deprecated and we will use only `X` markers. For now, developers are asked 
-    to create their blocks using the `Batch[X]` markers, but raise the 
-    [issue here](https://github.com/roboflow/inference/issues/608). This GH issue will be used 
-    as a point of communication regarding deprecation process.
+    In `inference` release `0.18.0` we decided to make a drastic move to heal the ecosystem
+    from the problem of ambiguous kind names (`Batch[X]` vs `X` - see more
+    [here](https://github.com/roboflow/inference/issues/608)).
+
+    The change is breaking only if there is a remote Workflow plugin depending on imports
+    from the `inference.core.workflows.execution_engine.entities.types` module, which is
+    not the case to the best of our knowledge. We removed the problematic kinds as if they
+    never existed in the ecosystem and fixed all blocks from the `roboflow_core` plugin.
+    If you are impacted by the change, here is the
+    [migration guide](https://github.com/roboflow/inference/releases/tag/v0.18.0).
 
 
 ## Kinds declared in Roboflow plugins
 <!--- AUTOGENERATED_KINDS_LIST -->
-* [`zone`](/workflows/kinds/zone): Definition of polygon zone
-* [`Batch[dictionary]`](/workflows/kinds/batch_dictionary): Batch of dictionaries
-* [`dictionary`](/workflows/kinds/dictionary): Dictionary
-* [`point`](/workflows/kinds/point): Single point in 2D
-* [`Batch[parent_id]`](/workflows/kinds/batch_parent_id): Identifier of parent for step output
-* [`roboflow_model_id`](/workflows/kinds/roboflow_model_id): Roboflow model id
-* [`Batch[classification_prediction]`](/workflows/kinds/batch_classification_prediction): `'predictions'` key from Classification Model outputs
-* [`Batch[top_class]`](/workflows/kinds/batch_top_class): Batch of string values representing top class predicted by classification model
-* [`rgb_color`](/workflows/kinds/rgb_color): RGB color
-* [`Batch[keypoint_detection_prediction]`](/workflows/kinds/batch_keypoint_detection_prediction): `'predictions'` key from Keypoint Detection Model output
-* [`Batch[serialised_payloads]`](/workflows/kinds/batch_serialised_payloads): List of serialised elements that can be registered in the sink
+* [`bar_code_detection`](/workflows/kinds/bar_code_detection): Prediction with barcode detection
+* [`language_model_output`](/workflows/kinds/language_model_output): LLM / VLM output
+* [`top_class`](/workflows/kinds/top_class): String value representing top class predicted by classification model
+* [`prediction_type`](/workflows/kinds/prediction_type): String value with type of prediction
+* [`object_detection_prediction`](/workflows/kinds/object_detection_prediction): Prediction with detected bounding boxes in form of sv.Detections(...) object
+* [`qr_code_detection`](/workflows/kinds/qr_code_detection): Prediction with QR code detection
+* [`image_metadata`](/workflows/kinds/image_metadata): Dictionary with image metadata required by supervision
 * [`float_zero_to_one`](/workflows/kinds/float_zero_to_one): `float` value in range `[0.0, 1.0]`
-* [`Batch[boolean]`](/workflows/kinds/batch_boolean): Boolean values batch
-* [`list_of_values`](/workflows/kinds/list_of_values): List of values of any types
-* [`Batch[instance_segmentation_prediction]`](/workflows/kinds/batch_instance_segmentation_prediction): `'predictions'` key from Instance Segmentation Model outputs
-* [`Batch[qr_code_detection]`](/workflows/kinds/batch_qr_code_detection): Prediction with QR code detection
+* [`parent_id`](/workflows/kinds/parent_id): Identifier of parent for step output
+* [`keypoint_detection_prediction`](/workflows/kinds/keypoint_detection_prediction): Prediction with detected bounding boxes and detected keypoints in form of sv.Detections(...) object
+* [`float`](/workflows/kinds/float): Float value
+* [`*`](/workflows/kinds/*): Equivalent of any element
 * [`contours`](/workflows/kinds/contours): List of numpy arrays where each array represents contour points
-* [`Batch[image]`](/workflows/kinds/batch_image): Image in workflows
+* [`boolean`](/workflows/kinds/boolean): Boolean flag
 * [`detection`](/workflows/kinds/detection): Single element of detections-based prediction (like `object_detection_prediction`)
-* [`Batch[prediction_type]`](/workflows/kinds/batch_prediction_type): String value with type of prediction
+* [`roboflow_project`](/workflows/kinds/roboflow_project): Roboflow project name
+* [`dictionary`](/workflows/kinds/dictionary): Dictionary
+* [`numpy_array`](/workflows/kinds/numpy_array): Numpy array
 * [`roboflow_api_key`](/workflows/kinds/roboflow_api_key): Roboflow API key
 * [`string`](/workflows/kinds/string): String value
-* [`*`](/workflows/kinds/*): Equivalent of any element
-* [`float`](/workflows/kinds/float): Float value
-* [`keypoint_detection_prediction`](/workflows/kinds/keypoint_detection_prediction): Prediction with detected bounding boxes and detected keypoints in form of sv.Detections(...) object
-* [`Batch[object_detection_prediction]`](/workflows/kinds/batch_object_detection_prediction): `'predictions'` key from Object Detection Model output
-* [`integer`](/workflows/kinds/integer): Integer value
-* [`roboflow_project`](/workflows/kinds/roboflow_project): Roboflow project name
-* [`Batch[string]`](/workflows/kinds/batch_string): Batch of string values
-* [`image`](/workflows/kinds/image): Image in workflows
-* [`Batch[bar_code_detection]`](/workflows/kinds/batch_bar_code_detection): Prediction with barcode detection
-* [`object_detection_prediction`](/workflows/kinds/object_detection_prediction): Prediction with detected bounding boxes in form of sv.Detections(...) object
-* [`boolean`](/workflows/kinds/boolean): Boolean flag
+* [`roboflow_model_id`](/workflows/kinds/roboflow_model_id): Roboflow model id
+* [`list_of_values`](/workflows/kinds/list_of_values): List of values of any types
 * [`instance_segmentation_prediction`](/workflows/kinds/instance_segmentation_prediction): Prediction with detected bounding boxes and segmentation masks in form of sv.Detections(...) object
+* [`image`](/workflows/kinds/image): Image in workflows
+* [`video_metadata`](/workflows/kinds/video_metadata): Video image metadata
+* [`serialised_payloads`](/workflows/kinds/serialised_payloads): Serialised element that is usually accepted by sink
+* [`integer`](/workflows/kinds/integer): Integer value
+* [`rgb_color`](/workflows/kinds/rgb_color): RGB color
+* [`classification_prediction`](/workflows/kinds/classification_prediction): Predictions from classifier
 * [`image_keypoints`](/workflows/kinds/image_keypoints): Image keypoints detected by classical Computer Vision method
-* [`Batch[image_metadata]`](/workflows/kinds/batch_image_metadata): Dictionary with image metadata required by supervision
+* [`point`](/workflows/kinds/point): Single point in 2D
+* [`zone`](/workflows/kinds/zone): Definition of polygon zone
 <!--- AUTOGENERATED_KINDS_LIST -->
diff --git a/inference/core/version.py b/inference/core/version.py
@@ -1,4 +1,4 @@
-__version__ = "0.17.1"
+__version__ = "0.18.0"
 
 
 if __name__ == "__main__":

diff --git a/inference/core/workflows/core_steps/formatters/json_parser/__init__.py b/inference/core/workflows/core_steps/formatters/json_parser/__init__.py