support vllm-0.6.6 #214

Status: Open. Wants to merge 9 commits into base: main. Showing changes from 8 commits.
38 changes: 15 additions & 23 deletions chatlearn/models/vllm/hooks/__init__.py
@@ -19,26 +19,18 @@
 from .. import is_vllm_v2
 
 
-if is_vllm_v2():
-    if importlib.util.find_spec("vllm"):
-        from . import ray_gpu_executor
-        from chatlearn.utils.constant import CURRENT_VLLM_VERSION, VLLMVersion
-        if CURRENT_VLLM_VERSION == VLLMVersion.v_0_6_3:
-            from chatlearn.models.vllm.hooks import input_preprocess
-            from chatlearn.models.vllm.hooks import async_llm_engine
-            from chatlearn.models.vllm.hooks import llm
-            from chatlearn.models.vllm.hooks import loader
-            from chatlearn.models.vllm.hooks import worker_base
-else:
-    if importlib.util.find_spec("vllm"):
-        import vllm
-        from chatlearn.utils.constant import CURRENT_VLLM_VERSION, VLLMVersion  # pylint: disable=ungrouped-imports
-        if CURRENT_VLLM_VERSION == VLLMVersion.v_0_3_0:
-            from chatlearn.models.vllm.hooks import sampler
-        elif CURRENT_VLLM_VERSION in [VLLMVersion.v_0_5_1, VLLMVersion.v_0_6_3]:
-            from chatlearn.models.vllm.hooks import llm_engine, logits_processor
-            if CURRENT_VLLM_VERSION == VLLMVersion.v_0_5_1:
-                from chatlearn.models.vllm.hooks import worker
-            else:
-                from chatlearn.models.vllm.hooks import input_preprocess
-                from chatlearn.models.vllm.hooks import format_device_name
+if importlib.util.find_spec("vllm"):
+
+    from chatlearn.utils.constant import CURRENT_VLLM_VERSION, VLLMVersion
+
+    if CURRENT_VLLM_VERSION == VLLMVersion.v_0_3_0:
+        from chatlearn.models.vllm.hooks.vllm_0_3_0 import *
+    elif CURRENT_VLLM_VERSION == VLLMVersion.v_0_5_1:
+        from chatlearn.models.vllm.hooks.vllm_0_5_1 import *
+    elif CURRENT_VLLM_VERSION == VLLMVersion.v_0_6_3:
+        from chatlearn.models.vllm.hooks.vllm_0_6_3 import *
+    elif CURRENT_VLLM_VERSION == VLLMVersion.v_0_6_6:
+        from .vllm_0_6_6 import *
+    else:
+        raise RuntimeError(
+            f"vLLM version expected in {list(member.value for member in VLLMVersion)}, while {CURRENT_VLLM_VERSION}.")
62 changes: 0 additions & 62 deletions chatlearn/models/vllm/hooks/input_preprocess.py

This file was deleted.

21 changes: 21 additions & 0 deletions chatlearn/models/vllm/hooks/vllm_0_3_0/__init__.py
@@ -0,0 +1,21 @@
# Copyright 2024 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional hooks of vllm-0.3.0."""

from ... import is_vllm_v2

assert not is_vllm_v2(), "vLLM-0.3.0 only supports vLLM Module v1. Set env `ENABLE_VLLM_V2=False`."

from . import sampler
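The `is_vllm_v2()` guard comes from the parent package; its contract, per the asserts in these `__init__` files, is an `ENABLE_VLLM_V2` env switch. A minimal sketch of such a switch, assuming the default and accepted spellings (the real definition lives elsewhere in `chatlearn/models/vllm/` and may differ):

```python
# Hedged sketch: an env-driven switch consistent with the asserts above.
# The default value and the accepted spellings are assumptions.
import os

def is_vllm_v2() -> bool:
    """Return True when the vLLM Module v2 path is enabled."""
    return os.getenv("ENABLE_VLLM_V2", "True").strip().lower() in ("true", "1")
```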
23 changes: 23 additions & 0 deletions chatlearn/models/vllm/hooks/vllm_0_5_1/__init__.py
@@ -0,0 +1,23 @@
# Copyright 2024 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional hooks of vllm-0.5.1."""

from ... import is_vllm_v2

assert not is_vllm_v2(), "vLLM-0.5.1 only supports vLLM Module v1. Set env `ENABLE_VLLM_V2=False`."

from . import llm_engine
from . import logits_processor
from . import worker
29 changes: 29 additions & 0 deletions chatlearn/models/vllm/hooks/vllm_0_6_3/__init__.py
@@ -0,0 +1,29 @@
# Copyright 2024 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional hooks of vllm-0.6.3."""

from ... import is_vllm_v2
from . import format_device_name
from . import input_preprocess

if is_vllm_v2():
    from . import async_llm_engine
    from . import llm
    from . import loader
    from . import ray_gpu_executor
    from . import worker_base
else:
    from . import llm_engine
    from . import logits_processor
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Hooks of vllm-0.6.3 del init_ray_cluster in AsyncLLMEngine."""
+"""del init_ray_cluster in AsyncLLMEngine."""
 
 from typing import Dict, Optional
 
55 changes: 55 additions & 0 deletions chatlearn/models/vllm/hooks/vllm_0_6_3/input_preprocess.py
@@ -0,0 +1,55 @@
# Copyright 2024 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hooks of vllm-0.6.3 input preprocess to pass prompt text."""

# pylint: disable=unused-import,unused-argument
from vllm.inputs import preprocess
from vllm.inputs.parse import parse_singleton_prompt

def extract_prompt_components(
        self,
        prompt,
        request_id,
        lora_request=None):
    '''
    Extract the components of any single encoder or decoder input prompt.

    Arguments:

    * request_id
    * prompt: single encoder or decoder input prompt
    * lora_request: this is only valid for decoder prompts

    Returns:

    * prompt
    * prompt_token_ids
    * multi_modal_data
    * mm_processor_kwargs (request-level input processor/mapper overrides)
    '''
    parsed = parse_singleton_prompt(prompt)

    assert parsed["type"] == "tokens", \
        f"prompt_token_ids must be passed when adding a request to the scheduler, got prompt {prompt}"

    prompt_text = parsed["content"]["prompt"]
    prompt_token_ids = parsed["content"]["prompt_token_ids"]
    multi_modal_data = parsed["content"].get("multi_modal_data")
    mm_processor_kwargs = parsed["content"].get("mm_processor_kwargs")

    return (prompt_text, prompt_token_ids, multi_modal_data,
            mm_processor_kwargs)

preprocess.InputPreprocessor._extract_prompt_components = extract_prompt_components
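The assert means every request reaching the scheduler must already be tokenized, with the original text carried alongside so the hook can pass it through. A hedged usage sketch of a prompt that satisfies the "tokens" branch (field names follow `vllm.inputs` in 0.6.3; the token ids are placeholders):

```python
# Hedged usage sketch: a pre-tokenized prompt as this hook expects it.
# Token ids below are made up, not real tokenizer output.
prompt = {
    "prompt_token_ids": [101, 2023, 2003, 102],  # ids the scheduler consumes
    "prompt": "this is",                         # text the hook now passes through
}
```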
30 changes: 30 additions & 0 deletions chatlearn/models/vllm/hooks/vllm_0_6_3/llm_engine.py
@@ -0,0 +1,30 @@
# Copyright 2024 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hooks of vllm-0.5.1 llm_engine remove __reduce__ function."""

import inspect

# pylint: disable=unused-import,wildcard-import,unused-argument
from vllm.engine import llm_engine


source = inspect.getsource(llm_engine.LLMEngine.__reduce__)
if 'RuntimeError' in source:
    # Upstream defines __reduce__ to raise, which blocks pickling. Drop it
    # so that the LLMEngine can be referenced in the closure used to
    # initialize Ray worker actors.
    del llm_engine.LLMEngine.__reduce__
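Sniffing the upstream source before patching makes the hook self-disabling: it only removes `__reduce__` while upstream still raises there, and becomes a no-op if vLLM changes. A self-contained sketch of the same guard-then-patch pattern on a toy class (not vLLM):

```python
# Self-contained illustration of the guard-then-patch pattern, using a
# toy class in place of vllm's LLMEngine.
import inspect
import pickle

class Engine:
    def __reduce__(self):
        raise RuntimeError("Engine should not be pickled!")

if 'RuntimeError' in inspect.getsource(Engine.__reduce__):
    del Engine.__reduce__  # fall back to the default pickling behavior

pickle.loads(pickle.dumps(Engine()))  # round-trips instead of raising
```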
@@ -73,7 +73,6 @@ def init(self, load_config):
 
 loader.DummyModelLoader.__init__ = init
 
-
 # add ckpt loading of megatron format
 def load_model(self, *, model_config,
                device_config,
42 changes: 42 additions & 0 deletions chatlearn/models/vllm/hooks/vllm_0_6_3/logits_processor.py
@@ -0,0 +1,42 @@
# Copyright 2024 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hooks of vllm-0.5.1 logits_processor to allgather logits of all ranks."""

import inspect

# pylint: disable=wildcard-import,ungrouped-imports
from vllm.model_executor.layers import logits_processor


source = inspect.getsource(logits_processor.LogitsProcessor._get_logits)
if 'tensor_model_parallel_gather' in source:
    import torch
    from typing import Optional
    from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding

    def _get_logits(self, hidden_states: torch.Tensor,
                    lm_head: VocabParallelEmbedding,
                    embedding_bias: Optional[torch.Tensor]) -> torch.Tensor:
        # Get the logits for the next tokens.
        logits = lm_head.linear_method.apply(lm_head,
                                             hidden_states,
                                             bias=embedding_bias)
        from vllm.distributed.communication_op import tensor_model_parallel_all_gather  # pylint: disable=import-outside-toplevel
        logits = tensor_model_parallel_all_gather(logits)
        # Remove paddings in vocab (if any).
        if logits is not None:
            logits = logits[:, :self.org_vocab_size]
        return logits

    logits_processor.LogitsProcessor._get_logits = _get_logits
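Upstream 0.6.3 calls `tensor_model_parallel_gather`, which collects logits onto the driver rank and leaves the other ranks with `None`; the replacement all-gathers so every tensor-parallel rank sees the full vocabulary. A single-process sketch of the resulting shard layout (plain torch, no distributed runtime; shapes are illustrative):

```python
# Single-process sketch of what the all-gather achieves: each TP rank
# holds one vocab shard, and after all-gather every rank holds the
# concatenation, trimmed back to the original vocab size.
import torch

tp_size, batch, vocab_per_rank = 2, 3, 8
shards = [torch.randn(batch, vocab_per_rank) for _ in range(tp_size)]

full_logits = torch.cat(shards, dim=-1)  # every rank ends up with this
org_vocab_size = 15                      # strip padding added for divisibility
full_logits = full_logits[:, :org_vocab_size]
assert full_logits.shape == (batch, org_vocab_size)
```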
27 changes: 27 additions & 0 deletions chatlearn/models/vllm/hooks/vllm_0_6_6/__init__.py
@@ -0,0 +1,27 @@
# Copyright 2024 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional hooks of vllm-0.6.6."""

from ... import is_vllm_v2

assert is_vllm_v2(), "vLLM-0.6.6 only supports vLLM Module v2."

from . import async_llm_engine
from . import input_preprocess
from . import llm
from . import llm_engine
from . import loader
from . import ray_gpu_executor
from . import worker_base
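Given the assert, opting into these hooks is a matter of the env switch plus an installed vllm 0.6.6. A hedged usage sketch (the env name comes from the asserts in the sibling packages; the default-on behavior is an assumption):

```python
# Hedged usage sketch: enable the v2 path before the hooks import so the
# assert above passes with vllm 0.6.6 installed.
import os
os.environ["ENABLE_VLLM_V2"] = "True"  # assumption: consulted by is_vllm_v2()

import chatlearn.models.vllm.hooks  # dispatches to the vllm_0_6_6 hooks
```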