
speculators.models.eagle3

Classes:

  • Eagle3DraftModel – EAGLE-3 draft model.
  • Eagle3SpeculatorConfig – Configuration for EAGLE-3 speculator with vocabulary mapping.

Eagle3DraftModel

Eagle3DraftModel(
    config: Eagle3SpeculatorConfig,
    t2d: Tensor | None,
    d2t: Tensor | None,
)

Bases: SpeculatorModel

Methods:

  • from_training_args – Create Eagle3 model from training arguments.
  • get_trainer_kwargs – Get training and validation kwargs for Eagle3.

Source code in speculators/models/eagle3/core.py
def __init__(
    self,
    config: Eagle3SpeculatorConfig,
    t2d: torch.Tensor | None,
    d2t: torch.Tensor | None,
):
    super().__init__(
        config=config,
        verifier=None,
        verifier_attachment_mode="train_only",
    )
    self.hidden_size = config.transformer_layer_config.hidden_size
    self.draft_vocab_size = config.draft_vocab_size

    # Verify that if one mapping tensor is provided, the other is as well
    if (t2d is None) != (d2t is None):
        raise ValueError(
            "Both t2d and d2t must be provided together, or both must be None. "
            f"Got t2d={'provided' if t2d is not None else 'None'}, "
            f"d2t={'provided' if d2t is not None else 'None'}"
        )

    # Register buffers - they can be None
    if t2d is not None:
        self.register_buffer("t2d", t2d)  # shape: [verifier_vocab_size], bool
        if int(t2d.sum(dtype=torch.long).item()) != self.draft_vocab_size:
            raise ValueError(
                f"t2d has {int(t2d.sum(dtype=torch.long).item())} non-zero values, "
                f"expected {self.draft_vocab_size}."
            )
    else:
        self.register_buffer("t2d", None)

    if d2t is not None:
        self.register_buffer("d2t", d2t)  # shape: [draft_vocab_size], int offsets
        if d2t.shape[0] != self.draft_vocab_size:
            raise ValueError(
                f"d2t.shape[0] ({d2t.shape[0]}) must match"
                f" draft_vocab_size ({self.draft_vocab_size})."
            )
    else:
        self.register_buffer("d2t", None)

    self.fc = torch.nn.Linear(3 * self.hidden_size, self.hidden_size, bias=False)
    self._model_definitions = model_classes[
        config.transformer_layer_config.model_type
    ]
    self._setup_decoder_layers(
        config.transformer_layer_config, config.norm_before_residual
    )
    self.norm = self._model_definitions.norm_class(
        self.hidden_size, eps=config.transformer_layer_config.rms_norm_eps
    )
    self._setup_rotary_embedding(config.transformer_layer_config)
    self._setup_embeddings_and_lm_heads(config.speculators_config.verifier, t2d)
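
The two buffers describe how the reduced draft vocabulary maps onto the verifier vocabulary. Below is a minimal sketch of tensors that satisfy the checks above, assuming the common EAGLE-3 convention that draft id i corresponds to target id i + d2t[i] (the offset convention itself is not spelled out on this page):

import torch

verifier_vocab_size = 128_256  # illustrative verifier/target vocabulary size
draft_vocab_size = 32_000      # must equal config.draft_vocab_size

# Target-token ids kept in the draft vocabulary (here simply the first 32K ids).
kept_target_ids = torch.arange(draft_vocab_size)

# t2d: boolean mask over the verifier vocabulary with exactly
# draft_vocab_size True entries, as required by the check in __init__.
t2d = torch.zeros(verifier_vocab_size, dtype=torch.bool)
t2d[kept_target_ids] = True

# d2t: integer offsets of length draft_vocab_size; draft id i maps to
# target id i + d2t[i] (assumed convention).
d2t = kept_target_ids - torch.arange(draft_vocab_size)

assert int(t2d.sum()) == draft_vocab_size
assert d2t.shape[0] == draft_vocab_size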

from_training_args classmethod

from_training_args(
    verifier_config: PretrainedConfig, **kwargs
) -> Eagle3DraftModel

Create Eagle3 model from training arguments.

Args:

  • verifier_config

    Verifier model configuration

  • **kwargs

    Training arguments with Eagle3-specific params:
      - draft_vocab_size: Size of the draft model vocabulary
      - num_layers: Number of decoder layers
      - norm_before_residual: Whether to normalize before the residual connection
      - t2d: Target-to-draft vocabulary mapping tensor
      - d2t: Draft-to-target vocabulary mapping tensor
      - ttt_steps: Number of TTT steps
      - verifier_name_or_path: Path to the verifier model

Returns:

  Initialized Eagle3DraftModel

Source code in speculators/models/eagle3/core.py
@classmethod
def from_training_args(
    cls,
    verifier_config: PretrainedConfig,
    **kwargs,
) -> "Eagle3DraftModel":
    """Create Eagle3 model from training arguments.

    Args:
        verifier_config: Verifier model configuration
        **kwargs: Training arguments with Eagle3-specific params
            - num_layers: Number of decoder layers
            - norm_before_residual: Whether to normalize before residual connection
            - t2d: Target-to-draft vocabulary mapping tensor
            - d2t: Draft-to-target vocabulary mapping tensor
            - ttt_steps: Number of TTT steps
            - verifier_name_or_path: Path to verifier model

    Returns:
        Initialized Eagle3DraftModel
    """
    config = Eagle3SpeculatorConfig(
        transformer_layer_config=verifier_config,
        draft_vocab_size=kwargs["draft_vocab_size"],
        norm_before_residual=kwargs["norm_before_residual"],
        speculators_config=SpeculatorsConfig(
            algorithm="eagle3",
            proposal_methods=[
                GreedyTokenProposalConfig(
                    speculative_tokens=kwargs["ttt_steps"],
                )
            ],
            default_proposal_method="greedy",
            verifier=VerifierConfig.from_config(
                verifier_config, name_or_path=kwargs["verifier_name_or_path"]
            ),
        ),
    )

    return cls(config=config, t2d=kwargs.get("t2d"), d2t=kwargs.get("d2t"))
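
A usage sketch with illustrative values; the required keyword arguments follow the source above, and the import path assumes the class is exported from speculators.models.eagle3:

from transformers import AutoConfig

from speculators.models.eagle3 import Eagle3DraftModel  # assumed import path

verifier_name = "meta-llama/Llama-3.1-8B-Instruct"  # illustrative verifier
verifier_config = AutoConfig.from_pretrained(verifier_name)

draft = Eagle3DraftModel.from_training_args(
    verifier_config=verifier_config,
    draft_vocab_size=32_000,
    norm_before_residual=True,
    ttt_steps=3,
    verifier_name_or_path=verifier_name,
    t2d=None,  # optionally pass the t2d/d2t mapping tensors (both together)
    d2t=None,
)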

get_trainer_kwargs staticmethod

get_trainer_kwargs(**kwargs) -> tuple[dict, dict]

Get training and validation kwargs for Eagle3.

Args:

  • **kwargs

    Training arguments

Returns:

  Tuple of (train_call_kwargs, val_call_kwargs)

Source code in speculators/models/eagle3/core.py
@staticmethod
def get_trainer_kwargs(**kwargs) -> tuple[dict, dict]:
    """Get training and validation kwargs for Eagle3.

    Args:
        **kwargs: Training arguments

    Returns:
        Tuple of (train_call_kwargs, val_call_kwargs)
    """
    train_kwargs = {
        "use_off_policy_tokens": kwargs["use_off_policy_tokens"],
        "ttt_steps": kwargs["ttt_steps"],
        "ttt_step_loss_decay": kwargs["ttt_step_loss_decay"],
    }
    val_kwargs = {
        "use_off_policy_tokens": False,
        "ttt_steps": kwargs["ttt_steps"],
        "ttt_step_loss_decay": kwargs["ttt_step_loss_decay"],
    }
    return train_kwargs, val_kwargs
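
For example, with illustrative hyperparameters (note that validation always runs with off-policy tokens disabled):

from speculators.models.eagle3 import Eagle3DraftModel  # assumed import path

train_kwargs, val_kwargs = Eagle3DraftModel.get_trainer_kwargs(
    use_off_policy_tokens=True,
    ttt_steps=3,
    ttt_step_loss_decay=0.8,
)
assert val_kwargs["use_off_policy_tokens"] is False
assert train_kwargs["ttt_steps"] == val_kwargs["ttt_steps"] == 3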

Eagle3SpeculatorConfig

Eagle3SpeculatorConfig(**kwargs)

Bases: SpeculatorModelConfig

Configuration for EAGLE-3 speculator with vocabulary mapping.

EAGLE-3 features vocabulary mapping between a reduced draft vocabulary (e.g., 32K tokens) and the full target vocabulary (e.g., 128K tokens), enabling cross-tokenizer speculation.

Parameters:

  • transformer_layer_config

    Configuration for the transformer decoder layer

  • draft_vocab_size

    Size of draft model vocabulary for speculation

  • norm_before_residual

    Apply hidden_norm before storing residual

Methods:

  • serialize_transformer_config – Serialize transformer config to dict.
  • validate_transformer_config – Validate and convert transformer config.

Attributes:

  • target_vocab_size – Get target vocabulary size from transformer config.

Source code in speculators/config.py
def __init__(self, **kwargs):
    # initialize the Pydantic arguments first to set all valid fields
    PydanticClassRegistryMixin.__init__(self, **kwargs)

    # reset kwargs handled by Pydantic so PretrainedConfig doesn't override
    for field in self.__class__.model_fields:
        kwargs[field] = getattr(self, field)

    # initialize the Hugging Face PretrainedConfig arguments for the model
    PretrainedConfig.__init__(self, **kwargs)

    # ensure we always update the transformers version
    self.transformers_version = version("transformers")

target_vocab_size property

target_vocab_size: int

Get target vocabulary size from transformer config.
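
A construction sketch mirroring the from_training_args source above. The import paths for SpeculatorsConfig, VerifierConfig, and GreedyTokenProposalConfig are assumptions (they are not shown on this page), and target_vocab_size is assumed to be read from the layer config's vocab_size:

from transformers import LlamaConfig

from speculators.config import SpeculatorsConfig, VerifierConfig  # assumed paths
from speculators.proposals import GreedyTokenProposalConfig       # assumed path
from speculators.models.eagle3 import Eagle3SpeculatorConfig      # assumed path

layer_config = LlamaConfig(hidden_size=2048, num_hidden_layers=1, vocab_size=128_256)

config = Eagle3SpeculatorConfig(
    transformer_layer_config=layer_config,
    draft_vocab_size=32_000,
    norm_before_residual=True,
    speculators_config=SpeculatorsConfig(
        algorithm="eagle3",
        proposal_methods=[GreedyTokenProposalConfig(speculative_tokens=3)],
        default_proposal_method="greedy",
        verifier=VerifierConfig.from_config(layer_config, name_or_path="my-verifier"),
    ),
)

print(config.draft_vocab_size)   # 32000
print(config.target_vocab_size)  # 128256 (assumed to come from layer_config.vocab_size)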

serialize_transformer_config

serialize_transformer_config(
    value: PretrainedConfig,
) -> dict

Serialize transformer config to dict.

Source code in speculators/models/eagle3/config.py
@field_serializer("transformer_layer_config")
def serialize_transformer_config(self, value: PretrainedConfig) -> dict:
    """Serialize transformer config to dict."""
    return value.to_diff_dict()

validate_transformer_config classmethod

validate_transformer_config(value: Any) -> PretrainedConfig

Validate and convert transformer config.

Source code in speculators/models/eagle3/config.py
@field_validator("transformer_layer_config", mode="before")
@classmethod
def validate_transformer_config(cls, value: Any) -> PretrainedConfig:
    """Validate and convert transformer config."""
    if isinstance(value, dict):
        config_class: type[PretrainedConfig] = LlamaConfig
        if "model_type" in value:
            config_class = AutoConfig.for_model(
                model_type=value["model_type"]
            ).__class__
        return config_class(**value)
    return value
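
Taken together, the validator and serializer let transformer_layer_config round-trip through a plain dict. A sketch of the same conversion logic in isolation (without calling the Pydantic-wrapped hooks directly):

from transformers import AutoConfig, LlamaConfig

layer_dict = {"model_type": "llama", "hidden_size": 2048, "num_hidden_layers": 1}

# Resolve the config class from model_type, as validate_transformer_config does.
config_class = AutoConfig.for_model(model_type=layer_dict["model_type"]).__class__
layer_config = config_class(**layer_dict)
assert isinstance(layer_config, LlamaConfig)

# serialize_transformer_config uses to_diff_dict, which keeps only values
# that differ from the model_type's defaults.
assert layer_config.to_diff_dict()["hidden_size"] == 2048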