Merge pull request #18108 from YetheSamartaka/main

fix: "Cannot handle batch sizes > 1 if no padding token is defined" for some reranking models
Tim Jaeryang Baek 2025-10-07 10:57:34 -05:00 committed by GitHub
commit 696876d393


@@ -191,6 +191,22 @@ def get_rf(
log.error(f"CrossEncoder: {e}") log.error(f"CrossEncoder: {e}")
raise Exception(ERROR_MESSAGES.DEFAULT("CrossEncoder error")) raise Exception(ERROR_MESSAGES.DEFAULT("CrossEncoder error"))
# Safely adjust pad_token_id if missing as some models do not have this in config
try:
model_cfg = getattr(rf, "model", None)
if model_cfg and hasattr(model_cfg, "config"):
cfg = model_cfg.config
if getattr(cfg, "pad_token_id", None) is None:
# Fallback to eos_token_id when available
eos = getattr(cfg, "eos_token_id", None)
if eos is not None:
cfg.pad_token_id = eos
log.debug(f"Missing pad_token_id detected; set to eos_token_id={eos}")
else:
log.warning("Neither pad_token_id nor eos_token_id present in model config")
except Exception as e2:
log.warning(f"Failed to adjust pad_token_id on CrossEncoder: {e2}")
return rf return rf
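
For context, the snippet below is a minimal standalone sketch of the same workaround outside the get_rf helper, assuming a hypothetical reranker id ("some-org/some-reranker") whose config ships without pad_token_id; it is not the Open WebUI code itself. Reusing eos_token_id as the padding token is what lets CrossEncoder.predict score more than one (query, passage) pair per call without raising the error quoted in the PR title.

from sentence_transformers import CrossEncoder

# Hypothetical reranker whose config lacks pad_token_id (placeholder model id).
rf = CrossEncoder("some-org/some-reranker")

cfg = rf.model.config
if getattr(cfg, "pad_token_id", None) is None:
    # Fall back to eos_token_id so batched scoring can pad shorter inputs;
    # otherwise transformers raises
    # "Cannot handle batch sizes > 1 if no padding token is defined".
    eos = getattr(cfg, "eos_token_id", None)
    if eos is not None:
        cfg.pad_token_id = eos

# Batch of two (query, passage) pairs; padding is needed once batch size > 1.
scores = rf.predict(
    [
        ("what is open webui", "Open WebUI is a self-hosted AI interface."),
        ("what is open webui", "Bananas are rich in potassium."),
    ]
)
print(scores)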