Mirror of https://github.com/microsoft/FLAML.git
Fix transformers API compatibility: support v4.26+ and v5.0+ with version-aware parameter selection (#1514)
* Initial plan

* Fix transformers API compatibility issues

Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>

* Add backward compatibility for transformers v4.26+ via a version check

Support both the tokenizer (v4.26-4.43) and processing_class (v4.44+) parameters based on the installed transformers version. Fall back to tokenizer if the version check fails.

Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>

* Improve exception handling specificity

Use specific exception types (ImportError, AttributeError, ValueError) instead of a broad Exception catch for better error handling.

Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>

* Run pre-commit formatting on all files

Applied black formatting to fix code style across the repository.

Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>
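The selection in the diff below keys off transformers.__version__. A minimal alternative sketch, not part of this commit: pick the parameter name by inspecting the Trainer.__init__ signature, which sidesteps version-string parsing entirely (plain Trainer stands in for FLAML's TrainerForAuto, and tokenizer_kwarg_name is a hypothetical helper):

import inspect

from transformers import Trainer


def tokenizer_kwarg_name() -> str:
    # "processing_class" supersedes "tokenizer" in newer transformers
    # (>= 4.44.0 per this commit's version gate); checking the signature
    # works even for dev builds and needs no `packaging` dependency.
    params = inspect.signature(Trainer.__init__).parameters
    return "processing_class" if "processing_class" in params else "tokenizer"


# Hypothetical usage: trainer_kwargs[tokenizer_kwarg_name()] = tokenizer

The version parse used in the commit is the simpler choice and matches a documented version boundary; signature introspection trades that simplicity for robustness against future renames.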
@@ -1196,16 +1196,31 @@ class TransformersEstimator(BaseEstimator):
                 control.should_save = True
                 control.should_evaluate = True
 
-        self._trainer = TrainerForAuto(
-            args=self._training_args,
-            model_init=self._model_init,
-            train_dataset=train_dataset,
-            eval_dataset=eval_dataset,
-            tokenizer=self.tokenizer,
-            data_collator=self.data_collator,
-            compute_metrics=self._compute_metrics_by_dataset_name,
-            callbacks=[EarlyStoppingCallbackForAuto],
-        )
+        # Use processing_class for transformers >= 4.44.0, tokenizer for older versions
+        trainer_kwargs = {
+            "args": self._training_args,
+            "model_init": self._model_init,
+            "train_dataset": train_dataset,
+            "eval_dataset": eval_dataset,
+            "data_collator": self.data_collator,
+            "compute_metrics": self._compute_metrics_by_dataset_name,
+            "callbacks": [EarlyStoppingCallbackForAuto],
+        }
+
+        # Check if processing_class parameter is supported (transformers >= 4.44.0)
+        try:
+            import transformers
+            from packaging import version
+
+            if version.parse(transformers.__version__) >= version.parse("4.44.0"):
+                trainer_kwargs["processing_class"] = self.tokenizer
+            else:
+                trainer_kwargs["tokenizer"] = self.tokenizer
+        except (ImportError, AttributeError, ValueError):
+            # Fallback to tokenizer if version check fails
+            trainer_kwargs["tokenizer"] = self.tokenizer
+
+        self._trainer = TrainerForAuto(**trainer_kwargs)
 
         if self._task in NLG_TASKS:
             setattr(self._trainer, "_is_seq2seq", True)
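Two properties of the guarded comparison above are worth noting: PEP 440 pre-releases sort below their final release, and packaging.version.InvalidVersion subclasses ValueError, which is why ValueError appears in the except clause. A quick standalone check ("4.44.0.dev0" is a hypothetical build string):

from packaging import version

# Pre-releases sort below the release, so a 4.44 dev build would still
# take the tokenizer branch:
assert version.parse("4.44.0.dev0") < version.parse("4.44.0")
assert version.parse("5.0.0") >= version.parse("4.44.0")

# InvalidVersion subclasses ValueError, so the except clause also
# covers malformed version strings:
try:
    version.parse("not-a-version")
except ValueError:
    pass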
@@ -211,7 +211,6 @@ def tokenize_onedataframe(
     hf_args=None,
     prefix_str=None,
 ):
-    with tokenizer.as_target_tokenizer():
-        _, tokenized_column_names = tokenize_row(
-            dict(X.iloc[0]),
-            tokenizer,
+    _, tokenized_column_names = tokenize_row(
+        dict(X.iloc[0]),
+        tokenizer,
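The second hunk drops tokenizer.as_target_tokenizer(): the context manager was deprecated during the 4.x series (superseded by the text_target argument to the tokenizer call) and is removed in transformers v5, so the commit calls tokenize_row without it. A minimal compatibility sketch for code that must still run on versions where the context manager matters (target_tokenizer_context is a hypothetical helper, not FLAML code):

import contextlib


def target_tokenizer_context(tokenizer):
    # Use as_target_tokenizer() where it still exists (older 4.x
    # releases); otherwise a no-op context (v5+, where it was removed).
    if hasattr(tokenizer, "as_target_tokenizer"):
        return tokenizer.as_target_tokenizer()
    return contextlib.nullcontext()


# Hypothetical usage:
# with target_tokenizer_context(tokenizer):
#     labels = tokenizer(target_texts)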