Files
FLAML/test/test_gpu.py
Will Charles 840f76e5e5 Changed tune.report import for ray>=2 (#1392)
* Changed tune.report import for ray>=2

* env: Changed pydantic restriction in env

* Reverted Pydantic install conditions

* Reverted Pydantic install conditions

* test: Check if GPU is available

* tests: uncommented a line

* tests: Better fix for Ray GPU checking

* tests: Added timeout to dataset loading

* tests: Deleted _test_hf_data()

* test: Reduce lrl2 dataset size

* bug: timeout error

* bug: timeout error

* fix: Added threading check for timeout issue

* Undo old commits

* Timeout fix from #1406

---------

Co-authored-by: Daniel Grindrod <dannycg1996@gmail.com>
2025-02-14 09:38:33 +08:00

129 lines
3.4 KiB
Python

import pickle
import shutil
import sys
import pytest
def test_xgboost():
    """Smoke-test the XGBoost learners on sparse and dense inputs.

    Three fits are run, each restricted to the ``xgb_limitdepth`` and
    ``xgboost`` estimators with a 5 second budget:

    1. a 900000 x 900000 sparse identity matrix with random binary labels,
       requesting one GPU per trial;
    2. a 300000-sample ``make_moons`` dataset, requesting one GPU per trial;
    3. the same ``make_moons`` data on the same ``AutoML`` instance, without
       a GPU request.

    If any step raises ``XGBoostError`` the test returns early without
    failing.
    """
    import numpy as np
    import scipy.sparse
    from sklearn.datasets import make_moons
    from xgboost.core import XGBoostError

    from flaml import AutoML

    def fit_xgb(model, features, labels, **extra):
        # A fresh estimator list is built on every call so that no list
        # object is shared between separate fits.
        model.fit(
            features,
            labels,
            estimator_list=["xgb_limitdepth", "xgboost"],
            time_budget=5,
            **extra,
        )

    try:
        # Sparse case: identity matrix features, random 0/1 targets.
        sparse_rows = 900000
        sparse_features = scipy.sparse.eye(sparse_rows)
        sparse_labels = np.random.randint(2, size=sparse_rows)
        fit_xgb(AutoML(), sparse_features, sparse_labels, gpu_per_trial=1)

        # Dense case: one AutoML instance is fitted twice, first asking for a
        # GPU and then without one.
        moons_features, moons_labels = make_moons(
            n_samples=300000, shuffle=True, noise=0.3, random_state=None
        )
        dense_model = AutoML()
        fit_xgb(dense_model, moons_features, moons_labels, gpu_per_trial=1)
        fit_xgb(dense_model, moons_features, moons_labels)
    except XGBoostError:
        # No visible GPU is found for XGBoost.
        return
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def _test_hf_data():
    """Fine-tune a transformer on a 1% slice of GLUE/MRPC using one GPU via Ray.

    The leading underscore keeps the function out of pytest's default
    ``test*`` collection; it is invoked from the ``__main__`` guard of this
    file instead.

    The function returns early, without failing, when:

    * loading the dataset raises ``requests.exceptions.ConnectionError``;
    * Ray cannot reserve 1 CPU + 1 GPU within 10 seconds, or raises
      ``RuntimeError`` while probing.

    Otherwise it fits an ``AutoML`` sequence-classification model, retrains
    from the log, round-trips the model through ``automl.pkl`` with pickle,
    deletes the transformer output directory and finally exercises
    ``predict`` / ``predict_proba``.

    Side effects: writes ``seqclass.log`` and ``automl.pkl`` in the current
    working directory and removes ``test/data/output/``.
    """
    import requests
    from datasets import load_dataset

    from flaml import AutoML

    try:
        train_dataset = load_dataset("glue", "mrpc", split="train[:1%]").to_pandas()
        dev_dataset = load_dataset("glue", "mrpc", split="validation[:1%]").to_pandas()
        test_dataset = load_dataset("glue", "mrpc", split="test[:1%]").to_pandas()
    except requests.exceptions.ConnectionError:
        # The dataset could not be downloaded; nothing to test.
        return

    # Tests will only run if there is a GPU available
    try:
        import ray

        pg = ray.util.placement_group([{"CPU": 1, "GPU": 1}])
        try:
            gpu_reserved = pg.wait(timeout_seconds=10)  # Wait 10 seconds for resources
        finally:
            # The placement group is only a probe. Left in place it would keep
            # holding (or keep queuing for) the CPU/GPU bundle, which can
            # starve the Ray trial launched by automl.fit() below on a
            # single-GPU machine. Always release it.
            ray.util.remove_placement_group(pg)
        if not gpu_reserved:
            raise RuntimeError("No available node types can fulfill resource request!")
    except RuntimeError:
        return

    custom_sent_keys = ["sentence1", "sentence2"]
    label_key = "label"

    X_train = train_dataset[custom_sent_keys]
    y_train = train_dataset[label_key]

    X_val = dev_dataset[custom_sent_keys]
    y_val = dev_dataset[label_key]

    X_test = test_dataset[custom_sent_keys]

    automl = AutoML()

    automl_settings = {
        "gpu_per_trial": 1,
        "max_iter": 2,
        "time_budget": 5000,
        "task": "seq-classification",
        "metric": "accuracy",
        "log_file_name": "seqclass.log",
        "use_ray": True,
    }

    automl_settings["fit_kwargs_by_estimator"] = {
        "transformer": {
            "model_path": "facebook/muppet-roberta-base",
            "output_dir": "test/data/output/",
            "fp16": True,
        }
    }

    automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)

    # Rebuild the model from the first record of the search log.
    automl = AutoML()
    automl.retrain_from_log(X_train=X_train, y_train=y_train, train_full=True, record_id=0, **automl_settings)

    # Pickle round-trip; the file is one this function has just written.
    with open("automl.pkl", "wb") as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
    with open("automl.pkl", "rb") as f:
        automl = pickle.load(f)

    # NOTE(review): the output directory is removed *before* predicting,
    # presumably to check that the unpickled model does not depend on it —
    # confirm before reordering.
    shutil.rmtree("test/data/output/")

    automl.predict(X_test)
    automl.predict(["test test", "test test"])
    automl.predict(
        [
            ["test test", "test test"],
            ["test test", "test test"],
            ["test test", "test test"],
        ]
    )

    automl.predict_proba(X_test)
    print(automl.classes_)
# Script entry point: running this file directly executes only
# _test_hf_data(), which is not called anywhere else in this file.
if __name__ == "__main__":
    _test_hf_data()