mirror of
https://github.com/microsoft/FLAML.git
synced 2026-02-15 05:09:16 +08:00
Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
01c3c83653 | ||
|
|
9b66103f7c | ||
|
|
48dfd72e64 | ||
|
|
dec92e5b02 | ||
|
|
22911ea1ef | ||
|
|
12183e5f73 | ||
|
|
c2b25310fc | ||
|
|
0f9420590d | ||
|
|
5107c506b4 | ||
|
|
9e219ef8dc | ||
|
|
6e4083743b | ||
|
|
17e95edd9e | ||
|
|
468bc62d27 | ||
|
|
437c239c11 | ||
|
|
8e753f1092 | ||
|
|
a3b57e11d4 | ||
|
|
a80dcf9925 | ||
|
|
7157af44e0 | ||
|
|
1798c4591e | ||
|
|
dd26263330 | ||
|
|
2ba5f8bed1 | ||
|
|
d0a11958a5 | ||
|
|
0ef9b00a75 | ||
|
|
840f76e5e5 | ||
|
|
d8b7d25b80 | ||
|
|
6d53929803 |
21
.github/workflows/CD.yml
vendored
21
.github/workflows/CD.yml
vendored
@@ -12,26 +12,17 @@ jobs:
|
||||
deploy:
|
||||
strategy:
|
||||
matrix:
|
||||
os: ['ubuntu-latest']
|
||||
python-version: [3.8]
|
||||
os: ["ubuntu-latest"]
|
||||
python-version: ["3.10"]
|
||||
runs-on: ${{ matrix.os }}
|
||||
environment: package
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
- name: Cache conda
|
||||
uses: actions/cache@v3
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
path: ~/conda_pkgs_dir
|
||||
key: conda-${{ matrix.os }}-python-${{ matrix.python-version }}-${{ hashFiles('environment.yml') }}
|
||||
- name: Setup Miniconda
|
||||
uses: conda-incubator/setup-miniconda@v2
|
||||
with:
|
||||
auto-update-conda: true
|
||||
auto-activate-base: false
|
||||
activate-environment: hcrystalball
|
||||
python-version: ${{ matrix.python-version }}
|
||||
use-only-tar-bz2: true
|
||||
- name: Install from source
|
||||
# This is required for the pre-commit tests
|
||||
shell: pwsh
|
||||
@@ -42,7 +33,7 @@ jobs:
|
||||
- name: Build
|
||||
shell: pwsh
|
||||
run: |
|
||||
pip install twine
|
||||
pip install twine wheel setuptools
|
||||
python setup.py sdist bdist_wheel
|
||||
- name: Publish to PyPI
|
||||
env:
|
||||
|
||||
8
.github/workflows/deploy-website.yml
vendored
8
.github/workflows/deploy-website.yml
vendored
@@ -37,11 +37,11 @@ jobs:
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.8"
|
||||
python-version: "3.10"
|
||||
- name: pydoc-markdown install
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pydoc-markdown==4.5.0
|
||||
pip install pydoc-markdown==4.7.0
|
||||
- name: pydoc-markdown run
|
||||
run: |
|
||||
pydoc-markdown
|
||||
@@ -73,11 +73,11 @@ jobs:
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.8"
|
||||
python-version: "3.10"
|
||||
- name: pydoc-markdown install
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pydoc-markdown==4.5.0
|
||||
pip install pydoc-markdown==4.7.0
|
||||
- name: pydoc-markdown run
|
||||
run: |
|
||||
pydoc-markdown
|
||||
|
||||
12
.github/workflows/python-package.yml
vendored
12
.github/workflows/python-package.yml
vendored
@@ -14,6 +14,12 @@ on:
|
||||
- 'setup.py'
|
||||
pull_request:
|
||||
branches: ['main']
|
||||
paths:
|
||||
- 'flaml/**'
|
||||
- 'test/**'
|
||||
- 'notebook/**'
|
||||
- '.github/workflows/python-package.yml'
|
||||
- 'setup.py'
|
||||
merge_group:
|
||||
types: [checks_requested]
|
||||
|
||||
@@ -30,7 +36,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-2019]
|
||||
python-version: ["3.8", "3.9", "3.10", "3.11"]
|
||||
python-version: ["3.9", "3.10", "3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
@@ -50,7 +56,7 @@ jobs:
|
||||
export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp"
|
||||
- name: Install packages and dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip wheel
|
||||
python -m pip install --upgrade pip wheel setuptools
|
||||
pip install -e .
|
||||
python -c "import flaml"
|
||||
pip install -e .[test]
|
||||
@@ -85,7 +91,7 @@ jobs:
|
||||
- name: Test with pytest
|
||||
if: matrix.python-version != '3.10'
|
||||
run: |
|
||||
pytest test
|
||||
pytest test/
|
||||
- name: Coverage
|
||||
if: matrix.python-version == '3.10'
|
||||
run: |
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# basic setup
|
||||
FROM mcr.microsoft.com/devcontainers/python:3.8
|
||||
FROM mcr.microsoft.com/devcontainers/python:3.10
|
||||
RUN apt-get update && apt-get -y update
|
||||
RUN apt-get install -y sudo git npm
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ FLAML has a .NET implementation in [ML.NET](http://dot.net/ml), an open-source,
|
||||
|
||||
## Installation
|
||||
|
||||
FLAML requires **Python version >= 3.8**. It can be installed from pip:
|
||||
FLAML requires **Python version >= 3.9**. It can be installed from pip:
|
||||
|
||||
```bash
|
||||
pip install flaml
|
||||
|
||||
@@ -156,7 +156,7 @@ class MathUserProxyAgent(UserProxyAgent):
|
||||
when the number of auto reply reaches the max_consecutive_auto_reply or when is_termination_msg is True.
|
||||
default_auto_reply (str or dict or None): the default auto reply message when no code execution or llm based reply is generated.
|
||||
max_invalid_q_per_step (int): (ADDED) the maximum number of invalid queries per step.
|
||||
**kwargs (dict): other kwargs in [UserProxyAgent](user_proxy_agent#__init__).
|
||||
**kwargs (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__).
|
||||
"""
|
||||
super().__init__(
|
||||
name=name,
|
||||
|
||||
@@ -123,7 +123,7 @@ class RetrieveUserProxyAgent(UserProxyAgent):
|
||||
can be found at `https://www.sbert.net/docs/pretrained_models.html`. The default model is a
|
||||
fast model. If you want to use a high performance model, `all-mpnet-base-v2` is recommended.
|
||||
- customized_prompt (Optional, str): the customized prompt for the retrieve chat. Default is None.
|
||||
**kwargs (dict): other kwargs in [UserProxyAgent](user_proxy_agent#__init__).
|
||||
**kwargs (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__).
|
||||
"""
|
||||
super().__init__(
|
||||
name=name,
|
||||
|
||||
@@ -10,6 +10,7 @@ import os
|
||||
import random
|
||||
import sys
|
||||
import time
|
||||
from concurrent.futures import as_completed
|
||||
from functools import partial
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
@@ -187,9 +188,16 @@ class AutoML(BaseEstimator):
|
||||
mem_thres: A float of the memory size constraint in bytes.
|
||||
pred_time_limit: A float of the prediction latency constraint in seconds.
|
||||
It refers to the average prediction time per row in validation data.
|
||||
train_time_limit: A float of the training time constraint in seconds.
|
||||
train_time_limit: None or a float of the training time constraint in seconds for each trial.
|
||||
Only valid for sequential search.
|
||||
verbose: int, default=3 | Controls the verbosity, higher means more
|
||||
messages.
|
||||
verbose=0: logger level = CRITICAL
|
||||
verbose=1: logger level = ERROR
|
||||
verbose=2: logger level = WARNING
|
||||
verbose=3: logger level = INFO
|
||||
verbose=4: logger level = DEBUG
|
||||
verbose>5: logger level = NOTSET
|
||||
retrain_full: bool or str, default=True | whether to retrain the
|
||||
selected model on the full training data when using holdout.
|
||||
True - retrain only after search finishes; False - no retraining;
|
||||
@@ -424,6 +432,8 @@ class AutoML(BaseEstimator):
|
||||
If `model_history` was set to True, then the returned model is trained.
|
||||
"""
|
||||
state = self._search_states.get(estimator_name)
|
||||
if state and estimator_name == self._best_estimator:
|
||||
return self.model
|
||||
return state and getattr(state, "trained_estimator", None)
|
||||
|
||||
@property
|
||||
@@ -1332,7 +1342,8 @@ class AutoML(BaseEstimator):
|
||||
mem_thres: A float of the memory size constraint in bytes.
|
||||
pred_time_limit: A float of the prediction latency constraint in seconds.
|
||||
It refers to the average prediction time per row in validation data.
|
||||
train_time_limit: None or a float of the training time constraint in seconds.
|
||||
train_time_limit: None or a float of the training time constraint in seconds for each trial.
|
||||
Only valid for sequential search.
|
||||
X_val: None or a numpy array or a pandas dataframe of validation data.
|
||||
y_val: None or a numpy array or a pandas series of validation labels.
|
||||
sample_weight_val: None or a numpy array of the sample weight of
|
||||
@@ -1345,6 +1356,12 @@ class AutoML(BaseEstimator):
|
||||
for training data.
|
||||
verbose: int, default=3 | Controls the verbosity, higher means more
|
||||
messages.
|
||||
verbose=0: logger level = CRITICAL
|
||||
verbose=1: logger level = ERROR
|
||||
verbose=2: logger level = WARNING
|
||||
verbose=3: logger level = INFO
|
||||
verbose=4: logger level = DEBUG
|
||||
verbose>5: logger level = NOTSET
|
||||
retrain_full: bool or str, default=True | whether to retrain the
|
||||
selected model on the full training data when using holdout.
|
||||
True - retrain only after search finishes; False - no retraining;
|
||||
@@ -1623,6 +1640,13 @@ class AutoML(BaseEstimator):
|
||||
_ch.setFormatter(logger_formatter)
|
||||
logger.addHandler(_ch)
|
||||
|
||||
if model_history:
|
||||
logger.warning(
|
||||
"With `model_history` set to `True` by default, all intermediate models are retained in memory, "
|
||||
"which may significantly increase memory usage and slow down training. "
|
||||
"Consider setting `model_history=False` to optimize memory and accelerate the training process."
|
||||
)
|
||||
|
||||
if not use_ray and not use_spark and n_concurrent_trials > 1:
|
||||
if ray_available:
|
||||
logger.warning(
|
||||
@@ -1708,7 +1732,7 @@ class AutoML(BaseEstimator):
|
||||
if not (mlflow.active_run() is not None or is_autolog_enabled()):
|
||||
self.mlflow_integration.only_history = True
|
||||
except KeyError:
|
||||
print("Not in Fabric, Skipped")
|
||||
logger.info("Not in Fabric, Skipped")
|
||||
task.validate_data(
|
||||
self,
|
||||
self._state,
|
||||
@@ -2529,6 +2553,21 @@ class AutoML(BaseEstimator):
|
||||
self._selected = state = self._search_states[estimator]
|
||||
state.best_config_sample_size = self._state.data_size[0]
|
||||
state.best_config = state.init_config[0] if state.init_config else {}
|
||||
self._track_iter = 0
|
||||
self._config_history[self._track_iter] = (estimator, state.best_config, self._state.time_from_start)
|
||||
self._best_iteration = self._track_iter
|
||||
state.val_loss = getattr(state, "val_loss", float("inf"))
|
||||
state.best_loss = getattr(state, "best_loss", float("inf"))
|
||||
state.config = getattr(state, "config", state.best_config.copy())
|
||||
state.metric_for_logging = getattr(state, "metric_for_logging", None)
|
||||
state.sample_size = getattr(state, "sample_size", self._state.data_size[0])
|
||||
state.learner_class = getattr(state, "learner_class", self._state.learner_classes.get(estimator))
|
||||
if hasattr(self, "mlflow_integration") and self.mlflow_integration:
|
||||
self.mlflow_integration.record_state(
|
||||
automl=self,
|
||||
search_state=state,
|
||||
estimator=estimator,
|
||||
)
|
||||
elif self._use_ray is False and self._use_spark is False:
|
||||
self._search_sequential()
|
||||
else:
|
||||
@@ -2700,16 +2739,47 @@ class AutoML(BaseEstimator):
|
||||
):
|
||||
if mlflow.active_run() is None:
|
||||
mlflow.start_run(run_id=self.mlflow_integration.parent_run_id)
|
||||
self.mlflow_integration.log_model(
|
||||
self._trained_estimator.model,
|
||||
self.best_estimator,
|
||||
signature=self.estimator_signature,
|
||||
)
|
||||
self.mlflow_integration.pickle_and_log_automl_artifacts(
|
||||
self, self.model, self.best_estimator, signature=self.pipeline_signature
|
||||
)
|
||||
if self.best_estimator.endswith("_spark"):
|
||||
self.mlflow_integration.log_model(
|
||||
self._trained_estimator.model,
|
||||
self.best_estimator,
|
||||
signature=self.estimator_signature,
|
||||
run_id=self.mlflow_integration.parent_run_id,
|
||||
)
|
||||
else:
|
||||
self.mlflow_integration.pickle_and_log_automl_artifacts(
|
||||
self,
|
||||
self.model,
|
||||
self.best_estimator,
|
||||
signature=self.pipeline_signature,
|
||||
run_id=self.mlflow_integration.parent_run_id,
|
||||
)
|
||||
else:
|
||||
logger.info("not retraining because the time budget is too small.")
|
||||
logger.warning("not retraining because the time budget is too small.")
|
||||
self.wait_futures()
|
||||
|
||||
def wait_futures(self):
|
||||
if self.mlflow_integration is not None:
|
||||
logger.debug("Collecting results from submitted record_state tasks")
|
||||
t1 = time.perf_counter()
|
||||
for future in as_completed(self.mlflow_integration.futures):
|
||||
_task = self.mlflow_integration.futures[future]
|
||||
try:
|
||||
result = future.result()
|
||||
logger.debug(f"Result for record_state task {_task}: {result}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Exception for record_state task {_task}: {e}")
|
||||
for future in as_completed(self.mlflow_integration.futures_log_model):
|
||||
_task = self.mlflow_integration.futures_log_model[future]
|
||||
try:
|
||||
result = future.result()
|
||||
logger.debug(f"Result for log_model task {_task}: {result}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Exception for log_model task {_task}: {e}")
|
||||
t2 = time.perf_counter()
|
||||
logger.debug(f"Collecting results from tasks submitted to executors costs {t2-t1} seconds.")
|
||||
else:
|
||||
logger.debug("No futures to wait for.")
|
||||
|
||||
def __del__(self):
|
||||
if (
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
try:
|
||||
from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
|
||||
except ImportError:
|
||||
pass
|
||||
except ImportError as e:
|
||||
print(f"scikit-learn is required for HistGradientBoostingEstimator. Please install it; error: {e}")
|
||||
|
||||
from flaml import tune
|
||||
from flaml.automl.model import SKLearnEstimator
|
||||
|
||||
@@ -2,13 +2,17 @@
|
||||
# * Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# * Licensed under the MIT License. See LICENSE file in the
|
||||
# * project root for license information.
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
import random
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from decimal import ROUND_HALF_UP, Decimal
|
||||
from typing import TYPE_CHECKING, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from flaml.automl.spark import DataFrame, Series, pd, ps, psDataFrame, psSeries
|
||||
from flaml.automl.spark import DataFrame, F, Series, T, pd, ps, psDataFrame, psSeries
|
||||
from flaml.automl.training_log import training_log_reader
|
||||
|
||||
try:
|
||||
@@ -19,6 +23,7 @@ except ImportError:
|
||||
if TYPE_CHECKING:
|
||||
from flaml.automl.task import Task
|
||||
|
||||
|
||||
TS_TIMESTAMP_COL = "ds"
|
||||
TS_VALUE_COL = "y"
|
||||
|
||||
@@ -445,3 +450,331 @@ class DataTransformer:
|
||||
def group_counts(groups):
|
||||
_, i, c = np.unique(groups, return_counts=True, return_index=True)
|
||||
return c[np.argsort(i)]
|
||||
|
||||
|
||||
def get_random_dataframe(n_rows: int = 200, ratio_none: float = 0.1, seed: int = 42) -> DataFrame:
|
||||
"""Generate a random pandas DataFrame with various data types for testing.
|
||||
This function creates a DataFrame with multiple column types including:
|
||||
- Timestamps
|
||||
- Integers
|
||||
- Floats
|
||||
- Categorical values
|
||||
- Booleans
|
||||
- Lists (tags)
|
||||
- Decimal strings
|
||||
- UUIDs
|
||||
- Binary data (as hex strings)
|
||||
- JSON blobs
|
||||
- Nullable text fields
|
||||
Parameters
|
||||
----------
|
||||
n_rows : int, default=200
|
||||
Number of rows in the generated DataFrame
|
||||
ratio_none : float, default=0.1
|
||||
Probability of generating None values in applicable columns
|
||||
seed : int, default=42
|
||||
Random seed for reproducibility
|
||||
Returns
|
||||
-------
|
||||
pd.DataFrame
|
||||
A DataFrame with 14 columns of various data types
|
||||
Examples
|
||||
--------
|
||||
>>> df = get_random_dataframe(100, 0.05, 123)
|
||||
>>> df.shape
|
||||
(100, 14)
|
||||
>>> df.dtypes
|
||||
timestamp datetime64[ns]
|
||||
id int64
|
||||
score float64
|
||||
status object
|
||||
flag object
|
||||
count object
|
||||
value object
|
||||
tags object
|
||||
rating object
|
||||
uuid object
|
||||
binary object
|
||||
json_blob object
|
||||
category category
|
||||
nullable_text object
|
||||
dtype: object
|
||||
"""
|
||||
|
||||
np.random.seed(seed)
|
||||
random.seed(seed)
|
||||
|
||||
def random_tags():
|
||||
tags = ["AI", "ML", "data", "robotics", "vision"]
|
||||
return random.sample(tags, k=random.randint(1, 3)) if random.random() > ratio_none else None
|
||||
|
||||
def random_decimal():
|
||||
return (
|
||||
str(Decimal(random.uniform(1, 5)).quantize(Decimal("0.01"), rounding=ROUND_HALF_UP))
|
||||
if random.random() > ratio_none
|
||||
else None
|
||||
)
|
||||
|
||||
def random_json_blob():
|
||||
blob = {"a": random.randint(1, 10), "b": random.random()}
|
||||
return json.dumps(blob) if random.random() > ratio_none else None
|
||||
|
||||
def random_binary():
|
||||
return bytes(random.randint(0, 255) for _ in range(4)).hex() if random.random() > ratio_none else None
|
||||
|
||||
data = {
|
||||
"timestamp": [
|
||||
datetime(2020, 1, 1) + timedelta(days=np.random.randint(0, 1000)) if np.random.rand() > ratio_none else None
|
||||
for _ in range(n_rows)
|
||||
],
|
||||
"id": range(1, n_rows + 1),
|
||||
"score": np.random.uniform(0, 100, n_rows),
|
||||
"status": np.random.choice(
|
||||
["active", "inactive", "pending", None],
|
||||
size=n_rows,
|
||||
p=[(1 - ratio_none) / 3, (1 - ratio_none) / 3, (1 - ratio_none) / 3, ratio_none],
|
||||
),
|
||||
"flag": np.random.choice(
|
||||
[True, False, None], size=n_rows, p=[(1 - ratio_none) / 2, (1 - ratio_none) / 2, ratio_none]
|
||||
),
|
||||
"count": [np.random.randint(0, 100) if np.random.rand() > ratio_none else None for _ in range(n_rows)],
|
||||
"value": [round(np.random.normal(50, 15), 2) if np.random.rand() > ratio_none else None for _ in range(n_rows)],
|
||||
"tags": [random_tags() for _ in range(n_rows)],
|
||||
"rating": [random_decimal() for _ in range(n_rows)],
|
||||
"uuid": [str(uuid.uuid4()) if np.random.rand() > ratio_none else None for _ in range(n_rows)],
|
||||
"binary": [random_binary() for _ in range(n_rows)],
|
||||
"json_blob": [random_json_blob() for _ in range(n_rows)],
|
||||
"category": pd.Categorical(
|
||||
np.random.choice(
|
||||
["A", "B", "C", None],
|
||||
size=n_rows,
|
||||
p=[(1 - ratio_none) / 3, (1 - ratio_none) / 3, (1 - ratio_none) / 3, ratio_none],
|
||||
)
|
||||
),
|
||||
"nullable_text": [random.choice(["Good", "Bad", "Average", None]) for _ in range(n_rows)],
|
||||
}
|
||||
|
||||
return pd.DataFrame(data)
|
||||
|
||||
|
||||
def auto_convert_dtypes_spark(
|
||||
df: psDataFrame,
|
||||
na_values: list = None,
|
||||
category_threshold: float = 0.3,
|
||||
convert_threshold: float = 0.6,
|
||||
sample_ratio: float = 0.1,
|
||||
) -> tuple[psDataFrame, dict]:
|
||||
"""Automatically convert data types in a PySpark DataFrame using heuristics.
|
||||
|
||||
This function analyzes a sample of the DataFrame to infer appropriate data types
|
||||
and applies the conversions. It handles timestamps, numeric values, booleans,
|
||||
and categorical fields.
|
||||
|
||||
Args:
|
||||
df: A PySpark DataFrame to convert.
|
||||
na_values: List of strings to be considered as NA/NaN. Defaults to
|
||||
['NA', 'na', 'NULL', 'null', ''].
|
||||
category_threshold: Maximum ratio of unique values to total values
|
||||
to consider a column categorical. Defaults to 0.3.
|
||||
convert_threshold: Minimum ratio of successfully converted values required
|
||||
to apply a type conversion. Defaults to 0.6.
|
||||
sample_ratio: Fraction of data to sample for type inference. Defaults to 0.1.
|
||||
|
||||
Returns:
|
||||
tuple: (The DataFrame with converted types, A dictionary mapping column names to
|
||||
their inferred types as strings)
|
||||
|
||||
Note:
|
||||
- 'category' in the schema dict is conceptual as PySpark doesn't have a true
|
||||
category type like pandas
|
||||
- The function uses sampling for efficiency with large datasets
|
||||
"""
|
||||
n_rows = df.count()
|
||||
if na_values is None:
|
||||
na_values = ["NA", "na", "NULL", "null", ""]
|
||||
|
||||
# Normalize NA-like values
|
||||
for colname, coltype in df.dtypes:
|
||||
if coltype == "string":
|
||||
df = df.withColumn(
|
||||
colname,
|
||||
F.when(F.trim(F.lower(F.col(colname))).isin([v.lower() for v in na_values]), None).otherwise(
|
||||
F.col(colname)
|
||||
),
|
||||
)
|
||||
|
||||
schema = {}
|
||||
for colname in df.columns:
|
||||
# Sample once at an appropriate ratio
|
||||
sample_ratio_to_use = min(1.0, sample_ratio if n_rows * sample_ratio > 100 else 100 / n_rows)
|
||||
col_sample = df.select(colname).sample(withReplacement=False, fraction=sample_ratio_to_use).dropna()
|
||||
sample_count = col_sample.count()
|
||||
|
||||
inferred_type = "string" # Default
|
||||
|
||||
if col_sample.dtypes[0][1] != "string":
|
||||
schema[colname] = col_sample.dtypes[0][1]
|
||||
continue
|
||||
|
||||
if sample_count == 0:
|
||||
schema[colname] = "string"
|
||||
continue
|
||||
|
||||
# Check if timestamp
|
||||
ts_col = col_sample.withColumn("parsed", F.to_timestamp(F.col(colname)))
|
||||
|
||||
# Check numeric
|
||||
if (
|
||||
col_sample.withColumn("n", F.col(colname).cast("double")).filter("n is not null").count()
|
||||
>= sample_count * convert_threshold
|
||||
):
|
||||
# All whole numbers?
|
||||
all_whole = (
|
||||
col_sample.withColumn("n", F.col(colname).cast("double"))
|
||||
.filter("n is not null")
|
||||
.withColumn("frac", F.abs(F.col("n") % 1))
|
||||
.filter("frac > 0.000001")
|
||||
.count()
|
||||
== 0
|
||||
)
|
||||
inferred_type = "int" if all_whole else "double"
|
||||
|
||||
# Check low-cardinality (category-like)
|
||||
elif (
|
||||
sample_count > 0
|
||||
and col_sample.select(F.countDistinct(F.col(colname))).collect()[0][0] / sample_count <= category_threshold
|
||||
):
|
||||
inferred_type = "category" # Will just be string, but marked as such
|
||||
|
||||
# Check if timestamp
|
||||
elif ts_col.filter(F.col("parsed").isNotNull()).count() >= sample_count * convert_threshold:
|
||||
inferred_type = "timestamp"
|
||||
|
||||
schema[colname] = inferred_type
|
||||
|
||||
# Apply inferred schema
|
||||
for colname, inferred_type in schema.items():
|
||||
if inferred_type == "int":
|
||||
df = df.withColumn(colname, F.col(colname).cast(T.IntegerType()))
|
||||
elif inferred_type == "double":
|
||||
df = df.withColumn(colname, F.col(colname).cast(T.DoubleType()))
|
||||
elif inferred_type == "boolean":
|
||||
df = df.withColumn(
|
||||
colname,
|
||||
F.when(F.lower(F.col(colname)).isin("true", "yes", "1"), True)
|
||||
.when(F.lower(F.col(colname)).isin("false", "no", "0"), False)
|
||||
.otherwise(None),
|
||||
)
|
||||
elif inferred_type == "timestamp":
|
||||
df = df.withColumn(colname, F.to_timestamp(F.col(colname)))
|
||||
elif inferred_type == "category":
|
||||
df = df.withColumn(colname, F.col(colname).cast(T.StringType())) # Marked conceptually
|
||||
|
||||
# otherwise keep as string (or original type)
|
||||
|
||||
return df, schema
|
||||
|
||||
|
||||
def auto_convert_dtypes_pandas(
|
||||
df: DataFrame,
|
||||
na_values: list = None,
|
||||
category_threshold: float = 0.3,
|
||||
convert_threshold: float = 0.6,
|
||||
sample_ratio: float = 1.0,
|
||||
) -> tuple[DataFrame, dict]:
|
||||
"""Automatically convert data types in a pandas DataFrame using heuristics.
|
||||
|
||||
This function analyzes the DataFrame to infer appropriate data types
|
||||
and applies the conversions. It handles timestamps, timedeltas, numeric values,
|
||||
and categorical fields.
|
||||
|
||||
Args:
|
||||
df: A pandas DataFrame to convert.
|
||||
na_values: List of strings to be considered as NA/NaN. Defaults to
|
||||
['NA', 'na', 'NULL', 'null', ''].
|
||||
category_threshold: Maximum ratio of unique values to total values
|
||||
to consider a column categorical. Defaults to 0.3.
|
||||
convert_threshold: Minimum ratio of successfully converted values required
|
||||
to apply a type conversion. Defaults to 0.6.
|
||||
sample_ratio: Fraction of data to sample for type inference. Not used in pandas version
|
||||
but included for API compatibility. Defaults to 1.0.
|
||||
|
||||
Returns:
|
||||
tuple: (The DataFrame with converted types, A dictionary mapping column names to
|
||||
their inferred types as strings)
|
||||
"""
|
||||
if na_values is None:
|
||||
na_values = {"NA", "na", "NULL", "null", ""}
|
||||
|
||||
df_converted = df.convert_dtypes()
|
||||
schema = {}
|
||||
|
||||
# Sample if needed (for API compatibility)
|
||||
if sample_ratio < 1.0:
|
||||
df = df.sample(frac=sample_ratio)
|
||||
|
||||
n_rows = len(df)
|
||||
|
||||
for col in df.columns:
|
||||
series = df[col]
|
||||
# Replace NA-like values if string
|
||||
series_cleaned = series.map(lambda x: np.nan if isinstance(x, str) and x.strip() in na_values else x)
|
||||
|
||||
# Skip conversion if already non-object data type, except bool which can potentially be categorical
|
||||
if (
|
||||
not isinstance(series_cleaned.dtype, pd.BooleanDtype)
|
||||
and not isinstance(series_cleaned.dtype, pd.StringDtype)
|
||||
and series_cleaned.dtype != "object"
|
||||
):
|
||||
# Keep the original data type for non-object dtypes
|
||||
df_converted[col] = series
|
||||
schema[col] = str(series_cleaned.dtype)
|
||||
continue
|
||||
|
||||
# print(f"type: {series_cleaned.dtype}, column: {series_cleaned.name}")
|
||||
|
||||
if not isinstance(series_cleaned.dtype, pd.BooleanDtype):
|
||||
# Try numeric (int or float)
|
||||
numeric = pd.to_numeric(series_cleaned, errors="coerce")
|
||||
if numeric.notna().sum() >= n_rows * convert_threshold:
|
||||
if (numeric.dropna() % 1 == 0).all():
|
||||
try:
|
||||
df_converted[col] = numeric.astype("int") # Nullable integer
|
||||
schema[col] = "int"
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
df_converted[col] = numeric.astype("double")
|
||||
schema[col] = "double"
|
||||
continue
|
||||
|
||||
# Try datetime
|
||||
datetime_converted = pd.to_datetime(series_cleaned, errors="coerce")
|
||||
if datetime_converted.notna().sum() >= n_rows * convert_threshold:
|
||||
df_converted[col] = datetime_converted
|
||||
schema[col] = "timestamp"
|
||||
continue
|
||||
|
||||
# Try timedelta
|
||||
try:
|
||||
timedelta_converted = pd.to_timedelta(series_cleaned, errors="coerce")
|
||||
if timedelta_converted.notna().sum() >= n_rows * convert_threshold:
|
||||
df_converted[col] = timedelta_converted
|
||||
schema[col] = "timedelta"
|
||||
continue
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
# Try category
|
||||
try:
|
||||
unique_ratio = series_cleaned.nunique(dropna=True) / n_rows if n_rows > 0 else 1.0
|
||||
if unique_ratio <= category_threshold:
|
||||
df_converted[col] = series_cleaned.astype("category")
|
||||
schema[col] = "category"
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
df_converted[col] = series_cleaned.astype("string")
|
||||
schema[col] = "string"
|
||||
|
||||
return df_converted, schema
|
||||
|
||||
@@ -1,7 +1,37 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
class ColoredFormatter(logging.Formatter):
|
||||
# ANSI escape codes for colors
|
||||
COLORS = {
|
||||
# logging.DEBUG: "\033[36m", # Cyan
|
||||
# logging.INFO: "\033[32m", # Green
|
||||
logging.WARNING: "\033[33m", # Yellow
|
||||
logging.ERROR: "\033[31m", # Red
|
||||
logging.CRITICAL: "\033[1;31m", # Bright Red
|
||||
}
|
||||
RESET = "\033[0m" # Reset to default
|
||||
|
||||
def __init__(self, fmt, datefmt, use_color=True):
|
||||
super().__init__(fmt, datefmt)
|
||||
self.use_color = use_color
|
||||
|
||||
def format(self, record):
|
||||
formatted = super().format(record)
|
||||
if self.use_color:
|
||||
color = self.COLORS.get(record.levelno, "")
|
||||
if color:
|
||||
return f"{color}{formatted}{self.RESET}"
|
||||
return formatted
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger_formatter = logging.Formatter(
|
||||
"[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s", "%m-%d %H:%M:%S"
|
||||
use_color = True
|
||||
if os.getenv("FLAML_LOG_NO_COLOR"):
|
||||
use_color = False
|
||||
|
||||
logger_formatter = ColoredFormatter(
|
||||
"[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s", "%m-%d %H:%M:%S", use_color
|
||||
)
|
||||
logger.propagate = False
|
||||
|
||||
@@ -9,6 +9,7 @@ import os
|
||||
import shutil
|
||||
import signal
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import warnings
|
||||
from contextlib import contextmanager
|
||||
@@ -89,21 +90,25 @@ def limit_resource(memory_limit, time_limit):
|
||||
except ValueError:
|
||||
# According to https://bugs.python.org/issue40518, it's a mac-specific error.
|
||||
pass
|
||||
main_thread = False
|
||||
if time_limit is not None:
|
||||
alarm_set = False
|
||||
if time_limit is not None and threading.current_thread() is threading.main_thread():
|
||||
try:
|
||||
signal.signal(signal.SIGALRM, TimeoutHandler)
|
||||
signal.alarm(int(time_limit) or 1)
|
||||
main_thread = True
|
||||
alarm_set = True
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
if main_thread:
|
||||
if alarm_set:
|
||||
signal.alarm(0)
|
||||
if memory_limit > 0:
|
||||
resource.setrlimit(resource.RLIMIT_AS, (soft, hard))
|
||||
try:
|
||||
resource.setrlimit(resource.RLIMIT_AS, (soft, hard))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
||||
class BaseEstimator:
|
||||
@@ -130,7 +135,7 @@ class BaseEstimator:
|
||||
self._task = task if isinstance(task, Task) else task_factory(task, None, None)
|
||||
self.params = self.config2params(config)
|
||||
self.estimator_class = self._model = None
|
||||
if "_estimator_type" in config:
|
||||
if "_estimator_type" in self.params:
|
||||
self._estimator_type = self.params.pop("_estimator_type")
|
||||
else:
|
||||
self._estimator_type = "classifier" if self._task.is_classification() else "regressor"
|
||||
@@ -1691,7 +1696,7 @@ class XGBoostEstimator(SKLearnEstimator):
|
||||
# use_label_encoder is deprecated in 1.7.
|
||||
if xgboost_version < "1.7.0":
|
||||
params["use_label_encoder"] = params.get("use_label_encoder", False)
|
||||
if "n_jobs" in config:
|
||||
if "n_jobs" in params:
|
||||
params["nthread"] = params.pop("n_jobs")
|
||||
return params
|
||||
|
||||
@@ -1891,7 +1896,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
|
||||
params = super().config2params(config)
|
||||
if "max_leaves" in params:
|
||||
params["max_leaf_nodes"] = params.get("max_leaf_nodes", params.pop("max_leaves"))
|
||||
if not self._task.is_classification() and "criterion" in config:
|
||||
if not self._task.is_classification() and "criterion" in params:
|
||||
params.pop("criterion")
|
||||
if "random_state" not in params:
|
||||
params["random_state"] = 12032022
|
||||
@@ -2344,7 +2349,7 @@ class SGDEstimator(SKLearnEstimator):
|
||||
params["loss"] = params.get("loss", None)
|
||||
if params["loss"] is None and self._task.is_classification():
|
||||
params["loss"] = "log_loss" if SKLEARN_VERSION >= "1.1" else "log"
|
||||
if not self._task.is_classification():
|
||||
if not self._task.is_classification() and "n_jobs" in params:
|
||||
params.pop("n_jobs")
|
||||
|
||||
if params.get("penalty") != "elasticnet":
|
||||
|
||||
@@ -769,10 +769,10 @@ class GenericTask(Task):
|
||||
if not is_spark_dataframe:
|
||||
y_train, y_val = y_train_split[train_index], y_train_split[val_index]
|
||||
if weight is not None:
|
||||
fit_kwargs["sample_weight"], weight_val = (
|
||||
weight[train_index],
|
||||
weight[val_index],
|
||||
fit_kwargs["sample_weight"] = (
|
||||
weight[train_index] if isinstance(weight, np.ndarray) else weight.iloc[train_index]
|
||||
)
|
||||
weight_val = weight[val_index] if isinstance(weight, np.ndarray) else weight.iloc[val_index]
|
||||
if groups is not None:
|
||||
fit_kwargs["groups"] = (
|
||||
groups[train_index] if isinstance(groups, np.ndarray) else groups.iloc[train_index]
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
import atexit
|
||||
import functools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import random
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import warnings
|
||||
from concurrent.futures import ThreadPoolExecutor, wait
|
||||
from typing import MutableMapping
|
||||
|
||||
import mlflow
|
||||
@@ -12,14 +16,15 @@ import pandas as pd
|
||||
from mlflow.entities import Metric, Param, RunTag
|
||||
from mlflow.exceptions import MlflowException
|
||||
from mlflow.utils.autologging_utils import AUTOLOGGING_INTEGRATIONS, autologging_is_disabled
|
||||
from packaging.requirements import Requirement
|
||||
from scipy.sparse import issparse
|
||||
from sklearn import tree
|
||||
|
||||
try:
|
||||
from pyspark.ml import Pipeline as SparkPipeline
|
||||
from pyspark.ml import PipelineModel as SparkPipelineModel
|
||||
except ImportError:
|
||||
|
||||
class SparkPipeline:
|
||||
class SparkPipelineModel:
|
||||
pass
|
||||
|
||||
|
||||
@@ -32,6 +37,84 @@ from flaml.version import __version__
|
||||
|
||||
SEARCH_MAX_RESULTS = 5000 # Each train should not have more than 5000 trials
|
||||
IS_RENAME_CHILD_RUN = os.environ.get("FLAML_IS_RENAME_CHILD_RUN", "false").lower() == "true"
|
||||
REMOVE_REQUIREMENT_LIST = [
|
||||
"synapseml-cognitive",
|
||||
"synapseml-core",
|
||||
"synapseml-deep-learning",
|
||||
"synapseml-internal",
|
||||
"synapseml-mlflow",
|
||||
"synapseml-opencv",
|
||||
"synapseml-vw",
|
||||
"synapseml-lightgbm",
|
||||
"synapseml-utils",
|
||||
"nni",
|
||||
"optuna",
|
||||
]
|
||||
OPTIONAL_REMOVE_REQUIREMENT_LIST = ["pytorch-lightning", "transformers"]
|
||||
|
||||
os.environ["MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR"] = os.environ.get("MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR", "false")
|
||||
|
||||
MLFLOW_NUM_WORKERS = int(os.environ.get("FLAML_MLFLOW_NUM_WORKERS", os.cpu_count() * 4 if os.cpu_count() else 2))
|
||||
executor = ThreadPoolExecutor(max_workers=MLFLOW_NUM_WORKERS)
|
||||
atexit.register(lambda: executor.shutdown(wait=True))
|
||||
|
||||
IS_CLEAN_LOGS = os.environ.get("FLAML_IS_CLEAN_LOGS", "1")
|
||||
if IS_CLEAN_LOGS == "1":
|
||||
logging.getLogger("synapse.ml").setLevel(logging.CRITICAL)
|
||||
logging.getLogger("mlflow.utils").setLevel(logging.CRITICAL)
|
||||
logging.getLogger("mlflow.utils.environment").setLevel(logging.CRITICAL)
|
||||
logging.getLogger("mlflow.models.model").setLevel(logging.CRITICAL)
|
||||
warnings.simplefilter("ignore", category=FutureWarning)
|
||||
warnings.simplefilter("ignore", category=UserWarning)
|
||||
|
||||
|
||||
def convert_requirement(requirement_list: list[str]):
|
||||
ret = (
|
||||
[Requirement(s.strip().lower()) for s in requirement_list]
|
||||
if mlflow.__version__ <= "2.17.0"
|
||||
else requirement_list
|
||||
)
|
||||
return ret
|
||||
|
||||
|
||||
def time_it(func_or_code=None):
|
||||
"""
|
||||
Decorator or function that measures execution time.
|
||||
|
||||
Can be used in three ways:
|
||||
1. As a decorator with no arguments: @time_it
|
||||
2. As a decorator with arguments: @time_it()
|
||||
3. As a function call with a string of code to execute and time: time_it("some_code()")
|
||||
|
||||
Args:
|
||||
func_or_code (callable or str, optional): Either a function to decorate or
|
||||
a string of code to execute and time.
|
||||
|
||||
Returns:
|
||||
callable or None: Returns a decorated function if used as a decorator,
|
||||
or None if used to execute a string of code.
|
||||
"""
|
||||
|
||||
def decorator(func):
|
||||
@functools.wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
start_time = time.time()
|
||||
result = func(*args, **kwargs)
|
||||
end_time = time.time()
|
||||
logger.debug(f"Execution of {func.__name__} took {end_time - start_time:.4f} seconds")
|
||||
return result
|
||||
|
||||
return wrapper
|
||||
|
||||
if callable(func_or_code):
|
||||
return decorator(func_or_code)
|
||||
elif func_or_code is None:
|
||||
return decorator
|
||||
else:
|
||||
start_time = time.time()
|
||||
exec(func_or_code)
|
||||
end_time = time.time()
|
||||
logger.debug(f"Execution\n```\n{func_or_code}\n```\ntook {end_time - start_time:.4f} seconds")
|
||||
|
||||
|
||||
def flatten_dict(d: MutableMapping, sep: str = ".") -> MutableMapping:
|
||||
@@ -49,23 +132,28 @@ def is_autolog_enabled():
|
||||
return not all(autologging_is_disabled(k) for k in AUTOLOGGING_INTEGRATIONS.keys())
|
||||
|
||||
|
||||
def get_mlflow_log_latency(model_history=False):
|
||||
def get_mlflow_log_latency(model_history=False, delete_run=True):
|
||||
try:
|
||||
FLAML_MLFLOW_LOG_LATENCY = float(os.getenv("FLAML_MLFLOW_LOG_LATENCY", 0))
|
||||
except ValueError:
|
||||
FLAML_MLFLOW_LOG_LATENCY = 0
|
||||
if FLAML_MLFLOW_LOG_LATENCY >= 0.1:
|
||||
return FLAML_MLFLOW_LOG_LATENCY
|
||||
st = time.time()
|
||||
with mlflow.start_run(nested=True, run_name="get_mlflow_log_latency") as run:
|
||||
if model_history:
|
||||
sk_model = tree.DecisionTreeClassifier()
|
||||
mlflow.sklearn.log_model(sk_model, "sk_models")
|
||||
mlflow.sklearn.log_model(Pipeline([("estimator", sk_model)]), "sk_pipeline")
|
||||
mlflow.sklearn.log_model(sk_model, "model")
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
pickle_fpath = os.path.join(tmpdir, f"tmp_{int(time.time()*1000)}")
|
||||
pickle_fpath = os.path.join(tmpdir, f"tmp_{int(time.time() * 1000)}")
|
||||
with open(pickle_fpath, "wb") as f:
|
||||
pickle.dump(sk_model, f)
|
||||
mlflow.log_artifact(pickle_fpath, "sk_model1")
|
||||
mlflow.log_artifact(pickle_fpath, "sk_model2")
|
||||
mlflow.log_artifact(pickle_fpath, "sk_model")
|
||||
mlflow.set_tag("synapseml.ui.visible", "false") # not shown inline in fabric
|
||||
mlflow.delete_run(run.info.run_id)
|
||||
if delete_run:
|
||||
mlflow.delete_run(run.info.run_id)
|
||||
et = time.time()
|
||||
return et - st
|
||||
return 3 * (et - st)
|
||||
|
||||
|
||||
def infer_signature(X_train=None, y_train=None, dataframe=None, label=None):
|
||||
@@ -98,12 +186,76 @@ def infer_signature(X_train=None, y_train=None, dataframe=None, label=None):
|
||||
)
|
||||
|
||||
|
||||
def update_and_install_requirements(
|
||||
run_id=None,
|
||||
model_name=None,
|
||||
model_version=None,
|
||||
remove_list=None,
|
||||
artifact_path="model",
|
||||
dst_path=None,
|
||||
install_with_ipython=False,
|
||||
):
|
||||
if not (run_id or (model_name and model_version)):
|
||||
raise ValueError(
|
||||
"Please provide `run_id` or both `model_name` and `model_version`. If all three are provided, `run_id` will be used."
|
||||
)
|
||||
|
||||
if install_with_ipython:
|
||||
from IPython import get_ipython
|
||||
|
||||
if not remove_list:
|
||||
remove_list = [
|
||||
"synapseml-cognitive",
|
||||
"synapseml-core",
|
||||
"synapseml-deep-learning",
|
||||
"synapseml-internal",
|
||||
"synapseml-mlflow",
|
||||
"synapseml-opencv",
|
||||
"synapseml-vw",
|
||||
"synapseml-lightgbm",
|
||||
"synapseml-utils",
|
||||
"flaml", # flaml is needed for AutoML models, should be pre-installed in the runtime
|
||||
"pyspark", # fabric internal pyspark should be pre-installed in the runtime
|
||||
]
|
||||
|
||||
# Download model artifacts
|
||||
client = mlflow.MlflowClient()
|
||||
if not run_id:
|
||||
run_id = client.get_model_version(model_name, model_version).run_id
|
||||
if not dst_path:
|
||||
dst_path = os.path.join(tempfile.gettempdir(), "model_artifacts")
|
||||
os.makedirs(dst_path, exist_ok=True)
|
||||
client.download_artifacts(run_id, artifact_path, dst_path)
|
||||
requirements_path = os.path.join(dst_path, artifact_path, "requirements.txt")
|
||||
with open(requirements_path) as f:
|
||||
reqs = f.read().splitlines()
|
||||
old_reqs = [Requirement(req) for req in reqs if req]
|
||||
old_reqs_dict = {req.name: str(req) for req in old_reqs}
|
||||
for req in remove_list:
|
||||
req = Requirement(req)
|
||||
if req.name in old_reqs_dict:
|
||||
old_reqs_dict.pop(req.name, None)
|
||||
new_reqs_list = list(old_reqs_dict.values())
|
||||
|
||||
with open(requirements_path, "w") as f:
|
||||
f.write("\n".join(new_reqs_list))
|
||||
|
||||
if install_with_ipython:
|
||||
get_ipython().run_line_magic("pip", f"install -r {requirements_path} -q")
|
||||
else:
|
||||
logger.info(f"You can run `pip install -r {requirements_path}` to install dependencies.")
|
||||
return requirements_path
|
||||
|
||||
|
||||
def _mlflow_wrapper(evaluation_func, mlflow_exp_id, mlflow_config=None, extra_tags=None, autolog=False):
|
||||
def wrapped(*args, **kwargs):
|
||||
if mlflow_config is not None:
|
||||
from synapse.ml.mlflow import set_mlflow_env_config
|
||||
try:
|
||||
from synapse.ml.mlflow import set_mlflow_env_config
|
||||
|
||||
set_mlflow_env_config(mlflow_config)
|
||||
set_mlflow_env_config(mlflow_config)
|
||||
except Exception:
|
||||
pass
|
||||
import mlflow
|
||||
|
||||
if mlflow_exp_id is not None:
|
||||
@@ -124,7 +276,20 @@ def _mlflow_wrapper(evaluation_func, mlflow_exp_id, mlflow_config=None, extra_ta
|
||||
|
||||
|
||||
def _get_notebook_name():
|
||||
return None
|
||||
try:
|
||||
import re
|
||||
|
||||
from synapse.ml.mlflow import get_mlflow_env_config
|
||||
from synapse.ml.mlflow.shared_platform_utils import get_artifact
|
||||
|
||||
notebook_id = get_mlflow_env_config(False).artifact_id
|
||||
current_notebook = get_artifact(notebook_id)
|
||||
notebook_name = re.sub("\\W+", "-", current_notebook.displayName).strip()
|
||||
|
||||
return notebook_name
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to get notebook name: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def safe_json_dumps(obj):
|
||||
@@ -163,6 +328,8 @@ class MLflowIntegration:
|
||||
self.has_model = False
|
||||
self.only_history = False
|
||||
self._do_log_model = True
|
||||
self.futures = {}
|
||||
self.futures_log_model = {}
|
||||
|
||||
self.extra_tag = (
|
||||
extra_tag
|
||||
@@ -170,6 +337,9 @@ class MLflowIntegration:
|
||||
else {"extra_tag.sid": f"flaml_{__version__}_{int(time.time())}_{random.randint(1001, 9999)}"}
|
||||
)
|
||||
self.start_time = time.time()
|
||||
self.experiment_type = experiment_type
|
||||
self.update_autolog_state()
|
||||
|
||||
self.mlflow_client = mlflow.tracking.MlflowClient()
|
||||
parent_run_info = mlflow.active_run().info if mlflow.active_run() is not None else None
|
||||
if parent_run_info:
|
||||
@@ -188,8 +358,6 @@ class MLflowIntegration:
|
||||
mlflow.set_experiment(experiment_name=mlflow_exp_name)
|
||||
self.experiment_id = mlflow.tracking.fluent._active_experiment_id
|
||||
self.experiment_name = mlflow.get_experiment(self.experiment_id).name
|
||||
self.experiment_type = experiment_type
|
||||
self.update_autolog_state()
|
||||
|
||||
if self.autolog:
|
||||
# only end user created parent run in autolog scenario
|
||||
@@ -197,9 +365,12 @@ class MLflowIntegration:
|
||||
|
||||
def set_mlflow_config(self):
|
||||
if self.driver_mlflow_env_config is not None:
|
||||
from synapse.ml.mlflow import set_mlflow_env_config
|
||||
try:
|
||||
from synapse.ml.mlflow import set_mlflow_env_config
|
||||
|
||||
set_mlflow_env_config(self.driver_mlflow_env_config)
|
||||
set_mlflow_env_config(self.driver_mlflow_env_config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def wrap_evaluation_function(self, evaluation_function):
|
||||
wrapped_evaluation_function = _mlflow_wrapper(
|
||||
@@ -267,6 +438,7 @@ class MLflowIntegration:
|
||||
else:
|
||||
_tags = []
|
||||
self.mlflow_client.log_batch(run_id=target_id, metrics=_metrics, params=[], tags=_tags)
|
||||
return f"Successfully copy_mlflow_run run_id {src_id} to run_id {target_id}"
|
||||
|
||||
def record_trial(self, result, trial, metric):
|
||||
if isinstance(result, dict):
|
||||
@@ -334,12 +506,31 @@ class MLflowIntegration:
|
||||
self.copy_mlflow_run(best_mlflow_run_id, self.parent_run_id)
|
||||
self.has_summary = True
|
||||
|
||||
def log_model(self, model, estimator, signature=None):
|
||||
def log_model(self, model, estimator, signature=None, run_id=None):
|
||||
if not self._do_log_model:
|
||||
return
|
||||
logger.debug(f"logging model {estimator}")
|
||||
ret_message = f"Successfully log_model {estimator} to run_id {run_id}"
|
||||
optional_remove_list = (
|
||||
[] if estimator in ["transformer", "transformer_ms", "tcn", "tft"] else OPTIONAL_REMOVE_REQUIREMENT_LIST
|
||||
)
|
||||
run = mlflow.active_run()
|
||||
if run and run.info.run_id == self.parent_run_id:
|
||||
logger.debug(
|
||||
f"Current active run_id {run.info.run_id} == parent_run_id {self.parent_run_id}, Starting run_id {run_id}"
|
||||
)
|
||||
mlflow.start_run(run_id=run_id, nested=True)
|
||||
elif run and run.info.run_id != run_id:
|
||||
ret_message = (
|
||||
f"Error: Should log_model {estimator} to run_id {run_id}, but logged to run_id {run.info.run_id}"
|
||||
)
|
||||
logger.error(ret_message)
|
||||
else:
|
||||
logger.debug(f"No active run, start run_id {run_id}")
|
||||
mlflow.start_run(run_id=run_id)
|
||||
logger.debug(f"logged model {estimator} to run_id {mlflow.active_run().info.run_id}")
|
||||
if estimator.endswith("_spark"):
|
||||
mlflow.spark.log_model(model, estimator, signature=signature)
|
||||
# mlflow.spark.log_model(model, estimator, signature=signature)
|
||||
mlflow.spark.log_model(model, "model", signature=signature)
|
||||
elif estimator in ["lgbm"]:
|
||||
mlflow.lightgbm.log_model(model, estimator, signature=signature)
|
||||
@@ -352,42 +543,93 @@ class MLflowIntegration:
|
||||
elif estimator in ["prophet"]:
|
||||
mlflow.prophet.log_model(model, estimator, signature=signature)
|
||||
elif estimator in ["orbit"]:
|
||||
pass
|
||||
logger.warning(f"Unsupported model: {estimator}. No model logged.")
|
||||
else:
|
||||
mlflow.sklearn.log_model(model, estimator, signature=signature)
|
||||
future = executor.submit(
|
||||
lambda: mlflow.models.model.update_model_requirements(
|
||||
model_uri=f"runs:/{run_id}/{'model' if estimator.endswith('_spark') else estimator}",
|
||||
operation="remove",
|
||||
requirement_list=convert_requirement(REMOVE_REQUIREMENT_LIST + optional_remove_list),
|
||||
)
|
||||
)
|
||||
self.futures[future] = f"run_{run_id}_requirements_updated"
|
||||
if not run or run.info.run_id == self.parent_run_id:
|
||||
logger.debug(f"Ending current run_id {mlflow.active_run().info.run_id}")
|
||||
mlflow.end_run()
|
||||
return ret_message
|
||||
|
||||
def _pickle_and_log_artifact(self, obj, artifact_name, pickle_fname="temp_.pkl"):
|
||||
def _pickle_and_log_artifact(self, obj, artifact_name, pickle_fname="temp_.pkl", run_id=None):
|
||||
if not self._do_log_model:
|
||||
return
|
||||
return True
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
pickle_fpath = os.path.join(tmpdir, pickle_fname)
|
||||
try:
|
||||
with open(pickle_fpath, "wb") as f:
|
||||
pickle.dump(obj, f)
|
||||
mlflow.log_artifact(pickle_fpath, artifact_name)
|
||||
mlflow.log_artifact(pickle_fpath, artifact_name, run_id)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to pickle and log artifact {artifact_name}, error: {e}")
|
||||
logger.debug(f"Failed to pickle and log {artifact_name}, error: {e}")
|
||||
return False
|
||||
|
||||
def pickle_and_log_automl_artifacts(self, automl, model, estimator, signature=None):
|
||||
def _log_pipeline(self, pipeline, flavor_name, pipeline_name, signature, run_id, estimator=None):
|
||||
logger.debug(f"logging pipeline {flavor_name}:{pipeline_name}:{estimator}")
|
||||
ret_message = f"Successfully _log_pipeline {flavor_name}:{pipeline_name}:{estimator} to run_id {run_id}"
|
||||
optional_remove_list = (
|
||||
[] if estimator in ["transformer", "transformer_ms", "tcn", "tft"] else OPTIONAL_REMOVE_REQUIREMENT_LIST
|
||||
)
|
||||
run = mlflow.active_run()
|
||||
if run and run.info.run_id == self.parent_run_id:
|
||||
logger.debug(
|
||||
f"Current active run_id {run.info.run_id} == parent_run_id {self.parent_run_id}, Starting run_id {run_id}"
|
||||
)
|
||||
mlflow.start_run(run_id=run_id, nested=True)
|
||||
elif run and run.info.run_id != run_id:
|
||||
ret_message = f"Error: Should _log_pipeline {flavor_name}:{pipeline_name}:{estimator} model to run_id {run_id}, but logged to run_id {run.info.run_id}"
|
||||
logger.error(ret_message)
|
||||
else:
|
||||
logger.debug(f"No active run, start run_id {run_id}")
|
||||
mlflow.start_run(run_id=run_id)
|
||||
logger.debug(
|
||||
f"logging pipeline {flavor_name}:{pipeline_name}:{estimator} to run_id {mlflow.active_run().info.run_id}"
|
||||
)
|
||||
if flavor_name == "sklearn":
|
||||
mlflow.sklearn.log_model(pipeline, pipeline_name, signature=signature)
|
||||
elif flavor_name == "spark":
|
||||
mlflow.spark.log_model(pipeline, pipeline_name, signature=signature)
|
||||
else:
|
||||
logger.warning(f"Unsupported pipeline flavor: {flavor_name}. No model logged.")
|
||||
future = executor.submit(
|
||||
lambda: mlflow.models.model.update_model_requirements(
|
||||
model_uri=f"runs:/{run_id}/{pipeline_name}",
|
||||
operation="remove",
|
||||
requirement_list=convert_requirement(REMOVE_REQUIREMENT_LIST + optional_remove_list),
|
||||
)
|
||||
)
|
||||
self.futures[future] = f"run_{run_id}_requirements_updated"
|
||||
if not run or run.info.run_id == self.parent_run_id:
|
||||
logger.debug(f"Ending current run_id {mlflow.active_run().info.run_id}")
|
||||
mlflow.end_run()
|
||||
return ret_message
|
||||
|
||||
def pickle_and_log_automl_artifacts(self, automl, model, estimator, signature=None, run_id=None):
|
||||
"""log automl artifacts to mlflow
|
||||
load back with `automl = mlflow.pyfunc.load_model(model_run_id_or_uri)`, then do prediction with `automl.predict(X)`
|
||||
"""
|
||||
logger.debug(f"logging automl artifacts {estimator}")
|
||||
self._pickle_and_log_artifact(automl.feature_transformer, "feature_transformer", "feature_transformer.pkl")
|
||||
self._pickle_and_log_artifact(automl.label_transformer, "label_transformer", "label_transformer.pkl")
|
||||
# Test test_mlflow 1 and 4 will get error: TypeError: cannot pickle '_io.TextIOWrapper' object
|
||||
# try:
|
||||
# self._pickle_and_log_artifact(automl, "automl", "automl.pkl")
|
||||
# except TypeError:
|
||||
# pass
|
||||
logger.debug(f"logging automl estimator {estimator}")
|
||||
# self._pickle_and_log_artifact(
|
||||
# automl.feature_transformer, "feature_transformer", "feature_transformer.pkl", run_id
|
||||
# )
|
||||
# self._pickle_and_log_artifact(automl.label_transformer, "label_transformer", "label_transformer.pkl", run_id)
|
||||
if estimator.endswith("_spark"):
|
||||
# spark pipeline is not supported yet
|
||||
return
|
||||
feature_transformer = automl.feature_transformer
|
||||
if isinstance(feature_transformer, Pipeline):
|
||||
if isinstance(feature_transformer, Pipeline) and not estimator.endswith("_spark"):
|
||||
pipeline = feature_transformer
|
||||
pipeline.steps.append(("estimator", model))
|
||||
elif isinstance(feature_transformer, SparkPipeline):
|
||||
elif isinstance(feature_transformer, SparkPipelineModel) and estimator.endswith("_spark"):
|
||||
pipeline = feature_transformer
|
||||
pipeline.stages.append(model)
|
||||
elif not estimator.endswith("_spark"):
|
||||
@@ -395,24 +637,26 @@ class MLflowIntegration:
|
||||
steps.append(("estimator", model))
|
||||
pipeline = Pipeline(steps)
|
||||
else:
|
||||
stages = [feature_transformer]
|
||||
stages = []
|
||||
if feature_transformer is not None:
|
||||
stages.append(feature_transformer)
|
||||
stages.append(model)
|
||||
pipeline = SparkPipeline(stages=stages)
|
||||
if isinstance(pipeline, SparkPipeline):
|
||||
pipeline = SparkPipelineModel(stages=stages)
|
||||
if isinstance(pipeline, SparkPipelineModel):
|
||||
logger.debug(f"logging spark pipeline {estimator}")
|
||||
mlflow.spark.log_model(pipeline, "automl_pipeline", signature=signature)
|
||||
self._log_pipeline(pipeline, "spark", "model", signature, run_id, estimator)
|
||||
else:
|
||||
# Add a log named "model" to fit default settings
|
||||
logger.debug(f"logging sklearn pipeline {estimator}")
|
||||
mlflow.sklearn.log_model(pipeline, "automl_pipeline", signature=signature)
|
||||
mlflow.sklearn.log_model(pipeline, "model", signature=signature)
|
||||
self._log_pipeline(pipeline, "sklearn", "model", signature, run_id, estimator)
|
||||
return f"Successfully pickle_and_log_automl_artifacts {estimator} to run_id {run_id}"
|
||||
|
||||
@time_it
|
||||
def record_state(self, automl, search_state, estimator):
|
||||
_st = time.time()
|
||||
automl_metric_name = (
|
||||
automl._state.metric if isinstance(automl._state.metric, str) else automl._state.error_metric
|
||||
)
|
||||
|
||||
if automl._state.error_metric.startswith("1-"):
|
||||
automl_metric_value = 1 - search_state.val_loss
|
||||
elif automl._state.error_metric.startswith("-"):
|
||||
@@ -425,6 +669,8 @@ class MLflowIntegration:
|
||||
else:
|
||||
config = search_state.config
|
||||
|
||||
self.automl_user_configurations = safe_json_dumps(automl._automl_user_configurations)
|
||||
|
||||
info = {
|
||||
"metrics": {
|
||||
"iter_counter": automl._track_iter,
|
||||
@@ -445,7 +691,7 @@ class MLflowIntegration:
|
||||
"flaml.meric": automl_metric_name,
|
||||
"flaml.run_source": "flaml-automl",
|
||||
"flaml.log_type": self.log_type,
|
||||
"flaml.automl_user_configurations": safe_json_dumps(automl._automl_user_configurations),
|
||||
"flaml.automl_user_configurations": self.automl_user_configurations,
|
||||
},
|
||||
"params": {
|
||||
"sample_size": search_state.sample_size,
|
||||
@@ -472,33 +718,70 @@ class MLflowIntegration:
|
||||
run_name = f"{self.parent_run_name}_child_{self.child_counter}"
|
||||
else:
|
||||
run_name = None
|
||||
_t1 = time.time()
|
||||
wait(self.futures_log_model)
|
||||
_t2 = time.time() - _t1
|
||||
logger.debug(f"wait futures_log_model in record_state took {_t2} seconds")
|
||||
with mlflow.start_run(nested=True, run_name=run_name) as child_run:
|
||||
self._log_info_to_run(info, child_run.info.run_id, log_params=True)
|
||||
future = executor.submit(lambda: self._log_info_to_run(info, child_run.info.run_id, log_params=True))
|
||||
self.futures[future] = f"iter_{automl._track_iter}_log_info_to_run"
|
||||
future = executor.submit(lambda: self._log_automl_configurations(child_run.info.run_id))
|
||||
self.futures[future] = f"iter_{automl._track_iter}_log_automl_configurations"
|
||||
if automl._state.model_history:
|
||||
self.log_model(
|
||||
search_state.trained_estimator._model, estimator, signature=automl.estimator_signature
|
||||
)
|
||||
self.pickle_and_log_automl_artifacts(
|
||||
automl, search_state.trained_estimator, estimator, signature=automl.pipeline_signature
|
||||
)
|
||||
if estimator.endswith("_spark"):
|
||||
future = executor.submit(
|
||||
lambda: self.log_model(
|
||||
search_state.trained_estimator._model,
|
||||
estimator,
|
||||
automl.estimator_signature,
|
||||
child_run.info.run_id,
|
||||
)
|
||||
)
|
||||
self.futures_log_model[future] = f"record_state-log_model_{estimator}"
|
||||
else:
|
||||
future = executor.submit(
|
||||
lambda: self.pickle_and_log_automl_artifacts(
|
||||
automl,
|
||||
search_state.trained_estimator,
|
||||
estimator,
|
||||
automl.pipeline_signature,
|
||||
child_run.info.run_id,
|
||||
)
|
||||
)
|
||||
self.futures_log_model[future] = f"record_state-pickle_and_log_automl_artifacts_{estimator}"
|
||||
self.manual_run_ids.append(child_run.info.run_id)
|
||||
self.child_counter += 1
|
||||
return f"Successfully record_state iteration {automl._track_iter}"
|
||||
|
||||
@time_it
|
||||
def log_automl(self, automl):
|
||||
self.set_best_iter(automl)
|
||||
if self.autolog:
|
||||
if self.parent_run_id is not None:
|
||||
mlflow.start_run(run_id=self.parent_run_id, experiment_id=self.experiment_id)
|
||||
mlflow.log_metric("best_validation_loss", automl._state.best_loss)
|
||||
mlflow.log_metric("best_iteration", automl._best_iteration)
|
||||
mlflow.log_metric("num_child_runs", len(self.infos))
|
||||
if automl._trained_estimator is not None and not self.has_model:
|
||||
self.log_model(
|
||||
automl._trained_estimator._model, automl.best_estimator, signature=automl.estimator_signature
|
||||
)
|
||||
self.pickle_and_log_automl_artifacts(
|
||||
automl, automl.model, automl.best_estimator, signature=automl.pipeline_signature
|
||||
)
|
||||
mlflow.log_metrics(
|
||||
{
|
||||
"best_validation_loss": automl._state.best_loss,
|
||||
"best_iteration": automl._best_iteration,
|
||||
"num_child_runs": len(self.infos),
|
||||
}
|
||||
)
|
||||
if (
|
||||
automl._trained_estimator is not None
|
||||
and not self.has_model
|
||||
and automl._trained_estimator._model is not None
|
||||
):
|
||||
if automl.best_estimator.endswith("_spark"):
|
||||
self.log_model(
|
||||
automl._trained_estimator._model,
|
||||
automl.best_estimator,
|
||||
automl.estimator_signature,
|
||||
self.parent_run_id,
|
||||
)
|
||||
else:
|
||||
self.pickle_and_log_automl_artifacts(
|
||||
automl, automl.model, automl.best_estimator, automl.pipeline_signature, self.parent_run_id
|
||||
)
|
||||
self.has_model = True
|
||||
|
||||
self.adopt_children(automl)
|
||||
@@ -515,30 +798,65 @@ class MLflowIntegration:
|
||||
if "ml" in conf.keys():
|
||||
conf = conf["ml"]
|
||||
|
||||
mlflow.log_params(conf)
|
||||
mlflow.log_param("best_learner", automl._best_estimator)
|
||||
mlflow.log_params({**conf, "best_learner": automl._best_estimator}, run_id=self.parent_run_id)
|
||||
if not self.has_summary:
|
||||
logger.info(f"logging best model {automl.best_estimator}")
|
||||
self.copy_mlflow_run(best_mlflow_run_id, self.parent_run_id)
|
||||
future = executor.submit(lambda: self.copy_mlflow_run(best_mlflow_run_id, self.parent_run_id))
|
||||
self.futures[future] = "log_automl_copy_mlflow_run"
|
||||
future = executor.submit(lambda: self._log_automl_configurations(self.parent_run_id))
|
||||
self.futures[future] = "log_automl_log_automl_configurations"
|
||||
self.has_summary = True
|
||||
if automl._trained_estimator is not None and not self.has_model:
|
||||
self.log_model(
|
||||
automl._trained_estimator._model,
|
||||
automl.best_estimator,
|
||||
signature=automl.estimator_signature,
|
||||
)
|
||||
self.pickle_and_log_automl_artifacts(
|
||||
automl, automl.model, automl.best_estimator, signature=automl.pipeline_signature
|
||||
)
|
||||
_t1 = time.time()
|
||||
wait(self.futures_log_model)
|
||||
_t2 = time.time() - _t1
|
||||
logger.debug(f"wait futures_log_model in log_automl took {_t2} seconds")
|
||||
if (
|
||||
automl._trained_estimator is not None
|
||||
and not self.has_model
|
||||
and automl._trained_estimator._model is not None
|
||||
):
|
||||
if automl.best_estimator.endswith("_spark"):
|
||||
future = executor.submit(
|
||||
lambda: self.log_model(
|
||||
automl._trained_estimator._model,
|
||||
automl.best_estimator,
|
||||
signature=automl.estimator_signature,
|
||||
run_id=self.parent_run_id,
|
||||
)
|
||||
)
|
||||
self.futures_log_model[future] = f"log_automl-log_model_{automl.best_estimator}"
|
||||
else:
|
||||
future = executor.submit(
|
||||
lambda: self.pickle_and_log_automl_artifacts(
|
||||
automl,
|
||||
automl.model,
|
||||
automl.best_estimator,
|
||||
signature=automl.pipeline_signature,
|
||||
run_id=self.parent_run_id,
|
||||
)
|
||||
)
|
||||
self.futures_log_model[
|
||||
future
|
||||
] = f"log_automl-pickle_and_log_automl_artifacts_{automl.best_estimator}"
|
||||
self.has_model = True
|
||||
|
||||
def resume_mlflow(self):
|
||||
if len(self.resume_params) > 0:
|
||||
mlflow.autolog(**self.resume_params)
|
||||
|
||||
def _log_automl_configurations(self, run_id):
|
||||
self.mlflow_client.log_text(
|
||||
run_id=run_id,
|
||||
text=self.automl_user_configurations,
|
||||
artifact_file="automl_configurations/automl_user_configurations.json",
|
||||
)
|
||||
return f"Successfully _log_automl_configurations to run_id {run_id}"
|
||||
|
||||
def _log_info_to_run(self, info, run_id, log_params=False):
|
||||
_metrics = [Metric(key, value, int(time.time() * 1000), 0) for key, value in info["metrics"].items()]
|
||||
_tags = [RunTag(key, str(value)) for key, value in info["tags"].items()]
|
||||
_tags = [
|
||||
RunTag(key, str(value)[:5000]) for key, value in info["tags"].items()
|
||||
] # AML will raise error if value length > 5000
|
||||
_params = [
|
||||
Param(key, str(value))
|
||||
for key, value in info["params"].items()
|
||||
@@ -554,6 +872,7 @@ class MLflowIntegration:
|
||||
_tags = [RunTag("mlflow.parentRunId", run_id)]
|
||||
self.mlflow_client.log_batch(run_id=run.info.run_id, metrics=_metrics, params=[], tags=_tags)
|
||||
del info["submetrics"]["values"]
|
||||
return f"Successfully _log_info_to_run to run_id {run_id}"
|
||||
|
||||
def adopt_children(self, result=None):
|
||||
"""
|
||||
|
||||
37
flaml/tune/logger.py
Normal file
37
flaml/tune/logger.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
class ColoredFormatter(logging.Formatter):
|
||||
# ANSI escape codes for colors
|
||||
COLORS = {
|
||||
# logging.DEBUG: "\033[36m", # Cyan
|
||||
# logging.INFO: "\033[32m", # Green
|
||||
logging.WARNING: "\033[33m", # Yellow
|
||||
logging.ERROR: "\033[31m", # Red
|
||||
logging.CRITICAL: "\033[1;31m", # Bright Red
|
||||
}
|
||||
RESET = "\033[0m" # Reset to default
|
||||
|
||||
def __init__(self, fmt, datefmt, use_color=True):
|
||||
super().__init__(fmt, datefmt)
|
||||
self.use_color = use_color
|
||||
|
||||
def format(self, record):
|
||||
formatted = super().format(record)
|
||||
if self.use_color:
|
||||
color = self.COLORS.get(record.levelno, "")
|
||||
if color:
|
||||
return f"{color}{formatted}{self.RESET}"
|
||||
return formatted
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
use_color = True
|
||||
if os.getenv("FLAML_LOG_NO_COLOR"):
|
||||
use_color = False
|
||||
|
||||
logger_formatter = ColoredFormatter(
|
||||
"[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s", "%m-%d %H:%M:%S", use_color
|
||||
)
|
||||
logger.propagate = False
|
||||
@@ -162,6 +162,10 @@ def broadcast_code(custom_code="", file_name="mylearner"):
|
||||
assert isinstance(MyLargeLGBM(), LGBMEstimator)
|
||||
```
|
||||
"""
|
||||
# Check if Spark is available
|
||||
spark_available, _ = check_spark()
|
||||
|
||||
# Write to local driver file system
|
||||
flaml_path = os.path.dirname(os.path.abspath(__file__))
|
||||
custom_code = textwrap.dedent(custom_code)
|
||||
custom_path = os.path.join(flaml_path, file_name + ".py")
|
||||
@@ -169,6 +173,24 @@ def broadcast_code(custom_code="", file_name="mylearner"):
|
||||
with open(custom_path, "w") as f:
|
||||
f.write(custom_code)
|
||||
|
||||
# If using Spark, broadcast the code content to executors
|
||||
if spark_available:
|
||||
spark = SparkSession.builder.getOrCreate()
|
||||
bc_code = spark.sparkContext.broadcast(custom_code)
|
||||
|
||||
# Execute a job to ensure the code is distributed to all executors
|
||||
def _write_code(bc):
|
||||
code = bc.value
|
||||
import os
|
||||
|
||||
module_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_name + ".py")
|
||||
os.makedirs(os.path.dirname(module_path), exist_ok=True)
|
||||
with open(module_path, "w") as f:
|
||||
f.write(code)
|
||||
return True
|
||||
|
||||
spark.sparkContext.parallelize(range(1)).map(lambda _: _write_code(bc_code)).collect()
|
||||
|
||||
return custom_path
|
||||
|
||||
|
||||
|
||||
@@ -21,11 +21,11 @@ except (ImportError, AssertionError):
|
||||
from .analysis import ExperimentAnalysis as EA
|
||||
else:
|
||||
ray_available = True
|
||||
|
||||
import logging
|
||||
|
||||
from flaml.tune.spark.utils import PySparkOvertimeMonitor, check_spark
|
||||
|
||||
from .logger import logger, logger_formatter
|
||||
from .result import DEFAULT_METRIC
|
||||
from .trial import Trial
|
||||
|
||||
@@ -41,8 +41,6 @@ except ImportError:
|
||||
internal_mlflow = False
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.propagate = False
|
||||
_use_ray = True
|
||||
_runner = None
|
||||
_verbose = 0
|
||||
@@ -197,9 +195,16 @@ def report(_metric=None, **kwargs):
|
||||
global _training_iteration
|
||||
if _use_ray:
|
||||
try:
|
||||
from ray import tune
|
||||
from ray import __version__ as ray_version
|
||||
|
||||
return tune.report(_metric, **kwargs)
|
||||
if ray_version.startswith("1."):
|
||||
from ray import tune
|
||||
|
||||
return tune.report(_metric, **kwargs)
|
||||
else: # ray>=2
|
||||
from ray.air import session
|
||||
|
||||
return session.report(metrics={"metric": _metric, **kwargs})
|
||||
except ImportError:
|
||||
# calling tune.report() outside tune.run()
|
||||
return
|
||||
@@ -514,10 +519,6 @@ def run(
|
||||
elif not logger.hasHandlers():
|
||||
# Add the console handler.
|
||||
_ch = logging.StreamHandler(stream=sys.stdout)
|
||||
logger_formatter = logging.Formatter(
|
||||
"[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s",
|
||||
"%m-%d %H:%M:%S",
|
||||
)
|
||||
_ch.setFormatter(logger_formatter)
|
||||
logger.addHandler(_ch)
|
||||
if verbose <= 2:
|
||||
@@ -745,10 +746,16 @@ def run(
|
||||
max_concurrent = max(1, search_alg.max_concurrent)
|
||||
else:
|
||||
max_concurrent = max(1, max_spark_parallelism)
|
||||
passed_in_n_concurrent_trials = max(n_concurrent_trials, max_concurrent)
|
||||
n_concurrent_trials = min(
|
||||
n_concurrent_trials if n_concurrent_trials > 0 else num_executors,
|
||||
max_concurrent,
|
||||
)
|
||||
if n_concurrent_trials < passed_in_n_concurrent_trials:
|
||||
logger.warning(
|
||||
f"The actual concurrent trials is {n_concurrent_trials}. You can set the environment "
|
||||
f"variable `FLAML_MAX_CONCURRENT` to '{passed_in_n_concurrent_trials}' to override the detected num of executors."
|
||||
)
|
||||
with parallel_backend("spark"):
|
||||
with Parallel(n_jobs=n_concurrent_trials, verbose=max(0, (verbose - 1) * 50)) as parallel:
|
||||
try:
|
||||
@@ -770,7 +777,7 @@ def run(
|
||||
and num_failures < upperbound_num_failures
|
||||
):
|
||||
if automl_info and automl_info[0] > 0 and time_budget_s < np.inf:
|
||||
time_budget_s -= automl_info[0]
|
||||
time_budget_s -= automl_info[0] * n_concurrent_trials
|
||||
logger.debug(f"Remaining time budget with mlflow log latency: {time_budget_s} seconds.")
|
||||
while len(_runner.running_trials) < n_concurrent_trials:
|
||||
# suggest trials for spark
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "2.3.3"
|
||||
__version__ = "2.3.5"
|
||||
|
||||
3
pytest.ini
Normal file
3
pytest.ini
Normal file
@@ -0,0 +1,3 @@
|
||||
[pytest]
|
||||
markers =
|
||||
spark: mark a test as requiring Spark
|
||||
5
setup.py
5
setup.py
@@ -73,7 +73,7 @@ setuptools.setup(
|
||||
"psutil==5.8.0",
|
||||
"dataclasses",
|
||||
"transformers[torch]==4.26",
|
||||
"datasets",
|
||||
"datasets<=3.5.0",
|
||||
"nltk<=3.8.1", # 3.8.2 doesn't work with mlflow
|
||||
"rouge_score",
|
||||
"hcrystalball==0.1.10",
|
||||
@@ -170,10 +170,9 @@ setuptools.setup(
|
||||
"Operating System :: OS Independent",
|
||||
# Specify the Python versions you support here.
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
],
|
||||
python_requires=">=3.8",
|
||||
python_requires=">=3.9",
|
||||
)
|
||||
|
||||
@@ -17,6 +17,8 @@ from flaml import AutoML
|
||||
from flaml.automl.ml import sklearn_metric_loss_score
|
||||
from flaml.tune.spark.utils import check_spark
|
||||
|
||||
pytestmark = pytest.mark.spark
|
||||
|
||||
leaderboard = defaultdict(dict)
|
||||
|
||||
warnings.simplefilter(action="ignore")
|
||||
|
||||
@@ -477,7 +477,10 @@ def test_forecast_classification(budget=5):
|
||||
def get_stalliion_data():
|
||||
from pytorch_forecasting.data.examples import get_stallion_data
|
||||
|
||||
data = get_stallion_data()
|
||||
# data = get_stallion_data()
|
||||
data = pd.read_parquet(
|
||||
"https://raw.githubusercontent.com/sktime/pytorch-forecasting/refs/heads/main/examples/data/stallion.parquet"
|
||||
)
|
||||
# add time index - For datasets with no missing values, FLAML will automate this process
|
||||
data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month
|
||||
data["time_idx"] -= data["time_idx"].min()
|
||||
|
||||
51
test/automl/test_max_iter_1.py
Normal file
51
test/automl/test_max_iter_1.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import mlflow
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from flaml import AutoML
|
||||
|
||||
|
||||
def test_max_iter_1():
|
||||
date_rng = pd.date_range(start="2024-01-01", periods=100, freq="H")
|
||||
X = pd.DataFrame({"ds": date_rng})
|
||||
y_train_24h = np.random.rand(len(X)) * 100
|
||||
|
||||
# AutoML
|
||||
settings = {
|
||||
"max_iter": 1,
|
||||
"estimator_list": ["xgboost", "lgbm"],
|
||||
"starting_points": {"xgboost": {}, "lgbm": {}},
|
||||
"task": "ts_forecast",
|
||||
"log_file_name": "test_max_iter_1.log",
|
||||
"seed": 41,
|
||||
"mlflow_exp_name": "TestExp-max_iter-1",
|
||||
"use_spark": False,
|
||||
"n_concurrent_trials": 1,
|
||||
"verbose": 1,
|
||||
"featurization": "off",
|
||||
"metric": "rmse",
|
||||
"mlflow_logging": True,
|
||||
}
|
||||
|
||||
automl = AutoML(**settings)
|
||||
|
||||
with mlflow.start_run(run_name="AutoMLModel-XGBoost-and-LGBM-max_iter_1"):
|
||||
automl.fit(
|
||||
X_train=X,
|
||||
y_train=y_train_24h,
|
||||
period=24,
|
||||
X_val=X,
|
||||
y_val=y_train_24h,
|
||||
split_ratio=0,
|
||||
force_cancel=False,
|
||||
)
|
||||
|
||||
assert automl.model is not None, "AutoML failed to return a model"
|
||||
assert automl.best_run_id is not None, "Best run ID should not be None with mlflow logging"
|
||||
|
||||
print("Best model:", automl.model)
|
||||
print("Best run ID:", automl.best_run_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_max_iter_1()
|
||||
@@ -10,6 +10,18 @@ from flaml import AutoML
|
||||
|
||||
|
||||
class TestMLFlowLoggingParam:
|
||||
def test_update_and_install_requirements(self):
|
||||
import mlflow
|
||||
from sklearn import tree
|
||||
|
||||
from flaml.fabric.mlflow import update_and_install_requirements
|
||||
|
||||
with mlflow.start_run(run_name="test") as run:
|
||||
sk_model = tree.DecisionTreeClassifier()
|
||||
mlflow.sklearn.log_model(sk_model, "model", registered_model_name="test")
|
||||
|
||||
update_and_install_requirements(run_id=run.info.run_id)
|
||||
|
||||
def test_should_start_new_run_by_default(self, automl_settings):
|
||||
with mlflow.start_run() as parent_run:
|
||||
automl = AutoML()
|
||||
|
||||
@@ -143,4 +143,5 @@ def test_prep():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_lrl2()
|
||||
test_prep()
|
||||
@@ -1,4 +1,5 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.datasets import fetch_openml, load_iris
|
||||
from sklearn.metrics import accuracy_score
|
||||
from sklearn.model_selection import GroupKFold, KFold, train_test_split
|
||||
@@ -59,8 +60,6 @@ def test_groups_for_classification_task():
|
||||
|
||||
X, y = load_wine(return_X_y=True)
|
||||
|
||||
import numpy as np
|
||||
|
||||
automl = AutoML()
|
||||
automl_settings = {
|
||||
"time_budget": 2,
|
||||
@@ -118,6 +117,43 @@ def test_groups_for_regression_task():
|
||||
automl.fit(X_train, y_train, **automl_settings)
|
||||
|
||||
|
||||
def test_groups_with_sample_weights():
|
||||
"""Verifies that sample weights can be used with group splits i.e. that https://github.com/microsoft/FLAML/issues/1396 remains fixed"""
|
||||
iris_dict_data = load_iris(as_frame=True) # numpy arrays
|
||||
iris_data = iris_dict_data["frame"] # pandas dataframe data + target
|
||||
iris_data["cluster"] = np.random.randint(0, 5, iris_data.shape[0])
|
||||
automl = AutoML()
|
||||
|
||||
X = iris_data[["sepal length (cm)", "sepal width (cm)", "petal length (cm)"]].to_numpy()
|
||||
y = iris_data["petal width (cm)"]
|
||||
sample_weight = pd.Series(np.random.rand(X.shape[0]))
|
||||
(
|
||||
X_train,
|
||||
X_test,
|
||||
y_train,
|
||||
y_test,
|
||||
groups_train,
|
||||
groups_test,
|
||||
sample_weight_train,
|
||||
sample_weight_test,
|
||||
) = train_test_split(X, y, iris_data["cluster"], sample_weight, random_state=42)
|
||||
automl_settings = {
|
||||
"max_iter": 5,
|
||||
"time_budget": -1,
|
||||
"metric": "r2",
|
||||
"task": "regression",
|
||||
"log_file_name": "error.log",
|
||||
"log_type": "all",
|
||||
"estimator_list": ["lgbm"],
|
||||
"eval_method": "cv",
|
||||
"split_type": "group",
|
||||
"groups": groups_train,
|
||||
"sample_weight": sample_weight_train,
|
||||
}
|
||||
automl.fit(X_train, y_train, **automl_settings)
|
||||
assert automl.model is not None
|
||||
|
||||
|
||||
def test_stratified_groupkfold():
|
||||
from minio.error import ServerError
|
||||
from sklearn.model_selection import StratifiedGroupKFold
|
||||
|
||||
@@ -24,6 +24,8 @@ model_path_list = [
|
||||
if sys.platform.startswith("darwin") and sys.version_info[0] == 3 and sys.version_info[1] == 11:
|
||||
pytest.skip("skipping Python 3.11 on MacOS", allow_module_level=True)
|
||||
|
||||
pytestmark = pytest.mark.spark # set to spark as parallel testing raised RuntimeError
|
||||
|
||||
|
||||
def test_switch_1_1():
|
||||
data_idx, model_path_idx = 0, 0
|
||||
|
||||
@@ -5,6 +5,8 @@ import sys
|
||||
import pytest
|
||||
from utils import get_automl_settings, get_toy_data_seqclassification
|
||||
|
||||
pytestmark = pytest.mark.spark # set to spark as parallel testing raised MlflowException of changing parameter
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform in ["darwin", "win32"], reason="do not run on mac os or windows")
|
||||
def test_cv():
|
||||
|
||||
@@ -10,6 +10,10 @@ from flaml.default import portfolio
|
||||
if sys.platform.startswith("darwin") and sys.version_info[0] == 3 and sys.version_info[1] == 11:
|
||||
pytest.skip("skipping Python 3.11 on MacOS", allow_module_level=True)
|
||||
|
||||
pytestmark = (
|
||||
pytest.mark.spark
|
||||
) # set to spark as parallel testing raised ValueError: Feature NonExisting not implemented.
|
||||
|
||||
|
||||
def pop_args(fit_kwargs):
|
||||
fit_kwargs.pop("max_iter", None)
|
||||
|
||||
@@ -3,11 +3,13 @@ import sys
|
||||
import warnings
|
||||
|
||||
import mlflow
|
||||
import numpy as np
|
||||
import pytest
|
||||
import sklearn.datasets as skds
|
||||
from packaging.version import Version
|
||||
|
||||
from flaml import AutoML
|
||||
from flaml.automl.data import auto_convert_dtypes_pandas, auto_convert_dtypes_spark, get_random_dataframe
|
||||
from flaml.tune.spark.utils import check_spark
|
||||
|
||||
warnings.simplefilter(action="ignore")
|
||||
@@ -58,7 +60,7 @@ if sys.version_info >= (3, 11):
|
||||
else:
|
||||
skip_py311 = False
|
||||
|
||||
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
|
||||
pytestmark = [pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests."), pytest.mark.spark]
|
||||
|
||||
|
||||
def _test_spark_synapseml_lightgbm(spark=None, task="classification"):
|
||||
@@ -296,11 +298,88 @@ def _test_spark_large_df():
|
||||
print("time cost in minutes: ", (end_time - start_time) / 60)
|
||||
|
||||
|
||||
def test_get_random_dataframe():
|
||||
# Test with default parameters
|
||||
df = get_random_dataframe(n_rows=50, ratio_none=0.2, seed=123)
|
||||
assert df.shape == (50, 14) # Default is 200 rows and 14 columns
|
||||
|
||||
# Test column types
|
||||
assert "timestamp" in df.columns and np.issubdtype(df["timestamp"].dtype, np.datetime64)
|
||||
assert "id" in df.columns and np.issubdtype(df["id"].dtype, np.integer)
|
||||
assert "score" in df.columns and np.issubdtype(df["score"].dtype, np.floating)
|
||||
assert "category" in df.columns and df["category"].dtype.name == "category"
|
||||
|
||||
|
||||
def test_auto_convert_dtypes_pandas():
|
||||
# Create a test DataFrame with various types
|
||||
import pandas as pd
|
||||
|
||||
test_df = pd.DataFrame(
|
||||
{
|
||||
"int_col": ["1", "2", "3", "4", "5", "6", "6"],
|
||||
"float_col": ["1.1", "2.2", "3.3", "NULL", "5.5", "6.6", "6.6"],
|
||||
"date_col": ["2021-01-01", "2021-02-01", "NA", "2021-04-01", "2021-05-01", "2021-06-01", "2021-06-01"],
|
||||
"cat_col": ["A", "B", "A", "A", "B", "A", "B"],
|
||||
"string_col": ["text1", "text2", "text3", "text4", "text5", "text6", "text7"],
|
||||
}
|
||||
)
|
||||
|
||||
# Convert dtypes
|
||||
converted_df, schema = auto_convert_dtypes_pandas(test_df)
|
||||
|
||||
# Check conversions
|
||||
assert schema["int_col"] == "int"
|
||||
assert schema["float_col"] == "double"
|
||||
assert schema["date_col"] == "timestamp"
|
||||
assert schema["cat_col"] == "category"
|
||||
assert schema["string_col"] == "string"
|
||||
|
||||
|
||||
def test_auto_convert_dtypes_spark():
|
||||
"""Test auto_convert_dtypes_spark function with various data types."""
|
||||
import pandas as pd
|
||||
|
||||
# Create a test DataFrame with various types
|
||||
test_pdf = pd.DataFrame(
|
||||
{
|
||||
"int_col": ["1", "2", "3", "4", "NA"],
|
||||
"float_col": ["1.1", "2.2", "3.3", "NULL", "5.5"],
|
||||
"date_col": ["2021-01-01", "2021-02-01", "NA", "2021-04-01", "2021-05-01"],
|
||||
"cat_col": ["A", "B", "A", "C", "B"],
|
||||
"string_col": ["text1", "text2", "text3", "text4", "text5"],
|
||||
}
|
||||
)
|
||||
|
||||
# Convert pandas DataFrame to Spark DataFrame
|
||||
test_df = spark.createDataFrame(test_pdf)
|
||||
|
||||
# Convert dtypes
|
||||
converted_df, schema = auto_convert_dtypes_spark(test_df)
|
||||
|
||||
# Check conversions
|
||||
assert schema["int_col"] == "int"
|
||||
assert schema["float_col"] == "double"
|
||||
assert schema["date_col"] == "timestamp"
|
||||
assert schema["cat_col"] == "string" # Conceptual category in schema
|
||||
assert schema["string_col"] == "string"
|
||||
|
||||
# Verify the actual data types from the Spark DataFrame
|
||||
spark_dtypes = dict(converted_df.dtypes)
|
||||
assert spark_dtypes["int_col"] == "int"
|
||||
assert spark_dtypes["float_col"] == "double"
|
||||
assert spark_dtypes["date_col"] == "timestamp"
|
||||
assert spark_dtypes["cat_col"] == "string" # In Spark, categories are still strings
|
||||
assert spark_dtypes["string_col"] == "string"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_spark_synapseml_classification()
|
||||
test_spark_synapseml_regression()
|
||||
test_spark_synapseml_rank()
|
||||
test_spark_input_df()
|
||||
test_get_random_dataframe()
|
||||
test_auto_convert_dtypes_pandas()
|
||||
test_auto_convert_dtypes_spark()
|
||||
|
||||
# import cProfile
|
||||
# import pstats
|
||||
|
||||
@@ -25,7 +25,7 @@ os.environ["FLAML_MAX_CONCURRENT"] = "2"
|
||||
spark_available, _ = check_spark()
|
||||
skip_spark = not spark_available
|
||||
|
||||
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
|
||||
pytestmark = [pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests."), pytest.mark.spark]
|
||||
|
||||
|
||||
def test_parallel_xgboost(hpo_method=None, data_size=1000):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
from sklearn.datasets import load_wine
|
||||
|
||||
from flaml import AutoML
|
||||
@@ -24,6 +25,8 @@ if os.path.exists(os.path.join(os.getcwd(), "test", "spark", "custom_mylearner.p
|
||||
else:
|
||||
skip_my_learner = True
|
||||
|
||||
pytestmark = pytest.mark.spark
|
||||
|
||||
|
||||
class TestEnsemble(unittest.TestCase):
|
||||
def setUp(self) -> None:
|
||||
|
||||
@@ -9,7 +9,7 @@ from flaml.tune.spark.utils import check_spark
|
||||
spark_available, _ = check_spark()
|
||||
skip_spark = not spark_available
|
||||
|
||||
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
|
||||
pytestmark = [pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests."), pytest.mark.spark]
|
||||
|
||||
os.environ["FLAML_MAX_CONCURRENT"] = "2"
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ try:
|
||||
from pyspark.ml.feature import VectorAssembler
|
||||
except ImportError:
|
||||
pass
|
||||
pytestmark = pytest.mark.spark
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
skip_spark = importlib.util.find_spec("pyspark") is None
|
||||
|
||||
@@ -2,6 +2,7 @@ import os
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import scipy.sparse
|
||||
from sklearn.datasets import load_iris, load_wine
|
||||
|
||||
@@ -12,6 +13,7 @@ from flaml.tune.spark.utils import check_spark
|
||||
|
||||
spark_available, _ = check_spark()
|
||||
skip_spark = not spark_available
|
||||
pytestmark = pytest.mark.spark
|
||||
|
||||
os.environ["FLAML_MAX_CONCURRENT"] = "2"
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ from flaml.tune.spark.utils import check_spark
|
||||
spark_available, _ = check_spark()
|
||||
skip_spark = not spark_available
|
||||
|
||||
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
|
||||
pytestmark = [pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests."), pytest.mark.spark]
|
||||
|
||||
here = os.path.abspath(os.path.dirname(__file__))
|
||||
os.environ["FLAML_MAX_CONCURRENT"] = "2"
|
||||
|
||||
@@ -25,7 +25,7 @@ try:
|
||||
except ImportError:
|
||||
skip_spark = True
|
||||
|
||||
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
|
||||
pytestmark = [pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests."), pytest.mark.spark]
|
||||
|
||||
|
||||
def test_overtime():
|
||||
|
||||
@@ -11,7 +11,7 @@ from flaml.tune.spark.utils import check_spark
|
||||
spark_available, _ = check_spark()
|
||||
skip_spark = not spark_available
|
||||
|
||||
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
|
||||
pytestmark = [pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests."), pytest.mark.spark]
|
||||
|
||||
os.environ["FLAML_MAX_CONCURRENT"] = "2"
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ from flaml.tune.spark.utils import check_spark
|
||||
spark_available, _ = check_spark()
|
||||
skip_spark = not spark_available
|
||||
|
||||
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
|
||||
pytestmark = [pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests."), pytest.mark.spark]
|
||||
|
||||
os.environ["FLAML_MAX_CONCURRENT"] = "2"
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
|
||||
@@ -36,7 +36,7 @@ except ImportError:
|
||||
print("Spark is not installed. Skip all spark tests.")
|
||||
skip_spark = True
|
||||
|
||||
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
|
||||
pytestmark = [pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests."), pytest.mark.spark]
|
||||
|
||||
|
||||
def test_with_parameters_spark():
|
||||
|
||||
@@ -59,6 +59,17 @@ def _test_hf_data():
|
||||
except requests.exceptions.ConnectionError:
|
||||
return
|
||||
|
||||
# Tests will only run if there is a GPU available
|
||||
try:
|
||||
import ray
|
||||
|
||||
pg = ray.util.placement_group([{"CPU": 1, "GPU": 1}])
|
||||
|
||||
if not pg.wait(timeout_seconds=10): # Wait 10 seconds for resources
|
||||
raise RuntimeError("No available node types can fulfill resource request!")
|
||||
except RuntimeError:
|
||||
return
|
||||
|
||||
custom_sent_keys = ["sentence1", "sentence2"]
|
||||
label_key = "label"
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
"name": "website",
|
||||
"version": "0.0.0",
|
||||
"private": true,
|
||||
"resolutions" :{
|
||||
"nth-check":"2.0.1",
|
||||
"trim":"0.0.3",
|
||||
"resolutions": {
|
||||
"nth-check": "2.0.1",
|
||||
"trim": "0.0.3",
|
||||
"got": "11.8.5",
|
||||
"node-forge": "1.3.0",
|
||||
"minimatch": "3.0.5",
|
||||
@@ -12,7 +12,7 @@
|
||||
"eta": "2.0.0",
|
||||
"@sideway/formula": "3.0.1",
|
||||
"http-cache-semantics": "4.1.1"
|
||||
},
|
||||
},
|
||||
"scripts": {
|
||||
"docusaurus": "docusaurus",
|
||||
"start": "docusaurus start",
|
||||
@@ -33,13 +33,13 @@
|
||||
"clsx": "^1.1.1",
|
||||
"file-loader": "^6.2.0",
|
||||
"hast-util-is-element": "1.1.0",
|
||||
"minimatch": "3.0.5",
|
||||
"react": "^17.0.1",
|
||||
"react-dom": "^17.0.1",
|
||||
"rehype-katex": "4",
|
||||
"remark-math": "3",
|
||||
"trim": "^0.0.3",
|
||||
"url-loader": "^4.1.1",
|
||||
"minimatch": "3.0.5"
|
||||
"url-loader": "^4.1.1"
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
|
||||
@@ -153,6 +153,15 @@
|
||||
"@babel/highlight" "^7.23.4"
|
||||
chalk "^2.4.2"
|
||||
|
||||
"@babel/code-frame@^7.26.2":
|
||||
version "7.26.2"
|
||||
resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.26.2.tgz#4b5fab97d33338eff916235055f0ebc21e573a85"
|
||||
integrity sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==
|
||||
dependencies:
|
||||
"@babel/helper-validator-identifier" "^7.25.9"
|
||||
js-tokens "^4.0.0"
|
||||
picocolors "^1.0.0"
|
||||
|
||||
"@babel/compat-data@^7.17.7", "@babel/compat-data@^7.20.0", "@babel/compat-data@^7.20.1":
|
||||
version "7.20.1"
|
||||
resolved "https://registry.npmmirror.com/@babel/compat-data/-/compat-data-7.20.1.tgz#f2e6ef7790d8c8dbf03d379502dcc246dcce0b30"
|
||||
@@ -429,6 +438,11 @@
|
||||
resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.23.4.tgz#9478c707febcbbe1ddb38a3d91a2e054ae622d83"
|
||||
integrity sha512-803gmbQdqwdf4olxrX4AJyFBV/RTr3rSmOj0rKwesmzlfhYNDEs+/iOcznzpNWlJlIlTJC2QfPFcHB6DlzdVLQ==
|
||||
|
||||
"@babel/helper-string-parser@^7.25.9":
|
||||
version "7.25.9"
|
||||
resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz#1aabb72ee72ed35789b4bbcad3ca2862ce614e8c"
|
||||
integrity sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==
|
||||
|
||||
"@babel/helper-validator-identifier@^7.18.6", "@babel/helper-validator-identifier@^7.19.1":
|
||||
version "7.19.1"
|
||||
resolved "https://registry.npmmirror.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.19.1.tgz#7eea834cf32901ffdc1a7ee555e2f9c27e249ca2"
|
||||
@@ -439,6 +453,11 @@
|
||||
resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz#c4ae002c61d2879e724581d96665583dbc1dc0e0"
|
||||
integrity sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==
|
||||
|
||||
"@babel/helper-validator-identifier@^7.25.9":
|
||||
version "7.25.9"
|
||||
resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz#24b64e2c3ec7cd3b3c547729b8d16871f22cbdc7"
|
||||
integrity sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==
|
||||
|
||||
"@babel/helper-validator-option@^7.18.6":
|
||||
version "7.18.6"
|
||||
resolved "https://registry.npmmirror.com/@babel/helper-validator-option/-/helper-validator-option-7.18.6.tgz#bf0d2b5a509b1f336099e4ff36e1a63aa5db4db8"
|
||||
@@ -455,13 +474,12 @@
|
||||
"@babel/types" "^7.19.0"
|
||||
|
||||
"@babel/helpers@^7.12.5", "@babel/helpers@^7.20.1":
|
||||
version "7.20.1"
|
||||
resolved "https://registry.npmmirror.com/@babel/helpers/-/helpers-7.20.1.tgz#2ab7a0fcb0a03b5bf76629196ed63c2d7311f4c9"
|
||||
integrity sha512-J77mUVaDTUJFZ5BpP6mMn6OIl3rEWymk2ZxDBQJUG3P+PbmyMcF3bYWvz0ma69Af1oobDqT/iAsvzhB58xhQUg==
|
||||
version "7.26.10"
|
||||
resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.26.10.tgz#6baea3cd62ec2d0c1068778d63cb1314f6637384"
|
||||
integrity sha512-UPYc3SauzZ3JGgj87GgZ89JVdC5dj0AoetR5Bw6wj4niittNyFh6+eOGonYvJ1ao6B8lEa3Q3klS7ADZ53bc5g==
|
||||
dependencies:
|
||||
"@babel/template" "^7.18.10"
|
||||
"@babel/traverse" "^7.20.1"
|
||||
"@babel/types" "^7.20.0"
|
||||
"@babel/template" "^7.26.9"
|
||||
"@babel/types" "^7.26.10"
|
||||
|
||||
"@babel/highlight@^7.18.6":
|
||||
version "7.18.6"
|
||||
@@ -491,6 +509,13 @@
|
||||
resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.23.6.tgz#ba1c9e512bda72a47e285ae42aff9d2a635a9e3b"
|
||||
integrity sha512-Z2uID7YJ7oNvAI20O9X0bblw7Qqs8Q2hFy0R9tAfnfLkp5MW0UH9eUvnDSnFwKZ0AvgS1ucqR4KzvVHgnke1VQ==
|
||||
|
||||
"@babel/parser@^7.26.9":
|
||||
version "7.26.10"
|
||||
resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.26.10.tgz#e9bdb82f14b97df6569b0b038edd436839c57749"
|
||||
integrity sha512-6aQR2zGE/QFi8JpDLjUZEPYOs7+mhKXm86VaKFiLP35JQwQb6bwUE+XbvkH0EptsYhbNBSUGaUBLKqxH1xSgsA==
|
||||
dependencies:
|
||||
"@babel/types" "^7.26.10"
|
||||
|
||||
"@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression@^7.18.6":
|
||||
version "7.18.6"
|
||||
resolved "https://registry.npmmirror.com/@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression/-/plugin-bugfix-safari-id-destructuring-collision-in-function-expression-7.18.6.tgz#da5b8f9a580acdfbe53494dba45ea389fb09a4d2"
|
||||
@@ -1196,19 +1221,19 @@
|
||||
"@babel/plugin-transform-typescript" "^7.18.6"
|
||||
|
||||
"@babel/runtime-corejs3@^7.15.4":
|
||||
version "7.20.1"
|
||||
resolved "https://registry.npmmirror.com/@babel/runtime-corejs3/-/runtime-corejs3-7.20.1.tgz#d0775a49bb5fba77e42cbb7276c9955c7b05af8d"
|
||||
integrity sha512-CGulbEDcg/ND1Im7fUNRZdGXmX2MTWVVZacQi/6DiKE5HNwZ3aVTm5PV4lO8HHz0B2h8WQyvKKjbX5XgTtydsg==
|
||||
version "7.26.10"
|
||||
resolved "https://registry.yarnpkg.com/@babel/runtime-corejs3/-/runtime-corejs3-7.26.10.tgz#5a3185ca2813f8de8ae68622572086edf5cf51f2"
|
||||
integrity sha512-uITFQYO68pMEYR46AHgQoyBg7KPPJDAbGn4jUTIRgCFJIp88MIBUianVOplhZDEec07bp9zIyr4Kp0FCyQzmWg==
|
||||
dependencies:
|
||||
core-js-pure "^3.25.1"
|
||||
regenerator-runtime "^0.13.10"
|
||||
core-js-pure "^3.30.2"
|
||||
regenerator-runtime "^0.14.0"
|
||||
|
||||
"@babel/runtime@^7.1.2", "@babel/runtime@^7.10.2", "@babel/runtime@^7.10.3", "@babel/runtime@^7.12.13", "@babel/runtime@^7.15.4", "@babel/runtime@^7.8.4":
|
||||
version "7.20.1"
|
||||
resolved "https://registry.npmmirror.com/@babel/runtime/-/runtime-7.20.1.tgz#1148bb33ab252b165a06698fde7576092a78b4a9"
|
||||
integrity sha512-mrzLkl6U9YLF8qpqI7TB82PESyEGjm/0Ly91jG575eVxMMlb8fYfOXFZIJ8XfLrJZQbm7dlKry2bJmXBUEkdFg==
|
||||
version "7.26.10"
|
||||
resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.26.10.tgz#a07b4d8fa27af131a633d7b3524db803eb4764c2"
|
||||
integrity sha512-2WJMeRQPHKSPemqk/awGrAiuFfzBmOIPXKizAsVhWH9YJqLZ0H+HS4c8loHGgW6utJ3E/ejXQUsiGaQy2NZ9Fw==
|
||||
dependencies:
|
||||
regenerator-runtime "^0.13.10"
|
||||
regenerator-runtime "^0.14.0"
|
||||
|
||||
"@babel/template@^7.12.7", "@babel/template@^7.18.10":
|
||||
version "7.18.10"
|
||||
@@ -1228,6 +1253,15 @@
|
||||
"@babel/parser" "^7.22.15"
|
||||
"@babel/types" "^7.22.15"
|
||||
|
||||
"@babel/template@^7.26.9":
|
||||
version "7.26.9"
|
||||
resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.26.9.tgz#4577ad3ddf43d194528cff4e1fa6b232fa609bb2"
|
||||
integrity sha512-qyRplbeIpNZhmzOysF/wFMuP9sctmh2cFzRAZOn1YapxBsE1i9bJIY586R/WBLfLcmcBlM8ROBiQURnnNy+zfA==
|
||||
dependencies:
|
||||
"@babel/code-frame" "^7.26.2"
|
||||
"@babel/parser" "^7.26.9"
|
||||
"@babel/types" "^7.26.9"
|
||||
|
||||
"@babel/traverse@^7.12.13", "@babel/traverse@^7.12.9", "@babel/traverse@^7.19.0", "@babel/traverse@^7.19.1", "@babel/traverse@^7.20.1":
|
||||
version "7.23.6"
|
||||
resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.23.6.tgz#b53526a2367a0dd6edc423637f3d2d0f2521abc5"
|
||||
@@ -1262,6 +1296,14 @@
|
||||
"@babel/helper-validator-identifier" "^7.22.20"
|
||||
to-fast-properties "^2.0.0"
|
||||
|
||||
"@babel/types@^7.26.10", "@babel/types@^7.26.9":
|
||||
version "7.26.10"
|
||||
resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.26.10.tgz#396382f6335bd4feb65741eacfc808218f859259"
|
||||
integrity sha512-emqcG3vHrpxUKTrxcblR36dcrcoRDvKmnL/dCL6ZsHaShW80qxCAcNhzQZrpeM765VzEos+xOi4s+r4IXzTwdQ==
|
||||
dependencies:
|
||||
"@babel/helper-string-parser" "^7.25.9"
|
||||
"@babel/helper-validator-identifier" "^7.25.9"
|
||||
|
||||
"@docsearch/css@3.3.0":
|
||||
version "3.3.0"
|
||||
resolved "https://registry.npmmirror.com/@docsearch/css/-/css-3.3.0.tgz#d698e48302d12240d7c2f7452ccb2d2239a8cd80"
|
||||
@@ -2995,15 +3037,10 @@ caniuse-api@^3.0.0:
|
||||
lodash.memoize "^4.1.2"
|
||||
lodash.uniq "^4.5.0"
|
||||
|
||||
caniuse-lite@^1.0.0, caniuse-lite@^1.0.30001400, caniuse-lite@^1.0.30001426:
|
||||
version "1.0.30001430"
|
||||
resolved "https://registry.npmmirror.com/caniuse-lite/-/caniuse-lite-1.0.30001430.tgz#638a8ae00b5a8a97e66ff43733b2701f81b101fa"
|
||||
integrity sha512-IB1BXTZKPDVPM7cnV4iaKaHxckvdr/3xtctB3f7Hmenx3qYBhGtTZ//7EllK66aKXW98Lx0+7Yr0kxBtIt3tzg==
|
||||
|
||||
caniuse-lite@^1.0.30001646:
|
||||
version "1.0.30001657"
|
||||
resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001657.tgz#29fd504bffca719d1c6b63a1f6f840be1973a660"
|
||||
integrity sha512-DPbJAlP8/BAXy3IgiWmZKItubb3TYGP0WscQQlVGIfT4s/YlFYVuJgyOsQNP7rJRChx/qdMeLJQJP0Sgg2yjNA==
|
||||
caniuse-lite@^1.0.0, caniuse-lite@^1.0.30001400, caniuse-lite@^1.0.30001426, caniuse-lite@^1.0.30001646:
|
||||
version "1.0.30001718"
|
||||
resolved "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001718.tgz"
|
||||
integrity sha512-AflseV1ahcSunK53NfEs9gFWgOEmzr0f+kaMFA4xiLZlr9Hzt7HxcSpIFcnNCUkz6R6dWKa54rUz3HUmI3nVcw==
|
||||
|
||||
ccount@^1.0.0, ccount@^1.0.3:
|
||||
version "1.1.0"
|
||||
@@ -3326,10 +3363,10 @@ core-js-compat@^3.25.1:
|
||||
dependencies:
|
||||
browserslist "^4.21.4"
|
||||
|
||||
core-js-pure@^3.25.1:
|
||||
version "3.26.0"
|
||||
resolved "https://registry.npmmirror.com/core-js-pure/-/core-js-pure-3.26.0.tgz#7ad8a5dd7d910756f3124374b50026e23265ca9a"
|
||||
integrity sha512-LiN6fylpVBVwT8twhhluD9TzXmZQQsr2I2eIKtWNbZI1XMfBT7CV18itaN6RA7EtQd/SDdRx/wzvAShX2HvhQA==
|
||||
core-js-pure@^3.30.2:
|
||||
version "3.41.0"
|
||||
resolved "https://registry.yarnpkg.com/core-js-pure/-/core-js-pure-3.41.0.tgz#349fecad168d60807a31e83c99d73d786fe80811"
|
||||
integrity sha512-71Gzp96T9YPk63aUvE5Q5qP+DryB4ZloUZPSOebGM88VNw8VNfvdA7z6kGA8iGOTEzAomsRidp4jXSmUIJsL+Q==
|
||||
|
||||
core-js@^3.18.0:
|
||||
version "3.26.0"
|
||||
@@ -4830,9 +4867,9 @@ http-parser-js@>=0.5.1:
|
||||
integrity sha512-SGeBX54F94Wgu5RH3X5jsDtf4eHyRogWX1XGT3b4HuW3tQPM4AaBzoUji/4AAJNXCEOWZ5O0DgZmJw1947gD5Q==
|
||||
|
||||
http-proxy-middleware@^2.0.3:
|
||||
version "2.0.7"
|
||||
resolved "https://registry.yarnpkg.com/http-proxy-middleware/-/http-proxy-middleware-2.0.7.tgz#915f236d92ae98ef48278a95dedf17e991936ec6"
|
||||
integrity sha512-fgVY8AV7qU7z/MmXJ/rxwbrtQH4jBQ9m7kp3llF0liB7glmFeVZFBepQb32T3y8n8k2+AEYuMPCpinYW+/CuRA==
|
||||
version "2.0.9"
|
||||
resolved "https://registry.yarnpkg.com/http-proxy-middleware/-/http-proxy-middleware-2.0.9.tgz#e9e63d68afaa4eee3d147f39149ab84c0c2815ef"
|
||||
integrity sha512-c1IyJYLYppU574+YI7R4QyX2ystMtVXZwIdzazUIPIJsHuWNd+mho2j+bKoHftndicGj9yh+xjd+l0yj7VeT1Q==
|
||||
dependencies:
|
||||
"@types/http-proxy" "^1.17.8"
|
||||
http-proxy "^1.18.1"
|
||||
@@ -6441,9 +6478,9 @@ prism-react-renderer@^1.2.1:
|
||||
integrity sha512-IJ+MSwBWKG+SM3b2SUfdrhC+gu01QkV2KmRQgREThBfSQRoufqRfxfHUxpG1WcaFjP+kojcFyO9Qqtpgt3qLCg==
|
||||
|
||||
prismjs@^1.23.0:
|
||||
version "1.29.0"
|
||||
resolved "https://registry.npmmirror.com/prismjs/-/prismjs-1.29.0.tgz#f113555a8fa9b57c35e637bba27509dcf802dd12"
|
||||
integrity sha512-Kx/1w86q/epKcmte75LNrEoT+lX8pBpavuAbvJWRXar7Hz8jrtF+e3vY751p0R8H9HdArwaCTNDDzHg/ScJK1Q==
|
||||
version "1.30.0"
|
||||
resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.30.0.tgz#d9709969d9d4e16403f6f348c63553b19f0975a9"
|
||||
integrity sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==
|
||||
|
||||
process-nextick-args@~2.0.0:
|
||||
version "2.0.1"
|
||||
@@ -6816,10 +6853,10 @@ regenerate@^1.4.2:
|
||||
resolved "https://registry.npmmirror.com/regenerate/-/regenerate-1.4.2.tgz#b9346d8827e8f5a32f7ba29637d398b69014848a"
|
||||
integrity sha512-zrceR/XhGYU/d/opr2EKO7aRHUeiBI8qjtfHqADTwZd6Szfy16la6kqD0MIUs5z5hx6AaKa+PixpPrR289+I0A==
|
||||
|
||||
regenerator-runtime@^0.13.10:
|
||||
version "0.13.10"
|
||||
resolved "https://registry.npmmirror.com/regenerator-runtime/-/regenerator-runtime-0.13.10.tgz#ed07b19616bcbec5da6274ebc75ae95634bfc2ee"
|
||||
integrity sha512-KepLsg4dU12hryUO7bp/axHAKvwGOCV0sGloQtpagJ12ai+ojVDqkeGSiRX1zlq+kjIMZ1t7gpze+26QqtdGqw==
|
||||
regenerator-runtime@^0.14.0:
|
||||
version "0.14.1"
|
||||
resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz#356ade10263f685dda125100cd862c1db895327f"
|
||||
integrity sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==
|
||||
|
||||
regenerator-transform@^0.15.0:
|
||||
version "0.15.0"
|
||||
@@ -7272,14 +7309,7 @@ send@0.19.0:
|
||||
range-parser "~1.2.1"
|
||||
statuses "2.0.1"
|
||||
|
||||
serialize-javascript@^6.0.0:
|
||||
version "6.0.0"
|
||||
resolved "https://registry.npmmirror.com/serialize-javascript/-/serialize-javascript-6.0.0.tgz#efae5d88f45d7924141da8b5c3a7a7e663fefeb8"
|
||||
integrity sha512-Qr3TosvguFt8ePWqsvRfrKyQXIiW+nGbYpy8XK24NQHE83caxWt+mIymTT19DGFbNWNLfEwsrkSmN64lVWB9ag==
|
||||
dependencies:
|
||||
randombytes "^2.1.0"
|
||||
|
||||
serialize-javascript@^6.0.1:
|
||||
serialize-javascript@^6.0.0, serialize-javascript@^6.0.1:
|
||||
version "6.0.2"
|
||||
resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-6.0.2.tgz#defa1e055c83bf6d59ea805d8da862254eb6a6c2"
|
||||
integrity sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==
|
||||
|
||||
Reference in New Issue
Block a user