@@ -112,9 +112,7 @@
      ]
     }
    ],
-   "source": [
-    "raw_dataset = datasets.load_dataset(\"glue\", TASK)"
-   ]
+   "source": "raw_dataset = datasets.load_dataset(\"glue\", TASK, trust_remote_code=True)"
   },
   {
    "cell_type": "code",
@@ -425,9 +423,7 @@
    "execution_count": 14,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "metric = datasets.load_metric(\"glue\", TASK)"
-   ]
+   "source": "metric = datasets.load_metric(\"glue\", TASK, trust_remote_code=True)"
   },
   {
    "cell_type": "code",
@@ -646,7 +642,7 @@
     "def train_distilbert(config: dict):\n",
     "\n",
     "    # Load CoLA dataset and apply tokenizer\n",
-    "    cola_raw = datasets.load_dataset(\"glue\", TASK)\n",
+    "    cola_raw = datasets.load_dataset(\"glue\", TASK, trust_remote_code=True)\n",
     "    cola_encoded = cola_raw.map(tokenize, batched=True)\n",
     "    train_dataset, eval_dataset = cola_encoded[\"train\"], cola_encoded[\"validation\"]\n",
     "\n",
@@ -654,7 +650,7 @@
     "        MODEL_CHECKPOINT, num_labels=NUM_LABELS\n",
     "    )\n",
     "\n",
-    "    metric = datasets.load_metric(\"glue\", TASK)\n",
+    "    metric = datasets.load_metric(\"glue\", TASK, trust_remote_code=True)\n",
     "    def compute_metrics(eval_pred):\n",
     "        predictions, labels = eval_pred\n",
     "        predictions = np.argmax(predictions, axis=1)\n",
@@ -847,7 +843,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m Reusing dataset glue (/home/ec2-user/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m Reusing dataset glue (/home/ec2-user/.cache/huggingface/datasets/glue/cola/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)\n",
       "  0%|          | 0/9 [00:00<?, ?ba/s]\n",
       " 22%|██▏       | 2/9 [00:00<00:00, 19.41ba/s]\n",
       " 56%|█████▌    | 5/9 [00:00<00:00, 20.98ba/s]\n",
@@ -856,25 +852,25 @@
       "100%|██████████| 2/2 [00:00<00:00, 42.79ba/s]\n",
       "  0%|          | 0/2 [00:00<?, ?ba/s]\n",
       "100%|██████████| 2/2 [00:00<00:00, 41.48ba/s]\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m To disable this warning, you can either:\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m \t- Avoid using `tokenizers` before the fork if possible\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m To disable this warning, you can either:\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m \t- Avoid using `tokenizers` before the fork if possible\n",
-      "\u001b[2m\u001b[36m(pid=11344)\u001b[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m To disable this warning, you can either:\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m \t- Avoid using `tokenizers` before the fork if possible\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m To disable this warning, you can either:\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m \t- Avoid using `tokenizers` before the fork if possible\n",
+      "\u001B[2m\u001B[36m(pid=11344)\u001B[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
      ]
     }
    ],
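Every hunk above makes the same change: the GLUE dataset and metric loading calls gain trust_remote_code=True, which newer `datasets` releases prompt for (and eventually require) before executing Hub-hosted loading scripts. As a quick reference, a minimal sketch of the resulting calls in plain Python, not the notebook's exact code (assuming TASK = "cola" as in the notebook, and a `datasets` version that still provides load_metric):

    import datasets

    TASK = "cola"  # the GLUE task used throughout the notebook

    # Opt in to running the Hub-hosted loading scripts for the GLUE dataset
    # and metric; newer `datasets` releases ask for this flag before they
    # will execute remote code.
    raw_dataset = datasets.load_dataset("glue", TASK, trust_remote_code=True)
    metric = datasets.load_metric("glue", TASK, trust_remote_code=True)

    print(raw_dataset["train"][0])  # first CoLA example: sentence, label, idx
    print(metric.compute(predictions=[0, 1], references=[0, 1]))  # Matthews correlation for CoLA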