Skip to content

Commit bfec1e7

Browse files
authored
Fix #3768 (#3789)
* fix bug
* update
1 parent 1d3f269 commit bfec1e7

File tree

8 files changed

+24
-13
lines changed

8 files changed

+24
-13
lines changed

paddlenlp/transformers/auto/modeling.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,8 @@
8989
("Bart", "bart"),
9090
("GAUAlpha", "gau_alpha"),
9191
("CodeGen", "codegen"),
92+
("CLIPVision", "clip"),
93+
("CLIPText", "clip"),
9294
("CLIP", "clip"),
9395
("Artist", "artist"),
9496
("OPT", 'opt'),

paddlenlp/transformers/clip/modeling.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -533,7 +533,7 @@ def quick_gelu(x):
533533

534534
F.quick_gelu = quick_gelu
535535

536-
NEG_INF = -1e9 # float("-inf") -1e4 -1e9
536+
NEG_INF = -1e4 # float("-inf") -1e4 -1e9
537537

538538

539539
class VisionTransformer(nn.Layer):

ppdiffusers/examples/dreambooth/train_dreambooth.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,11 +35,11 @@
3535
from paddle.vision import transforms
3636
from paddle.optimizer import AdamW
3737
from tqdm.auto import tqdm
38-
from paddlenlp.transformers import AutoModel, AutoTokenizer
38+
from paddlenlp.transformers import BertModel, AutoTokenizer, CLIPTextModel
3939
from pathlib import Path
4040

4141

42-
def parse_args(input_args):
42+
def parse_args(input_args=None):
4343
parser = argparse.ArgumentParser(
4444
description="Simple example of a training dreambooth script.")
4545
parser.add_argument(
@@ -427,7 +427,11 @@ def main(args):
427427
os.path.join(args.pretrained_model_name_or_path, "tokenizer"))
428428

429429
# Load models and create wrapper for stable diffusion
430-
text_encoder = AutoModel.from_pretrained(
430+
if "Taiyi-Stable-Diffusion-1B-Chinese-v0.1" in args.pretrained_model_name_or_path:
431+
model_cls = BertModel
432+
else:
433+
model_cls = CLIPTextModel
434+
text_encoder = model_cls.from_pretrained(
431435
os.path.join(args.pretrained_model_name_or_path, "text_encoder"))
432436
vae = AutoencoderKL.from_pretrained(args.pretrained_model_name_or_path,
433437
subfolder="vae")

ppdiffusers/examples/text_to_image/README.md

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@ export dataset_name="lambdalabs/pokemon-blip-captions"
3434
python -u train_text_to_image.py \
3535
--pretrained_model_name_or_path=$MODEL_NAME \
3636
--dataset_name=$dataset_name \
37-
--use_ema \
3837
--resolution=512 --center_crop --random_flip \
3938
--train_batch_size=1 \
4039
--gradient_accumulation_steps=4 \
@@ -108,7 +107,6 @@ export dataset_name="lambdalabs/pokemon-blip-captions"
108107
python -u -m paddle.distributed.launch --gpus "0,1,2,3" train_text_to_image.py \
109108
--pretrained_model_name_or_path=$MODEL_NAME \
110109
--dataset_name=$dataset_name \
111-
--use_ema \
112110
--resolution=512 --center_crop --random_flip \
113111
--train_batch_size=1 \
114112
--gradient_accumulation_steps=4 \

ppdiffusers/examples/text_to_image/run_multi.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ export dataset_name="lambdalabs/pokemon-blip-captions"
1818
python -u -m paddle.distributed.launch --gpus "0,1,2,3" train_text_to_image.py \
1919
--pretrained_model_name_or_path=$MODEL_NAME \
2020
--dataset_name=$dataset_name \
21-
--use_ema \
2221
--resolution=512 --center_crop --random_flip \
2322
--train_batch_size=1 \
2423
--gradient_accumulation_steps=4 \

ppdiffusers/examples/text_to_image/run_single.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ export dataset_name="lambdalabs/pokemon-blip-captions"
1818
python -u train_text_to_image.py \
1919
--pretrained_model_name_or_path=$MODEL_NAME \
2020
--dataset_name=$dataset_name \
21-
--use_ema \
2221
--resolution=512 --center_crop --random_flip \
2322
--train_batch_size=1 \
2423
--gradient_accumulation_steps=4 \

ppdiffusers/examples/text_to_image/train_text_to_image.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
from paddle.vision import transforms, BaseTransform
4040
from paddle.optimizer import AdamW
4141
from tqdm.auto import tqdm
42-
from paddlenlp.transformers import AutoModel, AutoTokenizer
42+
from paddlenlp.transformers import CLIPTextModel, AutoTokenizer, BertModel
4343

4444

4545
class Lambda(BaseTransform):
@@ -332,7 +332,12 @@ def main():
332332
# Load models and create wrapper for stable diffusion
333333
tokenizer = AutoTokenizer.from_pretrained(
334334
os.path.join(args.pretrained_model_name_or_path, "tokenizer"))
335-
text_encoder = AutoModel.from_pretrained(
335+
336+
if "Taiyi-Stable-Diffusion-1B-Chinese-v0.1" in args.pretrained_model_name_or_path:
337+
model_cls = BertModel
338+
else:
339+
model_cls = CLIPTextModel
340+
text_encoder = model_cls.from_pretrained(
336341
os.path.join(args.pretrained_model_name_or_path, "text_encoder"))
337342
vae = AutoencoderKL.from_pretrained(args.pretrained_model_name_or_path,
338343
subfolder="vae")

ppdiffusers/examples/textual_inversion/train_textual_inversion.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@
4343
from paddle.vision.transforms import RandomHorizontalFlip
4444
from paddle.optimizer import AdamW
4545
from tqdm.auto import tqdm
46-
from paddlenlp.transformers import AutoModel, AutoTokenizer, BertModel
46+
from paddlenlp.transformers import CLIPTextModel, AutoTokenizer, BertModel
4747

4848

4949
def get_writer(args):
@@ -468,7 +468,7 @@ def main():
468468
if args.tokenizer_name:
469469
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name)
470470
elif args.pretrained_model_name_or_path:
471-
tokenizer = AutoModel.from_pretrained(
471+
tokenizer = AutoTokenizer.from_pretrained(
472472
os.path.join(args.pretrained_model_name_or_path, "tokenizer"))
473473

474474
# Add the placeholder token in tokenizer
@@ -490,7 +490,11 @@ def main():
490490
args.placeholder_token)
491491

492492
# Load models and create wrapper for stable diffusion
493-
text_encoder = AutoModel.from_pretrained(
493+
if "Taiyi-Stable-Diffusion-1B-Chinese-v0.1" in args.pretrained_model_name_or_path:
494+
model_cls = BertModel
495+
else:
496+
model_cls = CLIPTextModel
497+
text_encoder = model_cls.from_pretrained(
494498
os.path.join(args.pretrained_model_name_or_path, "text_encoder"))
495499
vae = AutoencoderKL.from_pretrained(args.pretrained_model_name_or_path,
496500
subfolder="vae")

0 commit comments

Comments
 (0)