@@ -34,8 +34,6 @@ class LoaderLLM:
     cpu_memory: object = None
     auto_devices: object = True
     load_in_8bit: bool = False
-    # Trust locally loaded models
-    trust_remote_code: bool = False
     is_llamacpp: bool = False
     bf16: bool = False
     params: object = None
@@ -64,7 +62,6 @@ class LoaderLLM:
         self.cpu_memory = params.get('cpu_memory', None)
         self.auto_devices = params.get('auto_devices', True)
         self.load_in_8bit = params.get('load_in_8bit', False)
-        self.trust_remote_code = not params.get('no_trust_remote_code', False)
         self.bf16 = params.get('bf16', False)

         self.reload_model()
@@ -73,7 +70,7 @@ class LoaderLLM:
         if not self.no_remote_model:
             checkpoint = model_name

-        model_config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=self.trust_remote_code)
+        model_config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)

         return model_config

@@ -96,10 +93,8 @@ class LoaderLLM:

         if 'chatglm' in model_name.lower():
             LoaderClass = AutoModel
-            trust_remote_code = self.trust_remote_code
         else:
             LoaderClass = AutoModelForCausalLM
-            trust_remote_code = False

         # Load the model in simple 16-bit mode by default
         if not any([self.cpu, self.load_in_8bit, self.auto_devices, self.gpu_memory is not None, self.cpu_memory is not None, self.is_llamacpp]):
@@ -113,7 +108,7 @@ class LoaderLLM:
                     low_cpu_mem_usage=True,
                     config=self.model_config,
                     torch_dtype=torch.bfloat16 if self.bf16 else torch.float16,
-                    trust_remote_code=trust_remote_code)
+                    trust_remote_code=True)
                 .half()
                 .cuda()
             )
@@ -124,7 +119,7 @@ class LoaderLLM:
                     low_cpu_mem_usage=True,
                     config=self.model_config,
                     torch_dtype=torch.bfloat16 if self.bf16 else torch.float16,
-                    trust_remote_code=trust_remote_code).half()
+                    trust_remote_code=True).half()
             # A device_map can be passed in to customize placement on each GPU
             if self.device_map is None:
                 device_map = self.auto_configure_device_map(num_gpus)
@@ -161,7 +156,7 @@ class LoaderLLM:
             params["torch_dtype"] = torch.float32
         else:
             params["device_map"] = 'auto'
-            params["trust_remote_code"] = trust_remote_code
+            params["trust_remote_code"] = True
             if self.load_in_8bit and any((self.auto_devices, self.gpu_memory)):
                 params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
             elif self.load_in_8bit:
@@ -220,7 +215,7 @@ class LoaderLLM:
             except:
                 pass
         else:
-            tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=trust_remote_code)
+            tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

         print(f"Loaded the model in {(time.time()-t0):.2f} seconds.")
         return model, tokenizer
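
Note: after this change, trust_remote_code=True is passed unconditionally to AutoConfig, AutoModel/AutoModelForCausalLM, and AutoTokenizer, so any custom modeling code bundled with a checkpoint (as ChatGLM requires) runs at load time, with no no_trust_remote_code opt-out. A minimal sketch of the resulting load path, assuming a ChatGLM-style checkpoint that ships its own modeling code (the checkpoint name below is only an example, not taken from this patch):

import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer

# Example checkpoint that requires remote code to load.
checkpoint = "THUDM/chatglm-6b"

# trust_remote_code=True executes the repo's bundled Python
# (e.g. its modeling_*.py) instead of a class built into transformers.
config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModel.from_pretrained(
    checkpoint,
    config=config,
    torch_dtype=torch.float16,
    trust_remote_code=True,
).half()
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)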