#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# 13:38
# - sparse_masklib.py -
import sys

import torch
from itertools import permutations

""" compute density (helper fn to compute % NNZs in a tensor) """
def fill(x):
    return float(x.nonzero().size(0)) / torch.numel(x)

""" reshape matrix into m-dimensional vectors: (h, w) -> (h*w/m, m) """
def reshape_1d(matrix, m):
    # If not a nice multiple of m, fill with zeroes.
    if matrix.shape[1] % m > 0:
        mat = torch.cuda.FloatTensor(
            matrix.shape[0], matrix.shape[1] + (m - matrix.shape[1] % m)).fill_(0)
        mat[:, :matrix.shape[1]] = matrix
        return mat.view(-1, m), mat.shape
    return matrix.view(-1, m), matrix.shape

""" return all possible m:n patterns in a 1d vector """
valid_m4n2_1d_patterns = None

def compute_valid_1d_patterns(m, n):
    # Early exit if patterns was already created.
    global valid_m4n2_1d_patterns
    if m == 4 and n == 2 and valid_m4n2_1d_patterns is not None:
        return valid_m4n2_1d_patterns
    patterns = torch.zeros(m)
    patterns[:n] = 1
    valid_patterns = torch.Tensor(list(set(permutations(patterns.tolist()))))
    if m == 4 and n == 2:
        valid_m4n2_1d_patterns = valid_patterns
    return valid_patterns

""" m:n 1d structured best """
def mn_1d_best(matrix, m, n):
    # Find all possible m:n patterns.
    patterns = compute_valid_1d_patterns(m, n).cuda()
    # For each group of m weights, keep the pattern preserving the largest magnitudes.
    mask = torch.cuda.IntTensor(matrix.shape).fill_(1).view(-1, m)
    mat, shape = reshape_1d(matrix, m)
    pmax = torch.argmax(torch.matmul(mat.abs(), patterns.t()), dim=1)
    mask[:] = patterns[pmax[:]]
    return mask.view(matrix.shape)


#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# 14:24
import os

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from pytorch_quantization import calib
from pytorch_quantization import nn as quant_nn
from pytorch_quantization import quant_modules
from pytorch_quantization.tensor_quant import QuantDescriptor

# use_fb_fake_quant is a TensorRT hack: set it to True when exporting to ONNX.
# In forward, fake_tensor_quant is then replaced by _fb_fake_quant, which calls
# torch's own fake_quantize_per_channel_affine so the graph can be exported as
# ONNX QuantizeLinear/DequantizeLinear.
quant_nn.TensorQuantizer.use_fb_fake_quant = False

def get_data():
    training_set = ...  # build the Dataset here (construction not shown)
    return DataLoader(training_set, batch_size=100, shuffle=True)

def collect_stats(model, data, num_batches):
    # Enable calibrators and disable quantization while gathering statistics.
    for name, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            module.disable_quant()
            module.enable_calib()

    # Feed calibration batches through the model.
    for i, (x, _) in enumerate(data):
        model(x.cuda())
        if i >= num_batches:
            break

    # Disable calibrators and re-enable quantization.
    for name, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            module.enable_quant()
            module.disable_calib()

def compute_amax(model):
    # Load the calibrated amax into every quantizer.
    for name, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if isinstance(module._calibrator, calib.MaxCalibrator):
                module.load_calib_amax()
            else:
                module.load_calib_amax(method="percentile")
    # for name, module in model.named_modules():
    #     if isinstance(module, quant_nn.TensorQuantizer):
    #         print("-")
    #         print(name, module)
    return model

# Use histogram calibration for inputs, and monkey-patch torch.nn with the
# quantized modules before the model is built.
quant_desc_input = QuantDescriptor(calib_method="histogram")
quant_nn.QuantConv2d.set_default_quant_desc_input(quant_desc_input)
quant_nn.QuantLinear.set_default_quant_desc_input(quant_desc_input)
quant_modules.initialize()

model = ...  # build the model here (construction not shown)
model.load_state_dict(torch.load("model.pt"))

collect_stats(model, get_data(), num_batches=1000)
compute_amax(model)

# QAT fine-tuning, then save the quantized weights.
optimizer = optim.Adam(model.parameters())
torch.save(model.state_dict(), "model.pt")
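To make the use_fb_fake_quant comment concrete, here is a minimal export sketch. It assumes an already calibrated model; the helper name, output path, and input shape are illustrative, not part of the original script:

import torch
from pytorch_quantization import nn as quant_nn

def export_onnx(model, path="model_qat.onnx"):  # hypothetical helper
    # Swap fake_tensor_quant for _fb_fake_quant so the exporter emits
    # ONNX QuantizeLinear/DequantizeLinear nodes.
    quant_nn.TensorQuantizer.use_fb_fake_quant = True
    model.eval()
    dummy = torch.randn(1, 3, 224, 224).cuda()  # assumed input shape
    torch.onnx.export(model, dummy, path, opset_version=13)
    # Restore the default for normal training/eval forward passes.
    quant_nn.TensorQuantizer.use_fb_fake_quant = False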
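And a sanity check on the sparse_masklib.py helpers further up: for m=4, n=2 there are C(4,2) = 6 valid patterns, and mn_1d_best keeps, in every group of four weights, the two with the largest magnitude. A hypothetical smoke test (requires a CUDA device, since the helpers allocate CUDA tensors):

import torch

w = torch.randn(8, 8).cuda()
mask = mn_1d_best(w, 4, 2)      # 2:4 structured-sparsity mask
print(fill(mask.float()))       # density is exactly 0.5
print(mask.view(-1, 4).sum(1))  # every group of 4 keeps exactly 2 weights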