Python source code examples: joblib.dump()
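joblib.dump() persists an arbitrary Python object to disk (it is particularly efficient for objects containing large NumPy arrays), and joblib.load() reads it back. The examples below show how joblib.dump() is used in practice. As a minimal round-trip sketch (the file name demo.joblib and the contents of obj are placeholders, not taken from any example below):

import numpy as np
import joblib

obj = {"weights": np.arange(10), "name": "demo"}  # any picklable Python object
joblib.dump(obj, "demo.joblib", compress=3)       # write to disk with compression level 3
restored = joblib.load("demo.joblib")             # read the object back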
Example 1
def save_model(self):
"""
Saves all necessary model state information for classification work to disk.
:return: True if the model was saved successfully; raises IOError otherwise.
"""
# if we aren't keeping the extracted file details to reproduce the analysis, let's clear that data and
# save the model. It's not needed to perform basic predictions on new files.
if self.retain_sample_contents is False:
metadata = {'filemodified', 'extracted_vba', 'filename_vba', 'filepath', 'filename', 'function_names',
'filesize', 'filemodified', 'stream_path'}
metadata_delete = list(metadata & set(self.modeldata.columns))
self.modeldata.drop(metadata_delete, axis=1, inplace=True)
try:
saved_model = {'modeldata': self.modeldata,
'features': self.features,
'model_cntvect_cnts_array': self.modeldata_cnts.toarray()
}
joblib.dump(saved_model, self.modeldata_pickle)
except Exception as e:
raise IOError("Error saving model data to disk: {}".format(str(e)))
return False
return True
Example 2
def save(self, filepath):
joblib.dump(self, filepath, 3)
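In this example the third positional argument of joblib.dump maps to its compress parameter, so the object is written with compression level 3.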
Example 3
def save_to_disk(dataset, filename, compress=3):
"""Save a dataset to file."""
if filename.endswith('.joblib'):
joblib.dump(dataset, filename, compress=compress)
elif filename.endswith('.npy'):
np.save(filename, dataset)
else:
raise ValueError("Filename with unsupported extension: %s" % filename)
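The matching read path is not part of the example above; a minimal sketch of a hypothetical load_from_disk counterpart, mirroring the same extension-based dispatch, might look like this:

def load_from_disk(filename):
    # Hypothetical counterpart to save_to_disk: dispatch on the file extension.
    if filename.endswith('.joblib'):
        return joblib.load(filename)
    elif filename.endswith('.npy'):
        return np.load(filename, allow_pickle=True)
    else:
        raise ValueError("Filename with unsupported extension: %s" % filename)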
Example 4
def save_metadata(tasks, metadata_df, data_dir):
"""
Saves the metadata for a DiskDataset
Parameters
----------
tasks: list of str
Tasks of DiskDataset
metadata_df: pd.DataFrame
data_dir: str
Directory to store metadata
Returns
-------
"""
if isinstance(tasks, np.ndarray):
tasks = tasks.tolist()
metadata_filename = os.path.join(data_dir, "metadata.csv.gzip")
tasks_filename = os.path.join(data_dir, "tasks.json")
with open(tasks_filename, 'w') as fout:
json.dump(tasks, fout)
metadata_df.to_csv(metadata_filename, index=False, compression='gzip')
Example 5
def _tf_simple_save(self, itr=None):
"""
Uses simple_save to save a trained model, plus info to make it easy
to associate tensors with variables after restore.
"""
if proc_id()==0:
assert hasattr(self, 'tf_saver_elements'), \
"First have to setup saving with self.setup_tf_saver"
fpath = 'tf1_save' + ('%d'%itr if itr is not None else '')
fpath = osp.join(self.output_dir, fpath)
if osp.exists(fpath):
# simple_save refuses to be useful if fpath already exists,
# so just delete fpath if it's there.
shutil.rmtree(fpath)
tf.saved_model.simple_save(export_dir=fpath, **self.tf_saver_elements)
joblib.dump(self.tf_saver_info, osp.join(fpath, 'model_info.pkl'))
Example 6
def create_model_from_signatures(sig_csv_path, model_out, sig_datatype=np.int32):
"""
Takes a .csv file containing class signatures - produced by extract_features_to_csv - and uses it to train
and pickle a scikit-learn model.
Parameters
----------
sig_csv_path
The path to the signatures file
model_out
The location to save the pickled model to.
sig_datatype
The datatype to read the csv as. Defaults to int32.
Notes
-----
At present, the model is an ExtraTreesClassifier arrived at by tpot:
model = ens.ExtraTreesClassifier(bootstrap=False, criterion="gini", max_features=0.55, min_samples_leaf=2,
min_samples_split=16, n_estimators=100, n_jobs=4, class_weight='balanced')
"""
model = ens.ExtraTreesClassifier(bootstrap=False, criterion="gini", max_features=0.55, min_samples_leaf=2,
min_samples_split=16, n_estimators=100, n_jobs=4, class_weight='balanced')
features, labels = load_signatures(sig_csv_path, sig_datatype)
model.fit(features, labels)
joblib.dump(model, model_out)
Example 7
def save_itr_params(itr, params, prefix='', save_anyway=False):
if _snapshot_dir:
if len(prefix) > 0:
prefix = prefix + '_'
if _snapshot_mode == 'all':
file_name = osp.join(_snapshot_dir, prefix + 'itr_%d.pkl' % itr)
pickle.dump(params, open(file_name, "wb"))
elif _snapshot_mode == 'last':
# override previous params
file_name = osp.join(_snapshot_dir, prefix + 'params.pkl')
pickle.dump(params, open(file_name, "wb"))
elif _snapshot_mode == "gap":
if save_anyway or itr % _snapshot_gap == 0:
file_name = osp.join(_snapshot_dir, prefix + 'itr_%d.pkl' % itr)
pickle.dump(params, open(file_name, "wb"))
elif _snapshot_mode == "gap_and_last":
if save_anyway or itr % _snapshot_gap == 0:
file_name = osp.join(_snapshot_dir, prefix + 'itr_%d.pkl' % itr)
pickle.dump(params, open(file_name, "wb"))
file_name = osp.join(_snapshot_dir, prefix + 'params.pkl')
pickle.dump(params, open(file_name, "wb"))
elif _snapshot_mode == 'none':
pass
else:
raise NotImplementedError
Example 8
def main():
args = parse_args()
features_extractor = FaceFeaturesExtractor()
embeddings, labels, class_to_idx = load_data(args, features_extractor)
clf = train(args, embeddings, labels)
idx_to_class = {v: k for k, v in class_to_idx.items()}
target_names = map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0]))
print(metrics.classification_report(labels, clf.predict(embeddings), target_names=list(target_names)))
if not os.path.isdir(MODEL_DIR_PATH):
os.mkdir(MODEL_DIR_PATH)
model_path = os.path.join('model', 'face_recogniser.pkl')
joblib.dump(FaceRecogniser(features_extractor, clf, idx_to_class), model_path)
Example 9
def save(self, filename, ensure_compatibility = True):
"""
Pickle a class instance. E.g., corex.save('saved.pkl')
When set to True, ensure_compatibility resets self.words before saving
a pickle to avoid Unicode loading issues usually seen when trying to load
the pickle from a Python 2 implementation.
It is recommended to set it to False if you know you are going to load the
model in an all Python 3 implementation as self.words is required for fetching
the topics via get_topics().
"""
# Avoid saving words with object.
#TODO: figure out why Unicode sometimes causes an issue with loading after pickling
temp_words = self.words
if ensure_compatibility and (self.words is not None):
self.words = None
# Save CorEx object
import pickle
if path.dirname(filename) and not path.exists(path.dirname(filename)):
makedirs(path.dirname(filename))
pickle.dump(self, open(filename, 'wb'), protocol=-1)
# Restore words to CorEx object
self.words = temp_words
Example 10
def fit_log_reg(X, y):
# fits a logistic regression model to your data
model = LogisticRegression(class_weight='balanced')
model.fit(X, y)
print('Train size: ', len(X))
train_score = model.score(X, y)
print('Training accuracy', train_score)
ypredz = model.predict(X)
cm = confusion_matrix(y, ypredz)
# tn, fp, fn, tp = cm.ravel()
tn, _, _, tp = cm.ravel()
# Recall (true positive rate): when it's actually yes, how often does it predict yes?
recall = float(tp) / np.sum(cm, axis=1)[1]
# Specificity: When it's actually no, how often does it predict no?
specificity = float(tn) / np.sum(cm, axis=1)[0]
print('Recall/ Like accuracy', recall)
print('specificity/ Dislike accuracy', specificity)
# save the model
joblib.dump(model, 'log_reg_model.pkl')
Example 11
def write_to_file(obj, filename, path=None, overwrite=False):
if path is not None:
filename = os.path.join(path, filename)
filename = os.path.abspath(filename)
output_dir = os.path.dirname(filename)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
if not overwrite and os.path.exists(filename):
print("WARNING: file already exists %s; not overwriting." % (filename,))
pass
# Check to see whether same as one on disk?
# When to overwrite?
else:
print("Writing to %s" % (filename,))
joblib.dump(obj, filename)
# Special-case stuff
# ------------------
Example 12
def test_model_joblib_serialization(teardown, dump, load):
x_data = iris.data
y_t_data = iris.target
random_state = 123
n_components = 2
stacked_model_baikal = make_naive_stacked_model(
n_components, random_state, x_data, y_t_data
)
y_pred_baikal = stacked_model_baikal.predict(x_data)
# Persist model to a file
f = tempfile.TemporaryFile()
dump(stacked_model_baikal, f)
f.seek(0)
stacked_model_baikal_2 = load(f)
y_pred_baikal_2 = stacked_model_baikal_2.predict(x_data)
assert_array_equal(y_pred_baikal_2, y_pred_baikal)
Example 13
def read_grid_pkl(tmpdir):
expected = {'lon_min_x': 116.319236,
'lat_min_y': 39.984094,
'grid_size_lat_y': 5,
'grid_size_lon_x': 5,
'cell_size_by_degree': 0.0001353464801860623
}
d = tmpdir.mkdir('core')
file_write_default = d.join('test_read_grid.pkl')
filename_write_default = os.path.join(
file_write_default.dirname, file_write_default.basename
)
grid = _default_grid()
with open(filename_write_default, 'wb') as f:
joblib.dump(grid.get_grid(), f)
saved_grid = grid.read_grid_pkl(filename_write_default)
assert_equal(saved_grid, expected)
Example 14
def test_modelpipeline_pickling_preserves_template_ids(
version, train_id, predict_id):
# Test that pickling a ModelPipeline object preserves the template IDs
# that have already been set during object instantiation.
with TemporaryDirectory() as temp_dir:
mp = _model.ModelPipeline('wf', 'dv', civisml_version=version)
# Before pickling, make sure the template IDs are set as expected
assert mp.train_template_id == train_id
assert mp.predict_template_id == predict_id
pickle_path = os.path.join(temp_dir, 'model.pkl')
with open(pickle_path, 'wb') as f:
pickle.dump(mp, f)
with open(pickle_path, 'rb') as f:
mp_unpickled = pickle.load(f)
# After unpickling, the template IDs should remain.
assert mp_unpickled.train_template_id == train_id
assert mp_unpickled.predict_template_id == predict_id
Example 15
def _tf_simple_save(self, itr=None):
"""
Uses simple_save to save a trained model, plus info to make it easy
to associate tensors with variables after restore.
"""
if proc_id()==0:
assert hasattr(self, 'tf_saver_elements'), \
"First have to setup saving with self.setup_tf_saver"
fpath = 'simple_save' + ('%d'%itr if itr is not None else '')
fpath = osp.join(self.output_dir, fpath)
if osp.exists(fpath):
# simple_save refuses to be useful if fpath already exists,
# so just delete fpath if it's there.
shutil.rmtree(fpath)
tf.saved_model.simple_save(export_dir=fpath, **self.tf_saver_elements)
joblib.dump(self.tf_saver_info, osp.join(fpath, 'model_info.pkl'))
Example 16
def save_itr_params(itr, params):
if _snapshot_dir:
if _snapshot_mode == 'all':
file_name = osp.join(_snapshot_dir, 'itr_%d.pkl' % itr)
joblib.dump(params, file_name, compress=3)
elif _snapshot_mode == 'last':
# override previous params
file_name = osp.join(_snapshot_dir, 'params.pkl')
joblib.dump(params, file_name, compress=3)
elif _snapshot_mode == "gap":
if itr % _snapshot_gap == 0:
file_name = osp.join(_snapshot_dir, 'itr_%d.pkl' % itr)
joblib.dump(params, file_name, compress=3)
elif _snapshot_mode == 'none':
pass
else:
raise NotImplementedError
Example 17
def log_parameters(log_file, args, classes):
log_params = {}
for param_name, param_value in args.__dict__.items():
if any([param_name.startswith(x) for x in list(classes.keys())]):
continue
log_params[param_name] = param_value
for name, cls in classes.items():
if isinstance(cls, type):
params = get_all_parameters(cls, args)
params["_name"] = getattr(args, name)
log_params[name] = params
else:
log_params[name] = getattr(cls, "__kwargs", dict())
log_params[name][
"_name"] = cls.__module__ + "." + cls.__class__.__name__
mkdir_p(os.path.dirname(log_file))
with open(log_file, "w") as f:
json.dump(log_params, f, indent=2, sort_keys=True)
Example 18
def log_parameters_lite(log_file, args):
log_params = {}
for param_name, param_value in args.__dict__.items():
log_params[param_name] = param_value
if args.args_data is not None:
stub_method = pickle.loads(base64.b64decode(args.args_data))
method_args = stub_method.kwargs
log_params["json_args"] = dict()
for k, v in list(method_args.items()):
log_params["json_args"][k] = stub_to_json(v)
kwargs = stub_method.obj.kwargs
for k in ["baseline", "env", "policy"]:
if k in kwargs:
log_params["json_args"][k] = stub_to_json(kwargs.pop(k))
log_params["json_args"]["algo"] = stub_to_json(stub_method.obj)
mkdir_p(os.path.dirname(log_file))
with open(log_file, "w") as f:
json.dump(log_params, f, indent=2, sort_keys=True, cls=MyEncoder)
Example 19
def store_matrix(matrix='',
output_dir_path='',
out_file_name='',
output_format=''):
"""store_matrix."""
if not os.path.exists(output_dir_path):
os.mkdir(output_dir_path)
full_out_file_name = os.path.join(output_dir_path, out_file_name)
if output_format == "MatrixMarket":
if len(matrix.shape) == 1:
raise Exception(
"'MatrixMarket' format supports only 2D dimensional array\
and not vectors")
else:
io.mmwrite(full_out_file_name, matrix, precision=None)
elif output_format == "numpy":
np.save(full_out_file_name, matrix)
elif output_format == "joblib":
joblib.dump(matrix, full_out_file_name)
elif output_format == "text":
with open(full_out_file_name, "w") as f:
if len(matrix.shape) == 1:
for x in matrix:
f.write("%s\n" % (x))
else:
raise Exception(
"'text' format supports only mono dimensional array\
and not matrices")
logger.info("Written file: %s" % full_out_file_name)
Example 20
def dump(obj, output_dir_path='', out_file_name=''):
"""dump."""
if not os.path.exists(output_dir_path):
os.mkdir(output_dir_path)
full_out_file_name = os.path.join(output_dir_path, out_file_name) + ".pkl"
joblib.dump(obj, full_out_file_name)
Example 21
def save(self, model_name):
"""save."""
joblib.dump(self, model_name, compress=1)
Example 22
def save(filepath, obj):
"""Saves an object to the specified filepath using joblib.
joblib is like pickle but will save NumPy arrays as separate files for
greater efficiency.
:param filepath: str, path to save to
:param obj: object to save
"""
joblib.dump(obj, filepath)
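Reading the object back is a single joblib.load call. A small sketch, assuming filepath is the same path passed to save above; the mmap_mode variant only helps when the arrays were stored uncompressed:

obj = joblib.load(filepath)                 # regular load
obj = joblib.load(filepath, mmap_mode='r')  # memory-map NumPy arrays instead of reading them fully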
Example 23
def save_variables(save_path, variables=None, sess=None):
sess = sess or get_session()
variables = variables or tf.trainable_variables()
ps = sess.run(variables)
save_dict = {v.name: value for v, value in zip(variables, ps)}
os.makedirs(os.path.dirname(save_path), exist_ok=True)
joblib.dump(save_dict, save_path)
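For completeness, here is a sketch of a matching restore routine, assuming the same get_session helper and a TensorFlow 1.x graph; it is not part of the example above, but mirrors how such a name-to-value dictionary is typically reloaded:

def load_variables(load_path, variables=None, sess=None):
    # Restore each variable from the name -> value dictionary written by save_variables.
    sess = sess or get_session()
    variables = variables or tf.trainable_variables()
    loaded_params = joblib.load(os.path.expanduser(load_path))
    restores = [v.assign(loaded_params[v.name]) for v in variables]
    sess.run(restores)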
Example 24
def save_variables(save_path, variables=None, sess=None):
sess = sess or get_session()
variables = variables or tf.trainable_variables()
ps = sess.run(variables)
save_dict = {v.name: value for v, value in zip(variables, ps)}
dirname = os.path.dirname(save_path)
if any(dirname):
os.makedirs(dirname, exist_ok=True)
joblib.dump(save_dict, save_path)
Example 25
def save(self, model, out_path):
if model is not None and out_path is not None:
self.log("Save model to " + out_path)
check_and_create_dir(out_path)
joblib.dump(model, out_path)
Example 26
def dump_reader(self, filename):
""" Dump reader model to a .joblib object
"""
self.cpu()
joblib.dump(self.reader, filename)
if torch.cuda.is_available():
self.cuda()
Example 27
def save(self, filename: str):
"""
Saves model to a custom file format
filename : str
Name of file to save. Don't include a filename extension;
extensions are added automatically.
The file format is a zipfile containing a joblib dump (pickle-like) plus dependency metadata.
The metadata is checked on load.
Includes validation and metadata to avoid pickle deserialization gotchas.
See Alex Gaynor's PyCon 2014 talk "Pickles are for Delis"
for more info on why we introduce this additional check.
"""
if '.zip' in filename:
raise UserWarning("The file extension '.zip' is automatically added"
+ " to saved models. The name will have redundant extensions")
sysverinfo = sys.version_info
meta_data = {
"python_": f'{sysverinfo[0]}.{sysverinfo[1]}',
"skl_": sklearn.__version__[:-2],
"pd_": pd.__version__[:-2],
"csrg_": cg.__version__[:-2]
}
with tempfile.TemporaryDirectory() as temp_dir:
joblib.dump(self, os.path.join(temp_dir, self.f_model), compress=True)
with open(os.path.join(temp_dir, self.f_mdata), 'w') as f:
json.dump(meta_data, f)
filename = shutil.make_archive(filename, 'zip', temp_dir)
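The docstring above describes the on-disk layout: a zip archive holding a joblib dump of the model plus a JSON file with dependency metadata. A hypothetical load counterpart (the f_model and f_mdata attribute names are taken from the save method; the actual metadata validation is only hinted at here) could unpack and read it like this:

def load(self, filename):
    # Hypothetical counterpart to save(): unpack the archive, read the metadata, then load the model.
    with tempfile.TemporaryDirectory() as temp_dir:
        shutil.unpack_archive(filename + '.zip', temp_dir, 'zip')
        with open(os.path.join(temp_dir, self.f_mdata)) as f:
            meta_data = json.load(f)  # versions recorded at save time; compare before trusting the pickle
        return joblib.load(os.path.join(temp_dir, self.f_model))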