Python source code examples: joblib.dump()
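joblib.dump() persists an arbitrary Python object to disk (it is particularly efficient for objects containing large NumPy arrays), and joblib.load() reads it back. The examples below show how joblib.dump() is used in practice. As a minimal round-trip sketch (the file name demo.joblib and the contents of obj are placeholders, not taken from any example below):

import numpy as np
import joblib

obj = {"weights": np.arange(10), "name": "demo"}  # any picklable Python object
joblib.dump(obj, "demo.joblib", compress=3)       # write to disk with compression level 3
restored = joblib.load("demo.joblib")             # read the object back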
Example 1
def save_model(self):
"""
Saves all necessary model state information for classification work to disk.
:return: True if the model was saved successfully; raises IOError otherwise.
"""
# if we aren't keeping the extracted file details to reproduce the analysis, let's clear that data and
# save the model. It's not needed to perform basic predictions on new files.
if self.retain_sample_contents is False:
metadata = {'filemodified', 'extracted_vba', 'filename_vba', 'filepath', 'filename', 'function_names',
'filesize', 'filemodified', 'stream_path'}
metadata_delete = list(metadata & set(self.modeldata.columns))
self.modeldata.drop(metadata_delete, axis=1, inplace=True)
try:
saved_model = {'modeldata': self.modeldata,
'features': self.features,
'model_cntvect_cnts_array': self.modeldata_cnts.toarray()
}
joblib.dump(saved_model, self.modeldata_pickle)
except Exception as e:
raise IOError("Error saving model data to disk: {}".format(str(e)))
return False
return True
Example 2
def save(self, filepath):
joblib.dump(self, filepath, 3)
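In this example the third positional argument of joblib.dump maps to its compress parameter, so the object is written with compression level 3.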
Example 3
def save_to_disk(dataset, filename, compress=3):
"""Save a dataset to file."""
if filename.endswith('.joblib'):
joblib.dump(dataset, filename, compress=compress)
elif filename.endswith('.npy'):
np.save(filename, dataset)
else:
raise ValueError("Filename with unsupported extension: %s" % filename)
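The matching read path is not part of the example above; a minimal sketch of a hypothetical load_from_disk counterpart, mirroring the same extension-based dispatch, might look like this:

def load_from_disk(filename):
    # Hypothetical counterpart to save_to_disk: dispatch on the file extension.
    if filename.endswith('.joblib'):
        return joblib.load(filename)
    elif filename.endswith('.npy'):
        return np.load(filename, allow_pickle=True)
    else:
        raise ValueError("Filename with unsupported extension: %s" % filename)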
Example 4
def save_metadata(tasks, metadata_df, data_dir):
"""
Saves the metadata for a DiskDataset
Parameters
----------
tasks: list of str
Tasks of DiskDataset
metadata_df: pd.DataFrame
data_dir: str
Directory to store metadata
Returns
-------
"""
if isinstance(tasks, np.ndarray):
tasks = tasks.tolist()
metadata_filename = os.path.join(data_dir, "metadata.csv.gzip")
tasks_filename = os.path.join(data_dir, "tasks.json")
with open(tasks_filename, 'w') as fout:
json.dump(tasks, fout)
metadata_df.to_csv(metadata_filename, index=False, compression='gzip')
Example 5
def _tf_simple_save(self, itr=None):
"""
Uses simple_save to save a trained model, plus info to make it easy
to associate tensors with variables after restore.
"""
if proc_id()==0:
assert hasattr(self, 'tf_saver_elements'), \
"First have to setup saving with self.setup_tf_saver"
fpath = 'tf1_save' + ('%d'%itr if itr is not None else '')
fpath = osp.join(self.output_dir, fpath)
if osp.exists(fpath):
# simple_save refuses to be useful if fpath already exists,
# so just delete fpath if it's there.
shutil.rmtree(fpath)
tf.saved_model.simple_save(export_dir=fpath, **self.tf_saver_elements)
joblib.dump(self.tf_saver_info, osp.join(fpath, 'model_info.pkl'))
Example 6
def create_model_from_signatures(sig_csv_path, model_out, sig_datatype=np.int32):
"""
Takes a .csv file containing class signatures - produced by extract_features_to_csv - and uses it to train
and pickle a scikit-learn model.
Parameters
----------
sig_csv_path
The path to the signatures file
model_out
The location to save the pickled model to.
sig_datatype
The datatype to read the csv as. Defaults to int32.
Notes
-----
At present, the model is an ExtraTreesClassifier arrived at by tpot:
model = ens.ExtraTreesClassifier(bootstrap=False, criterion="gini", max_features=0.55, min_samples_leaf=2,
min_samples_split=16, n_estimators=100, n_jobs=4, class_weight='balanced')
"""
model = ens.ExtraTreesClassifier(bootstrap=False, criterion="gini", max_features=0.55, min_samples_leaf=2,
min_samples_split=16, n_estimators=100, n_jobs=4, class_weight='balanced')
features, labels = load_signatures(sig_csv_path, sig_datatype)
model.fit(features, labels)
joblib.dump(model, model_out)
Example 7
def save_itr_params(itr, params, prefix='', save_anyway=False):
if _snapshot_dir:
if len(prefix) > 0:
prefix = prefix + '_'
if _snapshot_mode == 'all':
file_name = osp.join(_snapshot_dir, prefix + 'itr_%d.pkl' % itr)
pickle.dump(params, open(file_name, "wb"))
elif _snapshot_mode == 'last':
# override previous params
file_name = osp.join(_snapshot_dir, prefix + 'params.pkl')
pickle.dump(params, open(file_name, "wb"))
elif _snapshot_mode == "gap":
if save_anyway or itr % _snapshot_gap == 0:
file_name = osp.join(_snapshot_dir, prefix + 'itr_%d.pkl' % itr)
pickle.dump(params, open(file_name, "wb"))
elif _snapshot_mode == "gap_and_last":
if save_anyway or itr % _snapshot_gap == 0:
file_name = osp.join(_snapshot_dir, prefix + 'itr_%d.pkl' % itr)
pickle.dump(params, open(file_name, "wb"))
file_name = osp.join(_snapshot_dir, prefix + 'params.pkl')
pickle.dump(params, open(file_name, "wb"))
elif _snapshot_mode == 'none':
pass
else:
raise NotImplementedError
Example 8
def main():
args = parse_args()
features_extractor = FaceFeaturesExtractor()
embeddings, labels, class_to_idx = load_data(args, features_extractor)
clf = train(args, embeddings, labels)
idx_to_class = {v: k for k, v in class_to_idx.items()}
target_names = map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0]))
print(metrics.classification_report(labels, clf.predict(embeddings), target_names=list(target_names)))
if not os.path.isdir(MODEL_DIR_PATH):
os.mkdir(MODEL_DIR_PATH)
model_path = os.path.join('model', 'face_recogniser.pkl')
joblib.dump(FaceRecogniser(features_extractor, clf, idx_to_class), model_path)
Example 9
def save(self, filename, ensure_compatibility = True):
"""
Pickle a class instance. E.g., corex.save('saved.pkl')
When set to True, ensure_compatibility resets self.words before saving
a pickle to avoid Unicode loading issues usually seen when trying to load
the pickle from a Python 2 implementation.
It is recommended to set it to False if you know you are going to load the
model in an all Python 3 implementation as self.words is required for fetching
the topics via get_topics().
"""
# Avoid saving words with object.
#TODO: figure out why Unicode sometimes causes an issue with loading after pickling
temp_words = self.words
if ensure_compatibility and (self.words is not None):
self.words = None
# Save CorEx object
import pickle
if path.dirname(filename) and not path.exists(path.dirname(filename)):
makedirs(path.dirname(filename))
pickle.dump(self, open(filename, 'wb'), protocol=-1)
# Restore words to CorEx object
self.words = temp_words
Example 10
def fit_log_reg(X, y):
# fits a logistic regression model to your data
model = LogisticRegression(class_weight='balanced')
model.fit(X, y)
print('Train size: ', len(X))
train_score = model.score(X, y)
print('Training accuracy', train_score)
ypredz = model.predict(X)
cm = confusion_matrix(y, ypredz)
# tn, fp, fn, tp = cm.ravel()
tn, _, _, tp = cm.ravel()
# Recall (true positive rate): when it's actually yes, how often does it predict yes?
recall = float(tp) / np.sum(cm, axis=1)[1]
# Specificity: When it's actually no, how often does it predict no?
specificity = float(tn) / np.sum(cm, axis=1)[0]
print('Recall/ Like accuracy', recall)
print('specificity/ Dislike accuracy', specificity)
# save the model
joblib.dump(model, 'log_reg_model.pkl')
Example 11
def write_to_file(obj, filename, path=None, overwrite=False):
if path is not None:
filename = os.path.join(path, filename)
filename = os.path.abspath(filename)
output_dir = os.path.dirname(filename)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
if not overwrite and os.path.exists(filename):
print("WARNING: file already exists %s; not overwriting." % (filename,))
pass
# Check to see whether same as one on disk?
# When to overwrite?
else:
print("Writing to %s" % (filename,))
joblib.dump(obj, filename)
# Special-case stuff
# ------------------
Example 12
def test_model_joblib_serialization(teardown, dump, load):
x_data = iris.data
y_t_data = iris.target
random_state = 123
n_components = 2
stacked_model_baikal = make_naive_stacked_model(
n_components, random_state, x_data, y_t_data
)
y_pred_baikal = stacked_model_baikal.predict(x_data)
# Persist model to a file
f = tempfile.TemporaryFile()
dump(stacked_model_baikal, f)
f.seek(0)
stacked_model_baikal_2 = load(f)
y_pred_baikal_2 = stacked_model_baikal_2.predict(x_data)
assert_array_equal(y_pred_baikal_2, y_pred_baikal)
Example 13
def read_grid_pkl(tmpdir):
expected = {'lon_min_x': 116.319236,
'lat_min_y': 39.984094,
'grid_size_lat_y': 5,
'grid_size_lon_x': 5,
'cell_size_by_degree': 0.0001353464801860623
}
d = tmpdir.mkdir('core')
file_write_default = d.join('test_read_grid.pkl')
filename_write_default = os.path.join(
file_write_default.dirname, file_write_default.basename
)
grid = _default_grid()
with open(filename_write_default, 'wb') as f:
joblib.dump(grid.get_grid(), f)
saved_grid = grid.read_grid_pkl(filename_write_default)
assert_equal(saved_grid, expected)
Example 14
def test_modelpipeline_pickling_preserves_template_ids(
version, train_id, predict_id):
# Test that pickling a ModelPipeline object preserves the template IDs
# that have already been set during object instantiation.
with TemporaryDirectory() as temp_dir:
mp = _model.ModelPipeline('wf', 'dv', civisml_version=version)
# Before pickling, make sure the template IDs are set as expected
assert mp.train_template_id == train_id
assert mp.predict_template_id == predict_id
pickle_path = os.path.join(temp_dir, 'model.pkl')
with open(pickle_path, 'wb') as f:
pickle.dump(mp, f)
with open(pickle_path, 'rb') as f:
mp_unpickled = pickle.load(f)
# After unpickling, the template IDs should remain.
assert mp_unpickled.train_template_id == train_id
assert mp_unpickled.predict_template_id == predict_id
Example 15
def _tf_simple_save(self, itr=None):
"""
Uses simple_save to save a trained model, plus info to make it easy
to associate tensors with variables after restore.
"""
if proc_id()==0:
assert hasattr(self, 'tf_saver_elements'), \
"First have to setup saving with self.setup_tf_saver"
fpath = 'simple_save' + ('%d'%itr if itr is not None else '')
fpath = osp.join(self.output_dir, fpath)
if osp.exists(fpath):
# simple_save refuses to be useful if fpath already exists,
# so just delete fpath if it's there.
shutil.rmtree(fpath)
tf.saved_model.simple_save(export_dir=fpath, **self.tf_saver_elements)
joblib.dump(self.tf_saver_info, osp.join(fpath, 'model_info.pkl'))
Example 16
def save_itr_params(itr, params):
if _snapshot_dir:
if _snapshot_mode == 'all':
file_name = osp.join(_snapshot_dir, 'itr_%d.pkl' % itr)
joblib.dump(params, file_name, compress=3)
elif _snapshot_mode == 'last':
# override previous params
file_name = osp.join(_snapshot_dir, 'params.pkl')
joblib.dump(params, file_name, compress=3)
elif _snapshot_mode == "gap":
if itr % _snapshot_gap == 0:
file_name = osp.join(_snapshot_dir, 'itr_%d.pkl' % itr)
joblib.dump(params, file_name, compress=3)
elif _snapshot_mode == 'none':
pass
else:
raise NotImplementedError
Example 17
def log_parameters(log_file, args, classes):
log_params = {}
for param_name, param_value in args.__dict__.items():
if any([param_name.startswith(x) for x in list(classes.keys())]):
continue
log_params[param_name] = param_value
for name, cls in classes.items():
if isinstance(cls, type):
params = get_all_parameters(cls, args)
params["_name"] = getattr(args, name)
log_params[name] = params
else:
log_params[name] = getattr(cls, "__kwargs", dict())
log_params[name][
"_name"] = cls.__module__ + "." + cls.__class__.__name__
mkdir_p(os.path.dirname(log_file))
with open(log_file, "w") as f:
json.dump(log_params, f, indent=2, sort_keys=True)
Example 18
def log_parameters_lite(log_file, args):
log_params = {}
for param_name, param_value in args.__dict__.items():
log_params[param_name] = param_value
if args.args_data is not None:
stub_method = pickle.loads(base64.b64decode(args.args_data))
method_args = stub_method.kwargs
log_params["json_args"] = dict()
for k, v in list(method_args.items()):
log_params["json_args"][k] = stub_to_json(v)
kwargs = stub_method.obj.kwargs
for k in ["baseline", "env", "policy"]:
if k in kwargs:
log_params["json_args"][k] = stub_to_json(kwargs.pop(k))
log_params["json_args"]["algo"] = stub_to_json(stub_method.obj)
mkdir_p(os.path.dirname(log_file))
with open(log_file, "w") as f:
json.dump(log_params, f, indent=2, sort_keys=True, cls=MyEncoder)
Example 19
def store_matrix(matrix='',
output_dir_path='',
out_file_name='',
output_format=''):
"""store_matrix."""
if not os.path.exists(output_dir_path):
os.mkdir(output_dir_path)
full_out_file_name = os.path.join(output_dir_path, out_file_name)
if output_format == "MatrixMarket":
if len(matrix.shape) == 1:
raise Exception(
"'MatrixMarket' format supports only 2D dimensional array\
and not vectors")
else:
io.mmwrite(full_out_file_name, matrix, precision=None)
elif output_format == "numpy":
np.save(full_out_file_name, matrix)
elif output_format == "joblib":
joblib.dump(matrix, full_out_file_name)
elif output_format == "text":
with open(full_out_file_name, "w") as f:
if len(matrix.shape) == 1:
for x in matrix:
f.write("%s\n" % (x))
else:
raise Exception(
"'text' format supports only mono dimensional array\
and not matrices")
logger.info("Written file: %s" % full_out_file_name)
Example 20
def dump(obj, output_dir_path='', out_file_name=''):
"""dump."""
if not os.path.exists(output_dir_path):
os.mkdir(output_dir_path)
full_out_file_name = os.path.join(output_dir_path, out_file_name) + ".pkl"
joblib.dump(obj, full_out_file_name)
Example 21
def save(self, model_name):
"""save."""
joblib.dump(self, model_name, compress=1)
Example 22
def save(filepath, obj):
"""Saves an object to the specified filepath using joblib.
joblib is like pickle but will save NumPy arrays as separate files for
greater efficiency.
:param filepath: str, path to save to
:param obj: object to save
"""
joblib.dump(obj, filepath)
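Reading the object back is a single joblib.load call. A small sketch, assuming filepath is the same path passed to save above; the mmap_mode variant only helps when the arrays were stored uncompressed:

obj = joblib.load(filepath)                 # regular load
obj = joblib.load(filepath, mmap_mode='r')  # memory-map NumPy arrays instead of reading them fully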
Example 23
def save_variables(save_path, variables=None, sess=None):
sess = sess or get_session()
variables = variables or tf.trainable_variables()
ps = sess.run(variables)
save_dict = {v.name: value for v, value in zip(variables, ps)}
os.makedirs(os.path.dirname(save_path), exist_ok=True)
joblib.dump(save_dict, save_path)
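For completeness, here is a sketch of a matching restore routine, assuming the same get_session helper and a TensorFlow 1.x graph; it is not part of the example above, but mirrors how such a name-to-value dictionary is typically reloaded:

def load_variables(load_path, variables=None, sess=None):
    # Restore each variable from the name -> value dictionary written by save_variables.
    sess = sess or get_session()
    variables = variables or tf.trainable_variables()
    loaded_params = joblib.load(os.path.expanduser(load_path))
    restores = [v.assign(loaded_params[v.name]) for v in variables]
    sess.run(restores)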
Example 24
def save_variables(save_path, variables=None, sess=None):
sess = sess or get_session()
variables = variables or tf.trainable_variables()
ps = sess.run(variables)
save_dict = {v.name: value for v, value in zip(variables, ps)}
dirname = os.path.dirname(save_path)
if any(dirname):
os.makedirs(dirname, exist_ok=True)
joblib.dump(save_dict, save_path)
Example 25
def save(self, model, out_path):
if model is not None and out_path is not None:
self.log("Save model to " + out_path)
check_and_create_dir(out_path)
joblib.dump(model, out_path)
Example 26
def dump_reader(self, filename):
""" Dump reader model to a .joblib object
"""
self.cpu()
joblib.dump(self.reader, filename)
if torch.cuda.is_available():
self.cuda()
Example 27
def save(self, filename: str):
"""
Saves model to a custom file format
filename : str
Name of file to save. Don't include a filename extension;
extensions are added automatically.
The file format is a zipfile containing a joblib dump (pickle-like) plus dependency metadata.
The metadata is checked on load.
Includes validation and metadata to avoid pickle deserialization gotchas.
See Alex Gaynor's PyCon 2014 talk "Pickles are for Delis"
for more info on why we introduce this additional check.
"""
if '.zip' in filename:
raise UserWarning("The file extension '.zip' is automatically added"
+ " to saved models. The name will have redundant extensions")
sysverinfo = sys.version_info
meta_data = {
"python_": f'{sysverinfo[0]}.{sysverinfo[1]}',
"skl_": sklearn.__version__[:-2],
"pd_": pd.__version__[:-2],
"csrg_": cg.__version__[:-2]
}
with tempfile.TemporaryDirectory() as temp_dir:
joblib.dump(self, os.path.join(temp_dir, self.f_model), compress=True)
with open(os.path.join(temp_dir, self.f_mdata), 'w') as f:
json.dump(meta_data, f)
filename = shutil.make_archive(filename, 'zip', temp_dir)
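The docstring above describes the on-disk layout: a zip archive holding a joblib dump of the model plus a JSON file with dependency metadata. A hypothetical load counterpart (the f_model and f_mdata attribute names are taken from the save method; the actual metadata validation is only hinted at here) could unpack and read it like this:

def load(self, filename):
    # Hypothetical counterpart to save(): unpack the archive, read the metadata, then load the model.
    with tempfile.TemporaryDirectory() as temp_dir:
        shutil.unpack_archive(filename + '.zip', temp_dir, 'zip')
        with open(os.path.join(temp_dir, self.f_mdata)) as f:
            meta_data = json.load(f)  # versions recorded at save time; compare before trusting the pickle
        return joblib.load(os.path.join(temp_dir, self.f_model))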