Python源码示例:tensorflow.python.lib.io.file.FileIO()
示例1
def make_request_json(self, uri, output_json):
    """Produces a JSON request suitable to send to CloudML Prediction API.

    The image at `uri` is read, converted to RGB, resized to 299x299 with
    bilinear resampling, re-encoded as JPEG and written base64-encoded as a
    single JSON line.

    Args:
      uri: The input image URI.
      output_json: Path of the output JSON file where the request will be
        written. It is opened for writing here, replacing existing content.
    """

    def _open_file_read_binary(uri):
        # Retry in text mode when FileIO rejects the 'rb' mode string.
        try:
            return file_io.FileIO(uri, mode='rb')
        except errors.InvalidArgumentError:
            return file_io.FileIO(uri, mode='r')

    with open(output_json, 'w') as outf:
        with _open_file_read_binary(uri) as f:
            image_bytes = f.read()
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
        image = image.resize((299, 299), Image.BILINEAR)
        resized_image = io.BytesIO()
        image.save(resized_image, format='JPEG')
        # b64encode returns bytes on Python 3, which json.dumps cannot
        # serialize, so decode the (pure ASCII) result to text first.
        encoded_image = base64.b64encode(
            resized_image.getvalue()).decode('ascii')
        row = json.dumps({'key': uri, 'image_bytes': {'b64': encoded_image}})
        outf.write(row)
        outf.write('\n')
示例2
def process(self, element):
    """Decode the image behind a URI and yield it re-encoded as JPEG bytes.

    Args:
      element: A `(uri, label_id)` pair.

    Yields:
      A single `(uri, label_id, image_bytes)` tuple. Nothing is yielded when
      the image cannot be read; the failure is logged and `error_count` is
      incremented instead.
    """
    # io is imported locally so this block does not rely on a module-level
    # import; BytesIO replaces the Python 2 only cStringIO buffer.
    import io
    from tensorflow.python.lib.io import file_io as tf_file_io

    uri, label_id = element
    try:
        with tf_file_io.FileIO(uri, 'r') as f:
            img = Image.open(f).convert('RGB')
    # A variety of different calling libraries throw different exceptions here.
    # They all correspond to an unreadable file so we treat them equivalently.
    except Exception as e:  # pylint: disable=broad-except
        logging.exception('Error processing image %s: %s', uri, str(e))
        error_count.inc()
        return

    # Convert to desired format and output.
    output = io.BytesIO()
    img.save(output, 'jpeg')
    image_bytes = output.getvalue()
    yield uri, label_id, image_bytes
示例3
def load_images(image_files, resize=True):
    """Read every file in `image_files` and optionally resize the contents.

    Args:
      image_files: Iterable of paths readable through `file_io.FileIO`.
      resize: When exactly `False`, the raw file contents are returned
        untouched; otherwise they are passed through `resize_image`.

    Returns:
      A list of raw file contents, or the result of running the resize op
      over them in a TensorFlow session.
    """
    raw_images = []
    for path in image_files:
        with file_io.FileIO(path, 'r') as handle:
            raw_images.append(handle.read())

    if resize is False:
        return raw_images

    # To resize, run a tf session so we can reuse 'decode_and_resize()'
    # which is used in prediction graph. This makes sure we don't lose
    # any quality in prediction, while decreasing the size of the images
    # submitted to the model over network.
    image_str_tensor = tf.placeholder(tf.string, shape=[None])
    resize_op = tf.map_fn(resize_image, image_str_tensor, back_prop=False)
    feeds = collections.defaultdict(list)
    feeds[image_str_tensor.name] = raw_images
    with tf.Session() as sess:
        return sess.run(resize_op, feed_dict=feeds)
示例4
def read_vocab_file(file_path):
    """Reads a vocab file into memory.

    Args:
      file_path: Path of the vocab file. Each line of the vocab is in the
        form "token,example_count".

    Returns:
      Two lists, one for the vocab, and one for just the example counts.
    """
    csv_options = dict(
        header=None,
        names=['vocab', 'count'],
        dtype=str,  # Prevent pd from converting numerical categories.
        na_filter=False)  # Prevent pd from converting 'NA' to a NaN.
    with file_io.FileIO(file_path, 'r') as vocab_file:
        frame = pd.read_csv(vocab_file, **csv_options)
    tokens = frame['vocab'].tolist()
    counts = frame['count'].astype(int).tolist()
    return tokens, counts
示例5
def read_vocab_file(file_path):
    """Reads a vocab file into memory.

    Args:
      file_path: Path of the vocab file. Each line of the vocab is in the
        form "token,example_count".

    Returns:
      Two lists, one for the vocab, and one for just the example counts.
    """
    with file_io.FileIO(file_path, 'r') as vocab_file:
        table = pd.read_csv(
            vocab_file,
            header=None,
            names=['vocab', 'count'],
            # Keep everything as text so pandas does not convert numerical
            # categories.
            dtype=str,
            # Keep the literal token 'NA' instead of turning it into NaN.
            na_filter=False)
    return table['vocab'].tolist(), table['count'].astype(int).tolist()
示例6
def _download_images(data, img_cols):
    """Download images given image columns.

    Args:
      data: Iterable of dict-like rows.
      img_cols: Names of the columns that hold images.

    Returns:
      A `defaultdict(list)` mapping each image column to one entry per row:
      the existing `Image` object, an image loaded from the column's URL, or
      '' when the row has no (truthy) value for that column.
    """
    images = collections.defaultdict(list)
    for d in data:
        for img_col in img_cols:
            value = d.get(img_col, None)
            if not value:
                images[img_col].append('')
            elif isinstance(value, Image.Image):
                # If it is already an Image, just copy and continue.
                images[img_col].append(value)
            else:
                # Otherwise it is image url. Load the image.
                with file_io.FileIO(value, 'rb') as fi:
                    im = Image.open(fi)
                    # Image.open only reads the header; force the pixel data
                    # to be decoded while the underlying file is still open.
                    im.load()
                images[img_col].append(im)
    return images
示例7
def read_metadata(path):
    """Load metadata from a path into a new DatasetMetadata.

    `schema.pbtxt` under `path` is preferred and parsed as a text-format
    `schema_pb2.Schema`. If it does not exist, the legacy
    `v1-json/schema.json` file is read and handed to `_parse_schema_json`.

    Args:
      path: Directory containing the schema file.

    Returns:
      A `dataset_metadata.DatasetMetadata` built from the parsed schema.

    Raises:
      IOError: If neither schema file exists.
    """
    schema_file = os.path.join(path, 'schema.pbtxt')
    legacy_schema_file = os.path.join(path, 'v1-json', 'schema.json')
    if file_io.file_exists(schema_file):
        # Use a context manager so the handle is closed after reading.
        with file_io.FileIO(schema_file, 'r') as f:
            text_proto = f.read()
        schema_proto = text_format.Parse(text_proto, schema_pb2.Schema(),
                                         allow_unknown_extension=True)
    elif file_io.file_exists(legacy_schema_file):
        with file_io.FileIO(legacy_schema_file, 'r') as f:
            schema_json = f.read()
        schema_proto = _parse_schema_json(schema_json)
    else:
        raise IOError(
            'Schema file {} does not exist and neither did legacy format file '
            '{}'.format(schema_file, legacy_schema_file))
    return dataset_metadata.DatasetMetadata(schema_proto)
示例8
def _write_vocabulary(vocab_counter, vocab_size, destination):
    """Write the top vocab_size number of words to a file.

    Words seen fewer than 5 times are dropped, the `__UNK__` token is placed
    first with a count of 0, and each entry is written as "word count" on
    its own line.

    Args:
      vocab_counter: Mapping of word to occurrence count.
      vocab_size: Maximum number of entries, including the `__UNK__` token.
      destination: Path of the file to write.

    Returns : A word to index mapping python dictionary for the vocabulary.
    """
    # Remove words that occur less than 5 times. `items()` is used instead of
    # the Python 2 only `iteritems()` so this runs on both major versions.
    frequent = collections.Counter(
        {k: v for k, v in vocab_counter.items() if v > 4})
    # Filter top words
    vocab_list = frequent.most_common(min(len(frequent), vocab_size - 1))
    # Add __UNK__ token to the start of the top_words
    vocab_list.insert(0, (__UNK__, 0))
    # Write the top_words to destination (line by line fashion)
    with file_io.FileIO(destination, 'w+') as f:
        for word, count in vocab_list:
            f.write(u'{} {}\n'.format(word, count))
    # Create a rev_vocab dictionary that returns the index of each word
    return {word: i for i, (word, _) in enumerate(vocab_list)}
示例9
def _check_params(gcs_working_dir, version):
    """Check if the data already exists by checking for file 'params.json'.

    Args:
      gcs_working_dir: Working directory; the first five characters are
        stripped before splitting into bucket and prefix (presumably the
        'gs://' scheme -- confirm against callers).
      version: Version used to build the '<dir>/v<version>/data' path.

    Returns:
      The parsed JSON content of the params file when a blob with that file
      name is listed under the data prefix, otherwise None.
    """
    data_dir = '{}/v{}/data'.format(gcs_working_dir, version)
    # Prefix matching for the path
    bucket_name, prefix = data_dir[5:].split('/', 1)
    bucket = storage.Client().get_bucket(bucket_name)
    for blob in bucket.list_blobs(prefix=prefix):
        if blob.name.rsplit('/', 1)[-1] != PARAMS_FILE_NAME:
            continue
        params_path = '{}/{}'.format(data_dir, PARAMS_FILE_NAME)
        with file_io.FileIO(params_path, 'r') as f:
            return json.load(f)
    return None
示例10
def write_cam(file, cam):
    """Write camera parameters to `file` as text.

    The output holds an 'extrinsic' section with the 4x4 values of `cam[0]`,
    an 'intrinsic' section with the top-left 3x3 values of `cam[1]`, and a
    final line with the four values of `cam[1][3]`.

    Args:
      file: Destination path, opened through `file_io.FileIO`.
      cam: Indexable as `cam[k][i][j]`; `cam[0]` needs at least 4x4 entries
        and `cam[1]` at least 4 rows of 4 entries, as read below.
    """
    # A context manager guarantees the file is closed even if a write or an
    # index lookup raises part-way through.
    with file_io.FileIO(file, "w") as f:
        f.write('extrinsic\n')
        for i in range(0, 4):
            for j in range(0, 4):
                f.write(str(cam[0][i][j]) + ' ')
            f.write('\n')
        f.write('\n')

        f.write('intrinsic\n')
        for i in range(0, 3):
            for j in range(0, 3):
                f.write(str(cam[1][i][j]) + ' ')
            f.write('\n')

        f.write('\n' + str(cam[1][3][0]) + ' ' + str(cam[1][3][1]) + ' ' +
                str(cam[1][3][2]) + ' ' + str(cam[1][3][3]) + '\n')
示例11
def load_tfrecord(fname):
    """Load tfrecord dataset.

    The YAML metadata file provides, under its 'format' key, a 'dtype' and
    a 'shape' per feature. The records are read from the file with the same
    name as `fname` but with its last extension replaced by 'tfr'.

    Args:
        fname (str): filename of the .yml metadata file to be loaded.

    Returns:
        The `tf.data.TFRecordDataset` mapped through the example parser and
        the tensor converter defined below.
    """
    # dataset
    with FileIO(fname, 'r') as f:
        format_dict = yaml.safe_load(f)['format']
    dtypes = {k: spec['dtype'] for k, spec in format_dict.items()}
    shapes = {k: spec['shape'] for k, spec in format_dict.items()}

    # Every feature is stored as a serialized tensor string.
    feature_dict = {k: tf.FixedLenFeature([], tf.string) for k in dtypes}

    def parser(example):
        return tf.parse_single_example(example, feature_dict)

    def converter(tensors):
        tensors = {k: tf.parse_tensor(v, dtypes[k])
                   for k, v in tensors.items()}
        # set_shape mutates the tensor in place, so use a plain loop rather
        # than a comprehension evaluated only for its side effects.
        for k, v in tensors.items():
            v.set_shape(shapes[k])
        return tensors

    tfr = '.'.join(fname.split('.')[:-1] + ['tfr'])
    dataset = tf.data.TFRecordDataset(tfr).map(parser).map(converter)
    return dataset
示例12
def load_class_labels(label_filename):
    """Load class labels.

    The first line of the file is treated as a header and skipped. For each
    remaining line, the last tab-separated field is taken as the label.

    Args:
        label_filename (str): path of the tab-separated label file

    Returns:
        arr: an array of class labels
    """
    with file_io.FileIO(label_filename, mode='r') as label_file:
        rows = label_file.readlines()
    # rows[0] is the header line.
    labels = [row.rstrip().split('\t')[-1] for row in rows[1:]]
    return numpy.array(labels)
示例13
def _load_class_labels(label_filename):
    """Load class ids and labels.

    The first line of the file is treated as a header and replaced by the
    entry `(0, 'none')`. For each remaining line, the first tab-separated
    field is parsed as the integer id and the last field is the label.

    Args:
        label_filename (str): path of the tab-separated label file

    Returns:
        List[(int, str)]: a list of class ids and labels
    """
    with file_io.FileIO(label_filename, mode='r') as label_file:
        rows = label_file.readlines()
    if not rows:
        # No header line means the 'none' entry is never added.
        return []
    class_labels = [(0, 'none')]
    for row in rows[1:]:
        fields = row.rstrip().split('\t')
        class_labels.append((int(fields[0]), fields[-1]))
    return class_labels
示例14
def parse_schema_file(schema_path):  # type: (str) -> Schema
    """
    Read a serialized schema file and return the parsed proto object.

    The existence check is an `assert`, so a missing file surfaces as an
    AssertionError with a "File not found" message.
    """
    assert file_io.file_exists(schema_path), "File not found: {}".format(schema_path)
    schema = Schema()
    with file_io.FileIO(schema_path, "rb") as schema_file:
        serialized = schema_file.read()
        schema.ParseFromString(serialized)
    return schema
示例15
def settings(cls, settings_dir, settings_filename=None):
    # type: (str, str) -> List[Dict[str, Any]]
    """
    Read a Featran settings file and return a list of settings

    :param settings_dir: Path to the directory containing the settings file
    :param settings_filename: Filename of the Featran Settings JSON file
    :return: A List of Featran Settings
    """
    settings_path = cls.__get_featran_settings_file(settings_dir, settings_filename)
    with file_io.FileIO(settings_path, "r") as fio:
        return json.load(fio)
示例16
def _save_np(absolute_fn, array):
if absolute_fn.startswith('gs://'):
with file_io.FileIO(absolute_fn, 'w') as f:
np.save(f, array)
else:
np.save(absolute_fn, array)
示例17
def write_predictions(self):
    """Run one round of predictions and write predictions to csv file.

    Builds the eval graph, restores the latest checkpoint found under
    `self.checkpoint_path`, then runs `self.num_eval_batches + 1` batches.
    Each element becomes one line of `predictions.csv` in
    `self.output_path`: its key followed by one value per prediction tensor,
    comma-separated.
    """
    num_eval_batches = self.num_eval_batches + 1
    with tf.Graph().as_default() as graph:
        self.tensors = self.model.build_eval_graph(self.eval_data_paths,
                                                   self.batch_size)
        self.saver = tf.train.Saver()
        self.sv = tf.train.Supervisor(
            graph=graph,
            logdir=self.output_path,
            summary_op=None,
            global_step=None,
            saver=self.saver)

        last_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
        with self.sv.managed_session(
                master='', start_standard_services=False) as session:
            self.sv.saver.restore(session, last_checkpoint)

            predictions_path = os.path.join(self.output_path,
                                            'predictions.csv')
            with file_io.FileIO(predictions_path, 'w') as f:
                to_run = [self.tensors.keys] + self.tensors.predictions
                self.sv.start_queue_runners(session)
                last_log_progress = 0
                for i in range(num_eval_batches):
                    # Log only when the integer percentage advances.
                    progress = i * 100 // num_eval_batches
                    if progress > last_log_progress:
                        logging.info('%3d%% predictions processed', progress)
                        last_log_progress = progress

                    res = session.run(to_run)
                    keys, predictions = res[0], res[1:]
                    for row in range(len(keys)):
                        fields = ['%s' % keys[row]]
                        fields.extend(str(p[row]) for p in predictions)
                        f.write(','.join(fields) + '\n')
示例18
def process(self, element):
    """Read the image behind a URI and yield it re-encoded in Default.FORMAT.

    Args:
      element: Either a `(uri, label_ids)` pair or an object whose
        `.element` attribute is such a pair.

    Yields:
      A single `(uri, label_ids, image_bytes)` tuple. Nothing is yielded when
      the image cannot be read; the failure is logged and `error_count` is
      incremented instead.
    """
    try:
        uri, label_ids = element.element
    except AttributeError:
        uri, label_ids = element

    # TF will enable 'rb' in future versions, but until then, 'r' is
    # required.
    def _open_binary(path):
        try:
            return file_io.FileIO(path, mode='rb')
        except errors.InvalidArgumentError:
            return file_io.FileIO(path, mode='r')

    try:
        with _open_binary(uri) as source:
            raw = source.read()
        img = Image.open(io.BytesIO(raw)).convert('RGB')
    # A variety of different calling libraries throw different exceptions here.
    # They all correspond to an unreadable file so we treat them equivalently.
    except Exception as e:  # pylint: disable=broad-except
        logging.exception('Error processing image %s: %s', uri, str(e))
        error_count.inc()
        return

    # Convert to desired format and output.
    encoded = io.BytesIO()
    img.save(encoded, Default.FORMAT)
    yield uri, label_ids, encoded.getvalue()
示例19
def copy_file_to_gcs(job_dir, file_path):
    """Copy the file at `file_path` to `os.path.join(job_dir, file_path)`.

    The whole source is read into memory and written in a single call.
    """
    with file_io.FileIO(file_path, mode='rb') as input_f:
        destination = os.path.join(job_dir, file_path)
        payload = input_f.read()
        with file_io.FileIO(destination, mode='w+') as output_f:
            output_f.write(payload)
示例20
def write_predictions(self):
    """Run one round of predictions and write predictions to csv file.

    Builds the eval graph, restores the latest checkpoint found under
    `self.checkpoint_path`, then runs `self.num_eval_batches + 1` batches.
    Each element becomes one line of `predictions.csv` in
    `self.output_path`: its key followed by one value per prediction tensor,
    comma-separated.
    """
    num_eval_batches = self.num_eval_batches + 1
    with tf.Graph().as_default() as graph:
        self.tensors = self.model.build_eval_graph(self.eval_data_paths,
                                                   self.batch_size)
        self.saver = tf.train.Saver()
        self.sv = tf.train.Supervisor(
            graph=graph,
            logdir=self.output_path,
            summary_op=None,
            global_step=None,
            saver=self.saver)

        last_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
        with self.sv.managed_session(
                master='', start_standard_services=False) as session:
            self.sv.saver.restore(session, last_checkpoint)

            csv_path = os.path.join(self.output_path, 'predictions.csv')
            with file_io.FileIO(csv_path, 'w') as f:
                to_run = [self.tensors.keys] + self.tensors.predictions
                self.sv.start_queue_runners(session)
                last_log_progress = 0
                for i in range(num_eval_batches):
                    # Log only when the integer percentage advances.
                    progress = i * 100 // num_eval_batches
                    if progress > last_log_progress:
                        logging.info('%3d%% predictions processed', progress)
                        last_log_progress = progress

                    res = session.run(to_run)
                    keys, predictions = res[0], res[1:]
                    for row in range(len(keys)):
                        fields = ['%s' % keys[row]]
                        fields.extend(str(p[row]) for p in predictions)
                        f.write(','.join(fields) + '\n')
示例21
def process(self, element):
    """Read the image behind a URI and yield it re-encoded in Default.FORMAT.

    Args:
      element: Either a `(uri, label_ids)` pair or an object whose
        `.element` attribute is such a pair.

    Yields:
      A single `(uri, label_ids, image_bytes)` tuple. Nothing is yielded when
      the image cannot be read; the failure is logged and `error_count` is
      incremented instead.
    """
    try:
        uri, label_ids = element.element
    except AttributeError:
        uri, label_ids = element

    # TF will enable 'rb' in future versions, but until then, 'r' is
    # required.
    def _open_binary(path):
        try:
            return file_io.FileIO(path, mode='rb')
        except errors.InvalidArgumentError:
            return file_io.FileIO(path, mode='r')

    try:
        with _open_binary(uri) as source:
            raw = source.read()
        img = Image.open(io.BytesIO(raw)).convert('RGB')
    # A variety of different calling libraries throw different exceptions here.
    # They all correspond to an unreadable file so we treat them equivalently.
    except Exception as e:  # pylint: disable=broad-except
        logging.exception('Error processing image %s: %s', uri, str(e))
        error_count.inc()
        return

    # Convert to desired format and output.
    encoded = io.BytesIO()
    img.save(encoded, Default.FORMAT)
    yield uri, label_ids, encoded.getvalue()
示例22
def _read_file(filename):
    """Reads a file containing `GraphDef` and returns the protocol buffer.

    The content is first parsed as a binary proto; if that fails for any
    reason it is parsed as a text-format proto instead.

    Args:
      filename: `graph_def` filename including the path.

    Returns:
      A `GraphDef` protocol buffer.

    Raises:
      IOError: If the file doesn't exist, or cannot be successfully parsed.
    """
    graph_def = graph_pb2.GraphDef()
    if not file_io.file_exists(filename):
        raise IOError("File %s does not exist." % filename)
    # Read through a context manager so the handle is always closed.
    with file_io.FileIO(filename, "rb") as f:
        file_content = f.read()
    # First try to read it as a binary file.
    try:
        graph_def.ParseFromString(file_content)
        return graph_def
    except Exception:  # pylint: disable=broad-except
        # Deliberate best-effort: any binary-parse failure falls through to
        # the text-format attempt below.
        pass

    # Next try to read it as a text file.
    try:
        text_format.Merge(file_content, graph_def)
    except text_format.ParseError as e:
        raise IOError("Cannot parse file %s: %s." % (filename, str(e)))

    return graph_def
示例23
def read_meta_graph_file(filename):
    """Reads a file containing `MetaGraphDef` and returns the protocol buffer.

    The content is first parsed as a binary proto; if that fails for any
    reason it is decoded as UTF-8 and parsed as a text-format proto instead.

    Args:
      filename: `meta_graph_def` filename including the path.

    Returns:
      A `MetaGraphDef` protocol buffer.

    Raises:
      IOError: If the file doesn't exist, or cannot be successfully parsed.
    """
    meta_graph_def = meta_graph_pb2.MetaGraphDef()
    if not file_io.file_exists(filename):
        raise IOError("File %s does not exist." % filename)
    # Read through a context manager so the handle is always closed.
    with file_io.FileIO(filename, "rb") as f:
        file_content = f.read()
    # First try to read it as a binary file.
    try:
        meta_graph_def.ParseFromString(file_content)
        return meta_graph_def
    except Exception:  # pylint: disable=broad-except
        # Deliberate best-effort: any binary-parse failure falls through to
        # the text-format attempt below.
        pass

    # Next try to read it as a text file. Content that is not valid UTF-8 is
    # reported as the documented IOError rather than leaking a
    # UnicodeDecodeError.
    try:
        text_format.Merge(file_content.decode("utf-8"), meta_graph_def)
    except (text_format.ParseError, UnicodeDecodeError) as e:
        raise IOError("Cannot parse file %s: %s." % (filename, str(e)))

    return meta_graph_def
示例24
def _read_tensor_tsv_file(fpath):
    """Read a tab-separated file of floats into a float32 array.

    Args:
      fpath: Path of the TSV file; each non-blank line is one row.

    Returns:
      A `float32` numpy array with one row per non-blank line.
    """
    with file_io.FileIO(fpath, 'r') as f:
        tensor = []
        for line in f:
            # Lines from iteration keep their newline, so a bare truthiness
            # test never skips anything; strip first so blank lines are
            # ignored instead of failing in float('').
            if line.strip():
                tensor.append(
                    list(map(float, line.rstrip('\n').split('\t'))))
    return np.array(tensor, dtype='float32')
示例25
def _serve_sprite_image(self, request):
    """Serve the sprite image configured for a tensor of a run.

    Reads the 'run' and 'name' query arguments from `request.args`. Any
    missing argument, unknown run, missing sprite configuration or missing
    file produces a 400 plain-text response.

    Returns:
      The value of `Respond(...)`: the image bytes with a MIME type derived
      from the detected image type, or a 400 error message.
    """
    run = request.args.get('run')
    if not run:
        return Respond(request, 'query parameter "run" is required',
                       'text/plain', 400)

    name = request.args.get('name')
    if name is None:
        return Respond(request, 'query parameter "name" is required',
                       'text/plain', 400)

    if run not in self.configs:
        return Respond(request, 'Unknown run: "%s"' % run, 'text/plain', 400)

    config = self.configs[run]
    embedding_info = self._get_embedding(name, config)

    if not embedding_info or not embedding_info.sprite.image_path:
        return Respond(
            request,
            'No sprite image file found for tensor "%s" in the config file "%s"' %
            (name, self.config_fpaths[run]), 'text/plain', 400)

    fpath = os.path.expanduser(embedding_info.sprite.image_path)
    fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
    if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
        return Respond(request, '"%s" does not exist or is directory' % fpath,
                       'text/plain', 400)

    # A context manager closes the file even if the read raises.
    with file_io.FileIO(fpath, 'rb') as f:
        encoded_image_string = f.read()
    image_type = imghdr.what(None, encoded_image_string)
    mime_type = _IMGHDR_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE)
    return Respond(request, encoded_image_string, mime_type)
示例26
def _read_tensor_file(fpath):
    """Read a tab-separated file of floats into a float32 array.

    Args:
      fpath: Path of the TSV file; each non-blank line is one row.

    Returns:
      A `float32` numpy array with one row per non-blank line.
    """
    with file_io.FileIO(fpath, 'r') as f:
        tensor = []
        for line in f:
            # Lines from iteration keep their newline, so strip before
            # testing; blank lines are skipped rather than failing in
            # float('').
            if line.strip():
                # Materialize the row: on Python 3 a bare map() is a lazy
                # iterator that np.array cannot convert to float32.
                tensor.append(
                    list(map(float, line.rstrip('\n').split('\t'))))
    return np.array(tensor, dtype='float32')
示例27
def _serve_bookmarks(self, request, query_params):
run = query_params.get('run')
if not run:
request.respond('query parameter "run" is required', 'text/plain', 400)
return
name = query_params.get('name')
if name is None:
request.respond('query parameter "name" is required', 'text/plain', 400)
return
if run not in self.configs:
request.respond('Unknown run: %s' % run, 'text/plain', 400)
return
config = self.configs[run]
fpath = self._get_bookmarks_file_for_tensor(name, config)
if not fpath:
request.respond(
'No bookmarks file found for tensor %s in the config file %s' %
(name, self.config_fpaths[run]), 'text/plain', 400)
return
if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
request.respond('%s is not a file' % fpath, 'text/plain', 400)
return
bookmarks_json = None
with file_io.FileIO(fpath, 'r') as f:
bookmarks_json = f.read()
request.respond(bookmarks_json, 'application/json')
示例28
def _serve_sprite_image(self, request, query_params):
run = query_params.get('run')
if not run:
request.respond('query parameter "run" is required', 'text/plain', 400)
return
name = query_params.get('name')
if name is None:
request.respond('query parameter "name" is required', 'text/plain', 400)
return
if run not in self.configs:
request.respond('Unknown run: %s' % run, 'text/plain', 400)
return
config = self.configs[run]
embedding_info = self._get_embedding(name, config)
if not embedding_info or not embedding_info.sprite.image_path:
request.respond(
'No sprite image file found for tensor %s in the config file %s' %
(name, self.config_fpaths[run]), 'text/plain', 400)
return
fpath = embedding_info.sprite.image_path
if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
request.respond(
'%s does not exist or is directory' % fpath, 'text/plain', 400)
return
f = file_io.FileIO(fpath, 'r')
encoded_image_string = f.read()
f.close()
image_type = imghdr.what(None, encoded_image_string)
mime_type = _IMGHDR_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE)
request.respond(encoded_image_string, mime_type)
示例29
def parse_model_config(json_file):
    """Load the JSON model configuration stored at `json_file`."""
    # FileIO is used because the built-in 'open' function can't support
    # Google Cloud Platform paths.
    with file_io.FileIO(json_file, 'r') as config_file:
        return json.load(config_file)
示例30
def load_from_file(path, mode):
    """Load array data from a Cloud Storage or local path.

    Args:
      path: For paths starting with 'gs://', '.npy' is appended and the
        file is read with `np.load`. Any other path is opened with the
        built-in `open` and read as raw bytes.
      mode: File mode passed to `FileIO` or `open`.

    Returns:
      The array returned by `np.load` for 'gs://' paths, otherwise a 1-D
      `uint8` array of the file's bytes.
    """
    if path.startswith('gs://'):  # For google cloud ml-engine. pickle
        from tensorflow.python.lib.io import file_io
        # Context managers make sure neither branch leaks its file handle.
        with file_io.FileIO(path + '.npy', mode) as f:
            loaded = np.load(f)
    else:
        with open(path, mode) as fd:
            loaded = np.fromfile(file=fd, dtype=np.uint8)
    return loaded