Python源码示例:pandas.io.json.json_normalize()
示例1
def main(opt):
    """Score predicted captions against the ground-truth sentences and print the result.

    ``opt`` is indexed with three keys:

    * ``"videoinfo_json"`` -- path to a JSON file whose ``'sentences'`` entry
      holds the ground-truth records.
    * ``'video_ids'`` -- path to a text file with one video id per line.
    * ``'pred'`` -- path to a text file with one predicted caption per line;
      line *i* is paired with line *i* of the ids file.
    """
    scorer = COCOScorer()

    # Context managers guarantee the handles are closed even if parsing or
    # scoring raises.
    with open(opt["videoinfo_json"]) as info_file:
        gt_dataframe = json_normalize(json.load(info_file)['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)

    samples = {}
    with open(opt['video_ids']) as video_ids, open(opt['pred']) as sents:
        for video_id in video_ids:
            # Drop the line ending first (otherwise an id without an
            # extension would keep its trailing newline), then strip the
            # file extension.
            video_id = video_id.strip().split('.')[0]
            sent = sents.readline().strip()
            samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    # Printed outside the suppression block so the score is actually shown.
    print(valid_score)
示例2
def test_simple_normalize(self, state_data):
    """Check ``json_normalize`` with a record path, with and without ``meta``."""
    # Single record: the 'counties' list becomes the rows of the frame.
    result = json_normalize(state_data[0], 'counties')
    expected = DataFrame(state_data[0]['counties'])
    tm.assert_frame_equal(result, expected)

    # List of records: every record's counties are concatenated in order.
    result = json_normalize(state_data, 'counties')

    expected = []
    for rec in state_data:
        expected.extend(rec['counties'])
    expected = DataFrame(expected)

    tm.assert_frame_equal(result, expected)

    # meta='state' adds the parent value to each county row. The repeat
    # counts assume the fixture has 3 counties in the first state and 2 in
    # the second.
    result = json_normalize(state_data, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

    tm.assert_frame_equal(result, expected)
示例3
def test_simple_normalize_with_separator(self, deep_nested):
    """Check that ``sep`` controls how nested keys are joined into column names."""
    # GH 14883
    # Default separator is '.'.
    result = json_normalize({'A': {'A': 1, 'B': 2}})
    expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # Explicit ASCII separator.
    result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
    expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # Non-ASCII separator.
    result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
    expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # The separator also applies to nested meta paths; only the set of
    # column names is compared, hence the sort on both sides.
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']],
                            sep='_')
    expected = Index(['name', 'pop',
                      'country', 'states_name']).sort_values()
    assert result.columns.sort_values().equals(expected)
示例4
def test_more_deeply_nested(self, deep_nested):
    """Check a two-level record path combined with top-level and nested meta."""
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']])
    # meta_prefix={'states': 'state_'})

    # One row per city; 'country' and 'states.name' repeat the values of the
    # enclosing country and state.
    ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3,
               'states.name': ['California', 'California', 'Ohio', 'Ohio',
                               'Bayern', 'Nordrhein-Westfalen',
                               'Nordrhein-Westfalen'],
               'name': ['San Francisco', 'Los Angeles', 'Columbus',
                        'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'],
               'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239]}

    # Reuse the result's column order so only the contents are compared.
    expected = DataFrame(ex_data, columns=result.columns)
    tm.assert_frame_equal(result, expected)
示例5
def test_meta_name_conflict(self):
    """Check that meta names clashing with record fields need ``meta_prefix``."""
    data = [{'foo': 'hello',
             'bar': 'there',
             'data': [{'foo': 'something', 'bar': 'else'},
                      {'foo': 'something2', 'bar': 'else2'}]}]

    # Without a prefix, 'foo'/'bar' exist both as meta and as record fields.
    msg = (r"Conflicting metadata name (foo|bar),"
           " need distinguishing prefix")
    with pytest.raises(ValueError, match=msg):
        json_normalize(data, 'data', meta=['foo', 'bar'])

    # With a prefix, both the record fields and the prefixed meta columns
    # must be present.
    result = json_normalize(data, 'data', meta=['foo', 'bar'],
                            meta_prefix='meta')

    for val in ['metafoo', 'metabar', 'foo', 'bar']:
        assert val in result
示例6
def test_record_prefix(self, state_data):
    """Check that ``record_prefix`` is prepended to record columns only."""
    # Baseline without a prefix.
    result = json_normalize(state_data[0], 'counties')
    expected = DataFrame(state_data[0]['counties'])
    tm.assert_frame_equal(result, expected)

    result = json_normalize(state_data, 'counties',
                            meta='state',
                            record_prefix='county_')

    expected = []
    for rec in state_data:
        expected.extend(rec['counties'])
    expected = DataFrame(expected)
    # Prefix the record columns first; the meta column 'state' is added
    # afterwards and therefore stays unprefixed.
    expected = expected.rename(columns=lambda x: 'county_' + x)
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

    tm.assert_frame_equal(result, expected)
示例7
def test_missing_field(self, author_missing_data):
    """Check the frame produced when records lack some nested fields."""
    # GH20030:
    result = json_normalize(author_missing_data)
    # The first expected row is entirely NaN; the second has an explicit
    # None for 'info' next to populated nested columns.
    ex_data = [
        {'info': np.nan,
         'author_name.first': np.nan,
         'author_name.last_name': np.nan,
         'info.created_at': np.nan,
         'info.last_updated': np.nan},
        {'info': None,
         'author_name.first': 'Jane',
         'author_name.last_name': 'Doe',
         'info.created_at': '11/08/1993',
         'info.last_updated': '26/05/2012'}
    ]
    expected = DataFrame(ex_data)
    tm.assert_frame_equal(result, expected)
示例8
def test_simple_normalize(self, state_data):
    """Check ``json_normalize`` with a record path, with and without ``meta``."""
    # Single record: the 'counties' list becomes the rows of the frame.
    result = json_normalize(state_data[0], 'counties')
    expected = DataFrame(state_data[0]['counties'])
    tm.assert_frame_equal(result, expected)

    # List of records: every record's counties are concatenated in order.
    result = json_normalize(state_data, 'counties')

    expected = []
    for rec in state_data:
        expected.extend(rec['counties'])
    expected = DataFrame(expected)

    tm.assert_frame_equal(result, expected)

    # meta='state' adds the parent value to each county row. The repeat
    # counts assume the fixture has 3 counties in the first state and 2 in
    # the second.
    result = json_normalize(state_data, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

    tm.assert_frame_equal(result, expected)
示例9
def test_simple_normalize_with_separator(self, deep_nested):
    """Check that ``sep`` controls how nested keys are joined into column names."""
    # GH 14883
    # Default separator is '.'.
    result = json_normalize({'A': {'A': 1, 'B': 2}})
    expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # Explicit ASCII separator.
    result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
    expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # Non-ASCII separator.
    result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
    expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # The separator also applies to nested meta paths; only the set of
    # column names is compared, hence the sort on both sides.
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']],
                            sep='_')
    expected = Index(['name', 'pop',
                      'country', 'states_name']).sort_values()
    assert result.columns.sort_values().equals(expected)
示例10
def test_record_prefix(self, state_data):
    """Check that ``record_prefix`` is prepended to record columns only."""
    # Baseline without a prefix.
    result = json_normalize(state_data[0], 'counties')
    expected = DataFrame(state_data[0]['counties'])
    tm.assert_frame_equal(result, expected)

    result = json_normalize(state_data, 'counties',
                            meta='state',
                            record_prefix='county_')

    expected = []
    for rec in state_data:
        expected.extend(rec['counties'])
    expected = DataFrame(expected)
    # Prefix the record columns first; the meta column 'state' is added
    # afterwards and therefore stays unprefixed.
    expected = expected.rename(columns=lambda x: 'county_' + x)
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

    tm.assert_frame_equal(result, expected)
示例11
def test_non_ascii_key(self):
    """Check that a non-ASCII top-level key survives normalization."""
    # Build the JSON text from UTF-8 bytes; on Python 3 the byte literal has
    # to be decoded explicitly, on Python 2 the plain literal is used as-is.
    if compat.PY3:
        testjson = (
            b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
            b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
        ).decode('utf8')
    else:
        testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                    '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')

    # The nested 'sub' object flattens to 'sub.A'/'sub.B'; the non-ASCII key
    # is expected unchanged as a column label.
    testdata = {
        u'sub.A': [1, 3],
        u'sub.B': [2, 4],
        b"\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1]
    }
    expected = DataFrame(testdata)

    result = json_normalize(json.loads(testjson))
    tm.assert_frame_equal(result, expected)
示例12
def test_simple_normalize(self):
    """Check ``json_normalize`` with a record path, using ``self.state_data``."""
    # Single record: the 'counties' list becomes the rows of the frame.
    result = json_normalize(self.state_data[0], 'counties')
    expected = DataFrame(self.state_data[0]['counties'])
    tm.assert_frame_equal(result, expected)

    # List of records: every record's counties are concatenated in order.
    result = json_normalize(self.state_data, 'counties')

    expected = []
    for rec in self.state_data:
        expected.extend(rec['counties'])
    expected = DataFrame(expected)

    tm.assert_frame_equal(result, expected)

    # meta='state' adds the parent value to each county row. The repeat
    # counts assume 3 counties in the first state and 2 in the second.
    result = json_normalize(self.state_data, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

    tm.assert_frame_equal(result, expected)
示例13
def df_from_file(path):
    """Load the JSON at *path* into a flat DataFrame with a ``result`` column.

    The data is flattened with ``json_normalize``, a ``details`` column is
    dropped when present, and ``result`` is copied from the
    ``result.<default_measurement>`` column. The measurement name is taken
    from the first unique value of ``experiment_setup.default_measurement``
    and falls back to ``"accuracy"`` (with a warning) when that column is
    missing.

    Raises ``KeyError`` if the selected ``result.<measurement>`` column does
    not exist.
    """
    data = load_json(path)
    # meta = [["experiment_setup", "task"],
    #         ["experiment_setup", "subcategory"],
    #         ["experiment_setup", "method"],
    #         ["experiment_setup", "embeddings"]]
    dframe = json_normalize(data)
    if "details" in dframe:
        dframe.drop("details", axis="columns", inplace=True)
    default_measurement = "accuracy"
    try:
        default_measurement = dframe["experiment_setup.default_measurement"].unique()[0]
    except KeyError:
        logger.warning(f"default_measurement not specified in {path}")
    dframe["result"] = dframe["result." + default_measurement]
    # df["reciprocal_rank"] = 1 / (df["rank"] + 1)
    return dframe
示例14
def test_simple_normalize(self, state_data):
    """Check ``json_normalize`` with a record path, with and without ``meta``."""
    # Single record: the 'counties' list becomes the rows of the frame.
    result = json_normalize(state_data[0], 'counties')
    expected = DataFrame(state_data[0]['counties'])
    tm.assert_frame_equal(result, expected)

    # List of records: every record's counties are concatenated in order.
    result = json_normalize(state_data, 'counties')

    expected = []
    for rec in state_data:
        expected.extend(rec['counties'])
    expected = DataFrame(expected)

    tm.assert_frame_equal(result, expected)

    # meta='state' adds the parent value to each county row. The repeat
    # counts assume the fixture has 3 counties in the first state and 2 in
    # the second.
    result = json_normalize(state_data, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

    tm.assert_frame_equal(result, expected)
示例15
def test_simple_normalize_with_separator(self, deep_nested):
    """Check that ``sep`` controls how nested keys are joined into column names."""
    # GH 14883
    # Default separator is '.'.
    result = json_normalize({'A': {'A': 1, 'B': 2}})
    expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # Explicit ASCII separator.
    result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
    expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # Non-ASCII separator.
    result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
    expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # The separator also applies to nested meta paths; only the set of
    # column names is compared, hence the sort on both sides.
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']],
                            sep='_')
    expected = Index(['name', 'pop',
                      'country', 'states_name']).sort_values()
    assert result.columns.sort_values().equals(expected)
示例16
def test_more_deeply_nested(self, deep_nested):
    """Check a two-level record path combined with top-level and nested meta."""
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']])
    # meta_prefix={'states': 'state_'})

    # One row per city; 'country' and 'states.name' repeat the values of the
    # enclosing country and state.
    ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3,
               'states.name': ['California', 'California', 'Ohio', 'Ohio',
                               'Bayern', 'Nordrhein-Westfalen',
                               'Nordrhein-Westfalen'],
               'name': ['San Francisco', 'Los Angeles', 'Columbus',
                        'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'],
               'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239]}

    # Reuse the result's column order so only the contents are compared.
    expected = DataFrame(ex_data, columns=result.columns)
    tm.assert_frame_equal(result, expected)
示例17
def test_meta_name_conflict(self):
    """Check that meta names clashing with record fields need ``meta_prefix``."""
    data = [{'foo': 'hello',
             'bar': 'there',
             'data': [{'foo': 'something', 'bar': 'else'},
                      {'foo': 'something2', 'bar': 'else2'}]}]

    # Without a prefix, 'foo'/'bar' exist both as meta and as record fields.
    msg = (r"Conflicting metadata name (foo|bar),"
           " need distinguishing prefix")
    with pytest.raises(ValueError, match=msg):
        json_normalize(data, 'data', meta=['foo', 'bar'])

    # With a prefix, both the record fields and the prefixed meta columns
    # must be present.
    result = json_normalize(data, 'data', meta=['foo', 'bar'],
                            meta_prefix='meta')

    for val in ['metafoo', 'metabar', 'foo', 'bar']:
        assert val in result
示例18
def test_record_prefix(self, state_data):
    """Check that ``record_prefix`` is prepended to record columns only."""
    # Baseline without a prefix.
    result = json_normalize(state_data[0], 'counties')
    expected = DataFrame(state_data[0]['counties'])
    tm.assert_frame_equal(result, expected)

    result = json_normalize(state_data, 'counties',
                            meta='state',
                            record_prefix='county_')

    expected = []
    for rec in state_data:
        expected.extend(rec['counties'])
    expected = DataFrame(expected)
    # Prefix the record columns first; the meta column 'state' is added
    # afterwards and therefore stays unprefixed.
    expected = expected.rename(columns=lambda x: 'county_' + x)
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

    tm.assert_frame_equal(result, expected)
示例19
def test_missing_field(self, author_missing_data):
    """Check the frame produced when records lack some nested fields."""
    # GH20030:
    result = json_normalize(author_missing_data)
    # The first expected row is entirely NaN; the second has an explicit
    # None for 'info' next to populated nested columns.
    ex_data = [
        {'info': np.nan,
         'author_name.first': np.nan,
         'author_name.last_name': np.nan,
         'info.created_at': np.nan,
         'info.last_updated': np.nan},
        {'info': None,
         'author_name.first': 'Jane',
         'author_name.last_name': 'Doe',
         'info.created_at': '11/08/1993',
         'info.last_updated': '26/05/2012'}
    ]
    expected = DataFrame(ex_data)
    tm.assert_frame_equal(result, expected)
示例20
def query_dataframe(url_suffix, query_params):
    '''
    API call that returns a dictionary with the following structure:

    key     value            type
    code    return code      int
    data    query result     dataframe
    msg     return message   string

    ``code`` defaults to -1, ``data`` to None and ``msg`` to '' for any key
    missing from the response. When ``query_json`` returns None, ``msg`` is
    set to 'query failed.' and the other defaults are kept.
    '''
    return_value = {'code': -1, 'data': None, 'msg': ''}
    rlt = query_json(url_suffix, query_params)
    if rlt is None:
        return_value['msg'] = 'query failed.'
    else:
        # Copy over only the fields the response actually provides.
        if 'code' in rlt:
            return_value['code'] = rlt['code']
        if 'msg' in rlt:
            return_value['msg'] = rlt['msg']
        if 'data' in rlt:
            # Flatten the (possibly nested) payload into a DataFrame.
            return_value['data'] = json_normalize(rlt['data'])
    return return_value
示例21
def _read_lines(path):
    """Return the lines of the text file at *path* without line endings."""
    # The context manager closes the handle; the original left it open.
    with open(path) as handle:
        return handle.read().splitlines()


def _read_split_ids(path):
    """Return the set of ids listed one per line in *path*, cut at the first '.'."""
    return {line.split(".")[0] for line in _read_lines(path)}


def get_flickr30k_data(cfg):
    """Build the train/val/test caption tables from the provided splits.

    Reads ``cfg.train_file``, ``cfg.val_file`` and ``cfg.test_file`` (one
    file name per line) and ``cfg.annotations_path``, where each line has the
    form ``<file name>#<sentence id>\\t<caption>``.

    Returns a tuple ``(train_imgs, val_imgs, test_imgs)`` of DataFrames with
    the columns ``video_id``, ``sentence_id``, ``caption`` and
    ``video_path``. ``video_path`` joins ``cfg.path_to_descriptors`` with
    ``video_id + cfg.descriptor_suffix + ".npy"``. Each frame keeps the row
    index of the full annotation table.
    """
    # using the provided splits
    train_split = _read_split_ids(cfg.train_file)
    val_split = _read_split_ids(cfg.val_file)
    test_split = _read_split_ids(cfg.test_file)

    data = [{"video_id": item.split(".")[0],
             "sentence_id": item.split("#")[1].split("\t")[0],
             "caption": item.split("\t")[1]}
            for item in _read_lines(cfg.annotations_path)]
    sentences = json_normalize(data)
    sentences['video_path'] = sentences['video_id'].map(
        lambda x: os.path.join(cfg.path_to_descriptors,
                               x + cfg.descriptor_suffix + ".npy"))

    # NOTE: the original called reset_index() on each subset and discarded
    # the result, which had no effect; those dead calls are removed so the
    # returned frames are unchanged.
    train_imgs = sentences.loc[sentences["video_id"].isin(train_split)]
    val_imgs = sentences.loc[sentences["video_id"].isin(val_split)]
    test_imgs = sentences.loc[sentences["video_id"].isin(test_split)]
    return train_imgs, val_imgs, test_imgs
示例22
def _load_msr_vtt_annotations(path):
    """Return ``(sentences, videos)`` DataFrames parsed from the JSON file at *path*."""
    with open(path) as data_file:
        data = json.load(data_file)
    return json_normalize(data['sentences']), json_normalize(data['videos'])


def _select_msr_vtt_split(sentences, videos, split, descriptor_dir):
    """Return the sentences of the videos in *split*, with a ``video_path`` column."""
    split_ids = videos[videos['split'] == split]["video_id"]
    # Take an explicit copy so adding a column does not operate on a slice of
    # ``sentences`` (which triggers pandas' chained-assignment warning).
    subset = sentences.loc[sentences["video_id"].isin(split_ids)].copy()
    subset['video_path'] = subset['video_id'].map(
        lambda x: os.path.join(descriptor_dir, x + "_incp_v3.npy"))
    return subset


def get_msr_vtt_data(cfg):
    """Build the train/validation/test sentence tables.

    Train and validation sentences come from ``cfg.trainval_annotations``
    (videos whose ``split`` is ``"train"`` / ``"validate"``), test sentences
    from ``cfg.test_annotations`` (``split == "test"``). Each returned frame
    gets a ``video_path`` column pointing at ``<video_id>_incp_v3.npy`` under
    ``cfg.path_to_trainval_descriptors`` or ``cfg.path_to_test_descriptors``.

    Returns the tuple ``(train_vids, val_vids, test_vids)``.
    """
    # trainval data
    sentences, videos = _load_msr_vtt_annotations(cfg.trainval_annotations)
    train_vids = _select_msr_vtt_split(
        sentences, videos, "train", cfg.path_to_trainval_descriptors)
    val_vids = _select_msr_vtt_split(
        sentences, videos, "validate", cfg.path_to_trainval_descriptors)

    # test data
    sentences, videos = _load_msr_vtt_annotations(cfg.test_annotations)
    test_vids = _select_msr_vtt_split(
        sentences, videos, "test", cfg.path_to_test_descriptors)
    return train_vids, val_vids, test_vids
示例23
def format_df(response, job):
    """Turn an exchange API *response* into a DataFrame with standard columns.

    ``job`` is indexed with the keys ``'type'``, ``'exchange'`` and
    ``'symbol'``. After flattening, the frame is passed through
    ``standard_columns``, and ``exchange`` / ``symbol`` columns are filled in
    from ``job`` when missing.

    NOTE(review): the nesting below was reconstructed from a copy of this
    function that had lost its indentation; confirm against the original
    that the ``else`` belongs to the ``job['type'] == 'trades'`` test.
    """
    if job['type'] == 'trades':
        if job['exchange'] == 'btce':
            # Descend into the response by its key(s) to reach the payload;
            # presumably the response has a single top-level key -- verify.
            for col in response:
                response = response[col]
        df = json_normalize(response)
        if job['exchange'] == 'coinbase':
            # Parse the time strings and derive an integer timestamp in
            # seconds from the nanosecond representation.
            df['time'] = to_datetime(df['time'], utc=0)
            df['timestamp'] = df['time'].astype(np.int64) // 10**9
    else:
        df = json_normalize(response)
    df = standard_columns(df)
    if 'exchange' not in df:
        df['exchange'] = job['exchange']
    if 'symbol' not in df:
        df['symbol'] = job['symbol']
    return df
# Standardize column names and drop columns not in the dictionary below
示例24
def test_simple_normalize(self, state_data):
    """Check ``json_normalize`` with a record path, with and without ``meta``."""
    # Single record: the 'counties' list becomes the rows of the frame.
    result = json_normalize(state_data[0], 'counties')
    expected = DataFrame(state_data[0]['counties'])
    tm.assert_frame_equal(result, expected)

    # List of records: every record's counties are concatenated in order.
    result = json_normalize(state_data, 'counties')

    expected = []
    for rec in state_data:
        expected.extend(rec['counties'])
    expected = DataFrame(expected)

    tm.assert_frame_equal(result, expected)

    # meta='state' adds the parent value to each county row. The repeat
    # counts assume the fixture has 3 counties in the first state and 2 in
    # the second.
    result = json_normalize(state_data, 'counties', meta='state')
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

    tm.assert_frame_equal(result, expected)
示例25
def test_simple_normalize_with_separator(self, deep_nested):
    """Check that ``sep`` controls how nested keys are joined into column names."""
    # GH 14883
    # Default separator is '.'.
    result = json_normalize({'A': {'A': 1, 'B': 2}})
    expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # Explicit ASCII separator.
    result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
    expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # Non-ASCII separator.
    result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
    expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
    tm.assert_frame_equal(result.reindex_like(expected), expected)

    # The separator also applies to nested meta paths; only the set of
    # column names is compared, hence the sort on both sides.
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']],
                            sep='_')
    expected = Index(['name', 'pop',
                      'country', 'states_name']).sort_values()
    assert result.columns.sort_values().equals(expected)
示例26
def test_more_deeply_nested(self, deep_nested):
    """Check a two-level record path combined with top-level and nested meta."""
    result = json_normalize(deep_nested, ['states', 'cities'],
                            meta=['country', ['states', 'name']])
    # meta_prefix={'states': 'state_'})

    # One row per city; 'country' and 'states.name' repeat the values of the
    # enclosing country and state.
    ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3,
               'states.name': ['California', 'California', 'Ohio', 'Ohio',
                               'Bayern', 'Nordrhein-Westfalen',
                               'Nordrhein-Westfalen'],
               'name': ['San Francisco', 'Los Angeles', 'Columbus',
                        'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'],
               'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239]}

    # Reuse the result's column order so only the contents are compared.
    expected = DataFrame(ex_data, columns=result.columns)
    tm.assert_frame_equal(result, expected)
示例27
def test_record_prefix(self, state_data):
    """Check that ``record_prefix`` is prepended to record columns only."""
    # Baseline without a prefix.
    result = json_normalize(state_data[0], 'counties')
    expected = DataFrame(state_data[0]['counties'])
    tm.assert_frame_equal(result, expected)

    result = json_normalize(state_data, 'counties',
                            meta='state',
                            record_prefix='county_')

    expected = []
    for rec in state_data:
        expected.extend(rec['counties'])
    expected = DataFrame(expected)
    # Prefix the record columns first; the meta column 'state' is added
    # afterwards and therefore stays unprefixed.
    expected = expected.rename(columns=lambda x: 'county_' + x)
    expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

    tm.assert_frame_equal(result, expected)
示例28
def test_non_ascii_key(self):
    """Check that a non-ASCII top-level key survives normalization."""
    # Build the JSON text from UTF-8 bytes; on Python 3 the byte literal has
    # to be decoded explicitly, on Python 2 the plain literal is used as-is.
    if compat.PY3:
        testjson = (
            b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
            b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
        ).decode('utf8')
    else:
        testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                    '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')

    # The nested 'sub' object flattens to 'sub.A'/'sub.B'; the non-ASCII key
    # is expected unchanged as a column label.
    testdata = {
        u'sub.A': [1, 3],
        u'sub.B': [2, 4],
        b"\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1]
    }
    expected = DataFrame(testdata)

    result = json_normalize(json.loads(testjson))
    tm.assert_frame_equal(result, expected)
示例29
def to_dataframe(self, normalize=False):
    """Transforms the data into a pandas DataFrame

    :param normalize: Whether or not to normalize any nested objects in the results into distinct columns.
    :type normalize: bool
    :rtype: pandas.DataFrame
    :raises ImportError: if the ``pandas`` package cannot be imported.
    """
    try:
        import pandas
    except ImportError as err:
        raise ImportError("The 'pandas' package could not be imported") from err

    if normalize:
        # ``pandas.json_normalize`` is the public location since pandas 1.0;
        # ``pandas.io.json.json_normalize`` is deprecated, so it is used
        # only as a fallback for older pandas releases.
        json_normalize = getattr(pandas, "json_normalize", None)
        if json_normalize is None:
            from pandas.io.json import json_normalize
        return json_normalize(self.data)

    return pandas.DataFrame.from_dict(self.data)
示例30
def test_simple_records(self):
    """Check that flat records normalize to the same frame as ``DataFrame(recs)``."""
    recs = [{'a': 1, 'b': 2, 'c': 3},
            {'a': 4, 'b': 5, 'c': 6},
            {'a': 7, 'b': 8, 'c': 9},
            {'a': 10, 'b': 11, 'c': 12}]

    result = json_normalize(recs)
    # With no nesting there is nothing to flatten, so the plain constructor
    # gives the expected frame.
    expected = DataFrame(recs)

    tm.assert_frame_equal(result, expected)