Python source code examples: pandas.io.json.json_normalize()
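
The examples below are collected from real projects and from the pandas test suite. As a quick orientation, here is a minimal, self-contained sketch of what json_normalize does; the data and variable names are invented for illustration. Note that from pandas 1.0 onward the function is exposed as pandas.json_normalize rather than pandas.io.json.json_normalize.

from pandas.io.json import json_normalize  # pandas.json_normalize in pandas >= 1.0

# Invented nested records, for illustration only.
data = [
    {'id': 1, 'info': {'source': 'a'},
     'items': [{'name': 'x', 'value': 10}, {'name': 'y', 'value': 20}]},
    {'id': 2, 'info': {'source': 'b'},
     'items': [{'name': 'z', 'value': 30}]},
]

# Nested dicts become dotted columns (id, items, info.source); nested lists are
# left alone unless a record_path is given.
flat = json_normalize(data)

# record_path explodes a nested list of records; meta carries parent fields along.
items = json_normalize(data, record_path='items', meta=['id', ['info', 'source']])
print(items.columns.tolist())  # record fields first, then meta: name, value, id, info.source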

Example 1
def main(opt):
    scorer = COCOScorer()
    gt_dataframe = json_normalize(
        json.load(open(opt["videoinfo_json"]))['sentences'])
    gts = convert_data_to_coco_scorer_format(gt_dataframe)
    samples = {}
    video_ids = open(opt['video_ids'])
    sents = open(opt['pred'])
    for video_id in video_ids:
        # strip file extensions
        video_id = video_id.split('.')[0]
        sent = sents.readline().strip()
        samples[video_id] = [{'image_id': video_id, 'caption': sent}]
    video_ids.close()
    sents.close()
    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    print(valid_score) 
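
COCOScorer, convert_data_to_coco_scorer_format, and suppress_stdout_stderr come from the surrounding captioning project and are not shown here. Judging from how samples is built and then passed to scorer.score together with gts, the converter presumably groups the normalized ground-truth captions by video id into the same {video_id: [{'image_id': ..., 'caption': ...}]} shape. A rough sketch under that assumption (the video_id and caption column names are also assumed):

def convert_data_to_coco_scorer_format(data_frame):
    # Assumed helper: group reference captions by video id so that gts
    # mirrors the structure of samples expected by the scorer.
    gts = {}
    for video_id, caption in zip(data_frame['video_id'], data_frame['caption']):
        gts.setdefault(video_id, []).append(
            {'image_id': video_id, 'caption': caption})
    return gts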
Example 2
def test_simple_normalize(self, state_data):
        result = json_normalize(state_data[0], 'counties')
        expected = DataFrame(state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties')

        expected = []
        for rec in state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)

        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties', meta='state')
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected) 
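
This test and several later ones receive a state_data fixture that is not shown on this page. From the assertions (a 'state' meta column repeating 'Florida' three times and 'Ohio' twice), it is a list of two state records, each carrying a nested 'counties' list of records. A plausible sketch, with county names and populations invented here:

state_data = [
    {'state': 'Florida',
     'counties': [{'name': 'Dade', 'population': 12345},
                  {'name': 'Broward', 'population': 40000},
                  {'name': 'Palm Beach', 'population': 60000}]},
    {'state': 'Ohio',
     'counties': [{'name': 'Summit', 'population': 1234},
                  {'name': 'Cuyahoga', 'population': 1337}]},
]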
Example 3
def test_simple_normalize_with_separator(self, deep_nested):
        # GH 14883
        result = json_normalize({'A': {'A': 1, 'B': 2}})
        expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
        expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
        expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
        tm.assert_frame_equal(result.reindex_like(expected), expected)

        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']],
                                sep='_')
        expected = Index(['name', 'pop',
                          'country', 'states_name']).sort_values()
        assert result.columns.sort_values().equals(expected) 
Example 4
def test_more_deeply_nested(self, deep_nested):

        result = json_normalize(deep_nested, ['states', 'cities'],
                                meta=['country', ['states', 'name']])
        # meta_prefix={'states': 'state_'})

        ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3,
                   'states.name': ['California', 'California', 'Ohio', 'Ohio',
                                   'Bayern', 'Nordrhein-Westfalen',
                                   'Nordrhein-Westfalen'],
                   'name': ['San Francisco', 'Los Angeles', 'Columbus',
                            'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'],
                   'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239]}

        expected = DataFrame(ex_data, columns=result.columns)
        tm.assert_frame_equal(result, expected) 
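
The deep_nested fixture used in Examples 3 and 4 is also defined outside these snippets, but the expected frame above pins its contents down fairly tightly. A reconstruction consistent with those assertions looks roughly like this:

deep_nested = [
    {'country': 'USA',
     'states': [
         {'name': 'California',
          'cities': [{'name': 'San Francisco', 'pop': 12345},
                     {'name': 'Los Angeles', 'pop': 12346}]},
         {'name': 'Ohio',
          'cities': [{'name': 'Columbus', 'pop': 1234},
                     {'name': 'Cleveland', 'pop': 1236}]}]},
    {'country': 'Germany',
     'states': [
         {'name': 'Bayern',
          'cities': [{'name': 'Munich', 'pop': 12347}]},
         {'name': 'Nordrhein-Westfalen',
          'cities': [{'name': 'Duesseldorf', 'pop': 1238},
                     {'name': 'Koeln', 'pop': 1239}]}]},
]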
Example 5
def test_meta_name_conflict(self):
        data = [{'foo': 'hello',
                 'bar': 'there',
                 'data': [{'foo': 'something', 'bar': 'else'},
                          {'foo': 'something2', 'bar': 'else2'}]}]

        msg = (r"Conflicting metadata name (foo|bar),"
               " need distinguishing prefix")
        with pytest.raises(ValueError, match=msg):
            json_normalize(data, 'data', meta=['foo', 'bar'])

        result = json_normalize(data, 'data', meta=['foo', 'bar'],
                                meta_prefix='meta')

        for val in ['metafoo', 'metabar', 'foo', 'bar']:
            assert val in result 
Example 6
def test_record_prefix(self, state_data):
        result = json_normalize(state_data[0], 'counties')
        expected = DataFrame(state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(state_data, 'counties',
                                meta='state',
                                record_prefix='county_')

        expected = []
        for rec in state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)
        expected = expected.rename(columns=lambda x: 'county_' + x)
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected) 
Example 7
def test_missing_field(self, author_missing_data):
        # GH20030:
        result = json_normalize(author_missing_data)
        ex_data = [
            {'info': np.nan,
             'author_name.first': np.nan,
             'author_name.last_name': np.nan,
             'info.created_at': np.nan,
             'info.last_updated': np.nan},
            {'info': None,
             'author_name.first': 'Jane',
             'author_name.last_name': 'Doe',
             'info.created_at': '11/08/1993',
             'info.last_updated': '26/05/2012'}
        ]
        expected = DataFrame(ex_data)
        tm.assert_frame_equal(result, expected) 
Example 8
def test_non_ascii_key(self):
        if compat.PY3:
            testjson = (
                b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
                b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
            ).decode('utf8')
        else:
            testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                        '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')

        testdata = {
            u'sub.A': [1, 3],
            u'sub.B': [2, 4],
            b"\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1]
        }
        expected = DataFrame(testdata)

        result = json_normalize(json.loads(testjson))
        tm.assert_frame_equal(result, expected) 
Example 9
def test_simple_normalize(self):
        result = json_normalize(self.state_data[0], 'counties')
        expected = DataFrame(self.state_data[0]['counties'])
        tm.assert_frame_equal(result, expected)

        result = json_normalize(self.state_data, 'counties')

        expected = []
        for rec in self.state_data:
            expected.extend(rec['counties'])
        expected = DataFrame(expected)

        tm.assert_frame_equal(result, expected)

        result = json_normalize(self.state_data, 'counties', meta='state')
        expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])

        tm.assert_frame_equal(result, expected) 
Example 10
def df_from_file(path):
    data = load_json(path)
    # meta = [["experiment_setup", "task"],
    #         ["experiment_setup", "subcategory"],
    #         ["experiment_setup", "method"],
    #         ["experiment_setup", "embeddings"]]
    dframe = json_normalize(data)
    if "details" in dframe:
        dframe.drop("details", axis="columns", inplace=True)
    default_measurement = "accuracy"
    try:
        default_measurement = dframe["experiment_setup.default_measurement"].unique()[0]
    except KeyError:
        logger.warning(f"default_measurement not specified in {path}")
    dframe["result"] = dframe["result." + default_measurement]
    # df["reciprocal_rank"] = 1 / (df["rank"] + 1)
    return dframe 
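
df_from_file relies on a load_json helper and a module-level logger defined elsewhere in that project. Minimal stand-ins, assuming the file holds plain JSON, might look like:

import json
import logging

logger = logging.getLogger(__name__)

def load_json(path):
    # Assumed helper: plain JSON loader; the original project may add
    # encoding or validation logic on top of this.
    with open(path) as fp:
        return json.load(fp)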
Example 11
def query_dataframe(url_suffix, query_params):
    '''
    API wrapper; returns a dict with the following structure:
    key     value           type
    code    return code     int
    data    query result    DataFrame
    msg     message text    string
    '''
    return_value = {'code': -1, 'data': None, 'msg': ''}
    rlt = query_json(url_suffix, query_params)
    if rlt is None:
        return_value['msg'] = 'query failed.'
    else:
        if 'code' in rlt.keys():
            return_value['code'] = rlt['code']
        if 'msg' in rlt.keys():
            return_value['msg'] = rlt['msg']
        if 'data' in rlt.keys():
            return_value['data'] = json_normalize(rlt['data'])
    return return_value 
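
query_json is an HTTP helper defined elsewhere in that project. A minimal stand-in built on requests (the base URL, timeout, and error handling here are assumptions) could look like:

import requests

BASE_URL = 'http://example.com/api/'  # placeholder; the real endpoint is project-specific

def query_json(url_suffix, query_params):
    # Assumed helper: return the decoded JSON body, or None on any transport error,
    # matching how query_dataframe treats a None result as a failed query.
    try:
        resp = requests.get(BASE_URL + url_suffix, params=query_params, timeout=10)
        resp.raise_for_status()
        return resp.json()
    except requests.RequestException:
        return None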
Example 12
def get_flickr30k_data(cfg):
    #using the provided splits
    train_split = set(map(lambda x: x.split(".")[0], open(cfg.train_file).read().splitlines()))
    val_split = set(map(lambda x: x.split(".")[0], open(cfg.val_file).read().splitlines()))
    test_split = set(map(lambda x: x.split(".")[0], open(cfg.test_file).read().splitlines()))
    
    data = [{"video_id": item.split(".")[0], "sentence_id": item.split("#")[1].split("\t")[0], "caption":item.split("\t")[1]}
            for item in open(cfg.annotations_path).read().splitlines()]
    
    sentences = json_normalize(data)
    sentences['video_path'] = sentences['video_id'].map(lambda x: os.path.join(cfg.path_to_descriptors, x + cfg.descriptor_suffix + ".npy"))
    
    train_imgs = sentences.loc[sentences["video_id"].isin(train_split)]
    # reset_index returns a new frame; assign it back and drop the old index
    train_imgs = train_imgs.reset_index(drop=True)

    val_imgs = sentences.loc[sentences["video_id"].isin(val_split)]
    val_imgs = val_imgs.reset_index(drop=True)

    test_imgs = sentences.loc[sentences["video_id"].isin(test_split)]
    test_imgs = test_imgs.reset_index(drop=True)
    
    return train_imgs, val_imgs, test_imgs 
Example 13
def get_msr_vtt_data(cfg):
    #trainval data
    with open(cfg.trainval_annotations) as data_file:    
        data = json.load(data_file)
    
    sentences = json_normalize(data['sentences'])
    videos = json_normalize(data['videos'])
    train_vids = sentences.loc[sentences["video_id"].isin(videos[videos['split'] == "train"]["video_id"])]
    val_vids = sentences.loc[sentences["video_id"].isin(videos[videos['split'] == "validate"]["video_id"])]
    train_vids['video_path'] = train_vids['video_id'].map(lambda x: os.path.join(cfg.path_to_trainval_descriptors, x + "_incp_v3.npy"))  
    val_vids['video_path'] = val_vids['video_id'].map(lambda x: os.path.join(cfg.path_to_trainval_descriptors, x + "_incp_v3.npy"))
    
    #test data
    with open(cfg.test_annotations) as data_file:    
        data = json.load(data_file)
    sentences = json_normalize(data['sentences'])
    videos = json_normalize(data['videos'])
    test_vids = sentences.loc[sentences["video_id"].isin(videos[videos['split'] == "test"]["video_id"])]
    test_vids['video_path'] = test_vids['video_id'].map(lambda x: os.path.join(cfg.path_to_test_descriptors, x + "_incp_v3.npy"))
    
    return train_vids, val_vids, test_vids 
Example 14
def format_df(response, job):

    if job['type'] == 'trades':
        if job['exchange'] == 'btce':
            for col in response:
                response = response[col]
        df = json_normalize(response)
        if job['exchange'] == 'coinbase':
            df['time'] = to_datetime(df['time'], utc=0)
            df['timestamp'] = df['time'].astype(np.int64) // 10**9
    else:
        df = json_normalize(response)

    df = standard_columns(df)

    if 'exchange' not in df:
        df['exchange'] = job['exchange']
    if 'symbol' not in df:
        df['symbol'] = job['symbol']
    return df

#|Standardize column names and drop columns not in dictionary below 
Example 15
def to_dataframe(self, normalize=False):
        """Transforms the data into a pandas DataFrame

        :param normalize: Whether or not to normalize any nested objects in the results into distinct columns.
        :type normalize: bool
        :rtype: pandas.DataFrame
        """
        try:
            import pandas
        except ImportError:
            raise ImportError("The 'pandas' package could not be imported")
        if normalize:
            from pandas.io.json import json_normalize

            return json_normalize(self.data)
        return pandas.DataFrame.from_dict(self.data) 
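
The normalize flag only matters when the records are nested; with flat records the two branches produce the same frame. The difference is sketched below with invented data:

import pandas
from pandas.io.json import json_normalize

data = [{'id': 1, 'stats': {'views': 10}},
        {'id': 2, 'stats': {'views': 5}}]

pandas.DataFrame.from_dict(data)   # normalize=False path: 'stats' column holds raw dicts
json_normalize(data)               # normalize=True path: flattened 'stats.views' column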
Example 16
def test_simple_records(self):
        recs = [{'a': 1, 'b': 2, 'c': 3},
                {'a': 4, 'b': 5, 'c': 6},
                {'a': 7, 'b': 8, 'c': 9},
                {'a': 10, 'b': 11, 'c': 12}]

        result = json_normalize(recs)
        expected = DataFrame(recs)

        tm.assert_frame_equal(result, expected)