Python source code examples: dataset.connect()
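The examples below are collected from real projects and show typical uses of the dataset library's connect() function: opening a database from a SQLAlchemy-style URL, creating tables and indexes, inserting and querying rows, and passing engine options through to SQLAlchemy. As a primer, here is a minimal sketch of the usual pattern (the file and table names are placeholders):

import dataset

# Open (or create) a SQLite database; any SQLAlchemy URL works here
db = dataset.connect('sqlite:///example.db')

# Tables and columns are created lazily on first use
table = db['people']
table.insert({'name': 'Ada', 'age': 36})

row = table.find_one(name='Ada')
print(row['age'])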

Example 1
def add_shared_args(parser):
    parser.add_argument("query")
    parser.add_argument("--db",
        type=dataset.connect,
        help="SQLAlchemy connection string. Default: " + DEFAULT_DB,
        default=DEFAULT_DB)
    parser.add_argument("--throttle",
        type=int,
        default=15,
        help="""Wait X seconds between requests.
        Default: 15 (to stay under rate limits)""")
    parser.add_argument("--credentials",
        nargs=4,
        help="""
            Four space-separated strings for {}.
            Defaults to environment variables by those names.
        """.format(", ".join(CREDENTIAL_NAMES))
    )
    parser.add_argument("--store-raw",
        help="Store raw tweet JSON, in addition to excerpted fields.",
        action="store_true")
    parser.add_argument("--quiet",
        help="Silence logging.",
        action="store_true") 
Example 2
def create_app_info_dict():
    dlist = []
    conn = dataset.connect(config.APP_INFO_SQLITE_FILE)
    print("Creating app-info dict")
    for k, v in config.source_files.items():
        d = pd.read_csv(v, index_col='appId')

        d['store'] = k

        if 'permissions' not in d.columns:
            print(k, v, d.columns)
            # assign() returns a new DataFrame, so keep the result
            d = d.assign(permissions=["<not recorded>"] * len(d))
        d.columns = d.columns.str.lower().str.replace(' ', '-').str.replace('-', '_')
        dlist.append(d)
    pd.concat(dlist).to_sql('apps', conn.engine, if_exists='replace')
    conn.engine.execute('create index idx_appId on apps(appId)') 
Example 3
def setup_db(connection_string):
    db = dataset.connect(connection_string)

    connections = db['connection']
    users = db['user']
    tweets = db['tweet']
    medias = db['media']
    mentions = db['mention']
    urls = db['url']
    hashtags = db['hashtag']

    tweets.create_index(['tweet_id'])
    medias.create_index(['tweet_id'])
    mentions.create_index(['user_id'])
    mentions.create_index(['mentioned_user_id'])
    urls.create_index(['tweet_id'])
    hashtags.create_index(['tweet_id'])
    users.create_index(['user_id'])
    connections.create_index(['friend_id'])
    connections.create_index(['follower_id'])

    return db 
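A note on this pattern: in dataset, indexing into the database (db['tweet']) creates the table on first use if it does not already exist, and create_index only builds an index when one on those columns is not already present, so a setup function like this should be safe to run on every start-up.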
Example 4
def setup_db(connection_string):
    db = dataset.connect(connection_string)

    pages = db['page']
    users = db['user']
    posts = db['post']
    comments = db['comment']
    interactions = db['interaction']

    users.create_index(['user_id'])
    posts.create_index(['post_id'])
    comments.create_index(['comment_id'])
    comments.create_index(['post_id'])
    interactions.create_index(['comment_id'])
    interactions.create_index(['post_id'])
    interactions.create_index(['user_id'])

    return db 
Example 5
def start(self):
        """
        Initialize db and session monitor thread

        """
        global db
        log.info(":SYS:Starting W.I.L.L")
        db_url = self.configuration_data["db_url"]
        log.info(":SYS:Connecting to database")
        db = dataset.connect(db_url, engine_kwargs={"pool_recycle": 1})
        core.db = db
        API.db = db
        web.db = db
        start_time = self.now.strftime("%I:%M %p %A %m/%d/%Y")
        web.start_time = start_time
        log.info(":SYS:Starting W.I.L.L core")
        core.initialize(db)
        log.info(":SYS:Starting sessions parsing thread")
        core.sessions_monitor(db)
        log.info(":SYS:W.I.L.L started") 
Example 6
def __init__(self):
        """"""
        self.exchange = ccxt.poloniex()
        self.exchange.load_markets()
        self.delay_seconds = self.exchange.rateLimit / 1000
        self.symbols = self.exchange.markets
        self.timeframe = '1d'
        self.db_url = 'sqlite:///databases/market_prices.db'
        self.deques = dict()
        self.ohlcv = dict()
        ensure_dir('databases')  # make sure the SQLite directory exists before connecting
        self.database = dataset.connect(self.db_url)
        for symbol in self.symbols:
            if self.exchange.has['fetchOHLCV']:
                print('Obtaining OHLCV data')
                data = self.exchange.fetch_ohlcv(symbol, self.timeframe)
                data = list(zip(*data))
                data[0] = [datetime.datetime.fromtimestamp(ms / 1000)
                           for ms in data[0]]
                self.ohlcv[symbol] = data
                time.sleep(self.delay_seconds)
            else:
                print('No OHLCV data available')
            self.deques[symbol] = deque()
            if len(self.database[symbol]):
                for e in self.database[symbol]:
                    entry = (e['bid'], e['ask'], e['spread'], e['time'])
                    self.deques[symbol].append(entry)
        del self.database
        self.thread = threading.Thread(target=self.__update)
        self.thread.daemon = True
        self.thread.start() 
Example 7
def __update(self):
        """
        https://github.com/ccxt-dev/ccxt/wiki/Manual#market-price
        """
        self.database = dataset.connect(self.db_url)
        while True:
            for symbol in self.symbols:

                start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
                orders = self.exchange.fetch_order_book(symbol)
                bid = orders['bids'][0][0] if len(orders['bids']) > 0 else None
                ask = orders['asks'][0][0] if len(orders['asks']) > 0 else None
                spread = (ask - bid) if (bid and ask) else None
                dtime = datetime.datetime.now()
                self.deques[symbol].append((bid, ask, spread, dtime))
                self.database.begin()
                try:
                    self.database[symbol].insert({
                        'ask': ask,
                        'bid': bid,
                        'spread': spread,
                        'time': dtime
                    })
                    self.database.commit()
                except Exception:
                    self.database.rollback()

                # Never pass a negative duration to sleep()
                elapsed = time.perf_counter() - start_time
                time.sleep(max(0.0, self.delay_seconds - elapsed))
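The explicit begin()/commit()/rollback() sequence above can also be written with dataset's context-manager form, which commits on a clean exit and rolls back when an exception escapes. A minimal sketch (table and column names are placeholders):

import dataset

db = dataset.connect('sqlite:///:memory:')
# Entering the database as a context manager opens a transaction
with db as tx:
    tx['prices'].insert({'bid': 1.0, 'ask': 1.1, 'spread': 0.1})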
Example 8
def start(cls):
        cls.db = dataset.connect(cls.database_url)
        super().start() 
Example 9
def conn():
    warehouse = dataset.connect(config.WAREHOUSE_URL)
    for table in warehouse.tables:
        warehouse[table].delete()
    return {'warehouse': warehouse} 
Example 10
def open_spider(self, spider):
        if spider.conf and spider.conn:
            self.__conf = spider.conf
            self.__conn = spider.conn
        else:
            # For runs triggered by the scrapy CLI utility
            self.__conf = helpers.get_variables(config, str.isupper)
            self.__conn = {'warehouse': dataset.connect(config.WAREHOUSE_URL)} 
Example 11
def cli(argv):
    # Prepare conf dict
    conf = helpers.get_variables(config, str.isupper)

    # Prepare conn dict
    conn = {
        'warehouse': dataset.connect(config.WAREHOUSE_URL),
    }

    # Get and call collector
    collect = importlib.import_module('collectors.%s' % argv[1]).collect
    collect(conf, conn, *argv[2:]) 
Example 12
def run(consumer_key, consumer_secret, access_key, access_secret,
        connection_string, threshold=5000, seed_only=True):

    db = dataset.connect(connection_string)
    api = get_api(consumer_key, consumer_secret, access_key, access_secret)

    if seed_only:
        is_seed = 1
    else:
        is_seed = 0

    user_table = db['user']
    users = user_table.find(user_table.table.columns.friends_count < threshold,
                            friends_collected=0, is_seed=is_seed)
    users = [u for u in users]
    all_users = len(users)
    remaining = all_users

    for u in users:
        try:
            print('Getting friend ids for ' + u['screen_name'])
            next_cursor, prev_cursor, friend_ids = get_friend_ids(
                api, screen_name=u['screen_name'])

            print('Adding ' + str(len(friend_ids)) + ' user ids to db')
            insert_if_missing(db, user_ids=friend_ids)

            print('Creating relationships for ' + str(u['user_id']))
            create_connections(db, u['user_id'], friend_ids=friend_ids)

            update_dict = dict(id=u['id'], friends_collected=1)
            user_table.update(update_dict, ['id'])

            # Can only make 15 calls in a 15 minute window to this endpoint
            remaining -= 1
            time_left = remaining / 60.0
            print(str(time_left) + ' hours to go')
            print('Sleeping for 1 minute, timestamp: ' + str(datetime.now()))
            time.sleep(60)
        except Exception:
            # Skip this user on any API or database error
            continue
Example 13
def run(consumer_key, consumer_secret, access_key, access_secret,
        connection_string):

    db = dataset.connect(connection_string)
    api = get_api(consumer_key, consumer_secret, access_key, access_secret)

    user_table = db['user']
    users = user_table.find(user_table.table.columns.user_id != 0,
                            profile_collected=0)
    users = [u for u in users]

    if len(users) == 0:
        print('No users without profiles')
        return None

    ids_to_lookup = []
    for user in users:
        ids_to_lookup.append(user['user_id'])
        if len(ids_to_lookup) == 100:
            print('Getting profiles')
            profiles = get_profiles(api, user_ids=ids_to_lookup)
            print('Updating 100 profiles')
            upsert_profiles(db, profiles)
            ids_to_lookup = []
            print('Sleeping, timestamp: ' + str(datetime.now()))
            time.sleep(5)

    # Look up the remainder of fewer than 100 ids, if any
    if ids_to_lookup:
        print('Getting profiles')
        profiles = get_profiles(api, user_ids=ids_to_lookup)
        print('Updating ' + str(len(ids_to_lookup)) + ' profiles')
        upsert_profiles(db, profiles)

    print('Finished getting profiles') 
Example 14
def help(bot, update):
    '''Echo the help string'''
    bot.sendMessage(update.message.chat_id, help_str)

#def socket_io_thread(bot,session_id, chat_id ):
#    socketIO = SocketIO(SERVER_URL, 80)
#    log.info("In socket_io thread")
#    socketIO.on('connect', lambda: socketIO.emit("get_updates", session_id))
#    socketIO.on('update', lambda x: bot.sendMessage(chat_id, (x["value"])))
#    socketIO.on('disconnect', lambda x: bot.sendMessage(chat_id, "Update server has disconnected"))
#    socketIO.on('debug', lambda x: log.info("Got debug message {0} from socketIO".format(x["value"])))
#    socketIO.wait() 
Example 15
def __connect_alarms(self):
        """ Connecting to a SQLite database table 'alarms'. """
        alarms_table = dataset.connect(self.db_file)['alarms']
        return alarms_table 
Example 16
def __connect_settings(self):
        """ Connecting to a SQLite database table 'settings'. """
        settings_table = dataset.connect(self.db_file)['settings']
        return settings_table

    #
    # member functions to set settings
    # 
Example 17
def __init__(self, url, result_table="results", complementary_table="complementary", space_table="space"):
        super(SQLiteConnection, self).__init__()
        if url.endswith("/"):
            raise RuntimeError("Empty database name {}".format(url))

        if url.endswith((" ", "\t")):
            raise RuntimeError("Database name ends with space {}".format(url))

        if not url.startswith("sqlite://"):
            raise RuntimeError("Missing 'sqlite:///' at the begin of url".format(url))

        if url == "sqlite://" or url == "sqlite:///:memory:":
            raise RuntimeError("Cannot use memory database as it exists only for the time of the connection")

        match = re.search("sqlite:///(.*)", url)
        if match is not None:
            db_path = match.group(1)
        else:
            raise RuntimeError("Cannot find sqlite db path in {}".format(url))

        self.url = url
        self.result_table_name = result_table
        self.complementary_table_name = complementary_table
        self.space_table_name = space_table

        self._lock = filelock.FileLock("{}.lock".format(db_path))
        self.hold_lock = False

        # with self.lock():
        #     db = dataset.connect(self.url)

        #     # Initialize a result table and ensure float for loss
        #     results = db[self.result_table_name]
        #     results.create_column("_loss", sqlalchemy.Float) 
Example 18
def all_results(self):
        """Get a list of all entries of the result table. The order is
        undefined.
        """
        # Only way to ensure old db instances are closed is to force garbage collection
        # See dataset note : https://dataset.readthedocs.io/en/latest/api.html#notes
        gc.collect()
        db = dataset.connect(self.url)
        return list(db[self.result_table_name].all()) 
Example 19
def find_results(self, filter):
        """Get a list of all results associated with *filter*. The order is
        undefined.
        """
        gc.collect()
        db = dataset.connect(self.url)
        return list(db[self.result_table_name].find(**filter)) 
Example 20
def insert_result(self, document):
        """Insert a new *document* in the result table. The columns must not
        be defined nor all present. Any new column will be added to the
        database and any missing column will get value None.
        """
        gc.collect()
        db = dataset.connect(self.url)
        return db[self.result_table_name].insert(document) 
Example 21
def update_result(self, filter, values):
        """Update or add *values* of given rows in the result table.

        Args:
            filter: An identifier of the rows to update.
            values: A mapping of values to update or add.
        """
        gc.collect()
        filter = filter.copy()
        keys = list(filter.keys())
        filter.update(values)
        db = dataset.connect(self.url)
        return db[self.result_table_name].update(filter, keys) 
Example 22
def all_complementary(self):
        """Get all entries of the complementary information table as a list.
        The order is undefined.
        """
        gc.collect()
        db = dataset.connect(self.url)
        return list(db[self.complementary_table_name].all()) 
Example 23
def insert_complementary(self, document):
        """Insert a new document (row) in the complementary information table.
        """
        gc.collect()
        db = dataset.connect(self.url)
        return db[self.complementary_table_name].insert(document) 
Example 24
def find_complementary(self, filter):
        """Find a document (row) from the complementary information table.
        """
        gc.collect()
        db = dataset.connect(self.url)
        return db[self.complementary_table_name].find_one(**filter) 
Example 25
def get_space(self):
        """Returns the space used for previous experiments.

        Raises:
            AssertionError: If there are more than one space in the database.
        """
        gc.collect()
        db = dataset.connect(self.url)
        entry_count = db[self.space_table_name].count()
        if entry_count == 0:
            return None

        assert entry_count == 1, "Space table unexpectedly contains more than one space."
        return pickle.loads(db[self.space_table_name].find_one()["space"]) 
Example 26
def insert_space(self, space):
        """Insert a space in the database.

        Raises:
            AssertionError: If a space is already present in the database.
        """
        gc.collect()
        db = dataset.connect(self.url)
        assert db[self.space_table_name].count() == 0, ("Space table cannot contain more than one space, "
                                                        "clear table first.")
        return db[self.space_table_name].insert({"space": pickle.dumps(space)}) 
Example 27
def load_datastore():
    if not hasattr(settings, '_datastore'):
        # do not pool connections for the datastore
        engine_kwargs = {'poolclass': NullPool}
        settings._datastore = dataset.connect(settings.DATASTORE_URI,
                                              engine_kwargs=engine_kwargs)
        # Use bigint to store integers by default
        settings._datastore.types.integer = settings._datastore.types.bigint
    return settings._datastore 
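As in Example 5, engine_kwargs is forwarded to SQLAlchemy's create_engine(), so poolclass=NullPool disables connection pooling entirely; reassigning types.integer to types.bigint makes dataset create BIGINT columns for Python integers from then on.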
Example 28
def __init__(self, connection_string):
        db = dataset.connect(connection_string)
        self.rooms = db.get_table('rooms', primary_id='role_id', primary_type=db.types.bigint)
        self.settings = db.get_table('settings', primary_id='guild_id', primary_type=db.types.bigint)
        self.invites = db.get_table('invites')
Example 29
def connect_to_db(db_name=None, db_engine="sqlite"):
    """Connect to a database. Create the database if there isn't one yet.

    The database can be a SQLite DB (either a DB file or an in-memory DB), or a
    PostgreSQL DB. In order to connect to a PostgreSQL DB you first have to
    create a database, create a user, and grant that user all the necessary
    privileges on that database and its tables.
    'postgresql://<username>:<password>@localhost:<PostgreSQL port>/<db name>'
    Note: at the moment it looks like it's not possible to close a connection
    manually (e.g. by calling conn.close() in sqlite3).


    Parameters
    ----------
    db_name : str or None
        database name (without file extension .db)
    db_engine : str
        database engine ('sqlite' or 'postgres')

    Returns
    -------
    dataset.persistence.database.Database
        connection to a database
    """
    engines = {"sqlite", "postgres"}
    if db_name is None:
        db_string = "sqlite:///:memory:"
        print("New connection to in-memory SQLite DB...")
    else:
        if db_engine == "sqlite":
            db_string = "sqlite:///{}.db".format(DB_name)
            print("New connection to SQLite DB...")
        elif db_engine == "postgres":
            db_string = "postgresql://test_user:test_password@localhost:5432/testdb"
            # db_string = \
            #     'postgresql://test_user2:test_password2@localhost:5432/testdb'
            print("New connection to PostgreSQL DB...")
        else:
            raise UnsupportedDatabaseEngine(
                "No database engine with this name. "
                "Choose one of the following: {}".format(engines)
            )

    return dataset.connect(db_string) 
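For illustration, typical calls to this helper (the file name is a placeholder):

db = connect_to_db()                        # in-memory SQLite DB
db = connect_to_db('twitter_data')          # SQLite file twitter_data.db
db = connect_to_db('ignored', 'postgres')   # PostgreSQL (URL is hardcoded above)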
Example 30
def export_ctf():
    # TODO: For some unknown reason dataset is only able to see alembic_version during tests.
    # Even using a real sqlite database. This makes this test impossible to pass in sqlite.
    db = dataset.connect(get_app_config("SQLALCHEMY_DATABASE_URI"))

    # Backup database
    backup = tempfile.NamedTemporaryFile()

    backup_zip = zipfile.ZipFile(backup, "w")

    tables = db.tables
    for table in tables:
        result = db[table].all()
        result_file = BytesIO()
        freeze_export(result, fileobj=result_file)
        result_file.seek(0)
        backup_zip.writestr("db/{}.json".format(table), result_file.read())

    # Guarantee that alembic_version is saved into the export
    if "alembic_version" not in tables:
        result = {
            "count": 1,
            "results": [{"version_num": get_current_revision()}],
            "meta": {},
        }
        result_file = BytesIO()
        # json.dump() writes str, but BytesIO expects bytes, so encode first
        result_file.write(json.dumps(result).encode("utf-8"))
        result_file.seek(0)
        backup_zip.writestr("db/alembic_version.json", result_file.read())

    # Backup uploads
    uploader = get_uploader()
    uploader.sync()

    upload_folder = os.path.join(
        os.path.normpath(app.root_path), app.config.get("UPLOAD_FOLDER")
    )
    for root, dirs, files in os.walk(upload_folder):
        for file in files:
            parent_dir = os.path.basename(root)
            backup_zip.write(
                os.path.join(root, file),
                arcname=os.path.join("uploads", parent_dir, file),
            )

    backup_zip.close()
    backup.seek(0)
    return backup