Python source code examples: dataset.connect()
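Before the examples, a minimal sketch of the dataset API they all build on: dataset.connect() takes any SQLAlchemy connection URL and returns a Database whose tables are created lazily. The sketch below runs against a throwaway in-memory SQLite database; the table and column names are illustrative only.

import dataset

db = dataset.connect("sqlite:///:memory:")  # any SQLAlchemy URL works
table = db["people"]                        # the table is created lazily
table.insert({"name": "Ada", "age": 36})    # columns are added on demand
row = table.find_one(name="Ada")
print(row["age"])                           # -> 36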
Example 1
def add_shared_args(parser):
    parser.add_argument("query")
    parser.add_argument("--db",
                        type=dataset.connect,
                        help="SQLAlchemy connection string. Default: " + DEFAULT_DB,
                        default=DEFAULT_DB)
    parser.add_argument("--throttle",
                        type=int,
                        default=15,
                        help="""Wait X seconds between requests.
            Default: 15 (to stay under rate limits)""")
    parser.add_argument("--credentials",
                        nargs=4,
                        help="""
            Four space-separated strings for {}.
            Defaults to environment variables by those names.
            """.format(", ".join(CREDENTIAL_NAMES))
                        )
    parser.add_argument("--store-raw",
                        help="Store raw tweet JSON, in addition to excerpted fields.",
                        action="store_true")
    parser.add_argument("--quiet",
                        help="Silence logging.",
                        action="store_true")
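A note on the pattern above: passing type=dataset.connect makes argparse call dataset.connect() on the argument string, so the parsed attribute is already a live Database handle rather than a URL. argparse also applies the converter to string defaults, which is why passing the DEFAULT_DB string as default works. A small hypothetical illustration:

import argparse
import dataset

parser = argparse.ArgumentParser()
parser.add_argument("--db", type=dataset.connect,
                    default="sqlite:///:memory:")
args = parser.parse_args([])          # the string default is converted too
args.db["results"].insert({"ok": 1})  # args.db is a dataset.Database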
Example 2
def create_app_info_dict():
    dlist = []
    conn = dataset.connect(config.APP_INFO_SQLITE_FILE)
    print("Creating app-info dict")
    for k, v in config.source_files.items():
        d = pd.read_csv(v, index_col='appId')
        d['store'] = k
        if 'permissions' not in d.columns:
            print(k, v, d.columns)
            # assign() returns a new frame, so keep the result
            d = d.assign(permissions=["<not recorded>"] * len(d))
        d.columns = d.columns.str.lower().str.replace(' ', '-').str.replace('-', '_')
        dlist.append(d)
    pd.concat(dlist).to_sql('apps', conn.engine, if_exists='replace')
    conn.engine.execute('create index idx_appId on apps(appId)')
Example 3
def setup_db(connection_string):
    db = dataset.connect(connection_string)
    connections = db['connection']
    users = db['user']
    tweets = db['tweet']
    medias = db['media']
    mentions = db['mention']
    urls = db['url']
    hashtags = db['hashtag']
    tweets.create_index(['tweet_id'])
    medias.create_index(['tweet_id'])
    mentions.create_index(['user_id'])
    mentions.create_index(['mentioned_user_id'])
    urls.create_index(['tweet_id'])
    hashtags.create_index(['tweet_id'])
    users.create_index(['user_id'])
    connections.create_index(['friend_id'])
    connections.create_index(['follower_id'])
    return db
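Worth noting for this setup_db pattern (Example 4 below repeats it): db['name'] is lazy and only creates the table on first write, and in recent dataset releases create_index skips an index that already exists. A sketch under those assumptions:

import dataset

db = dataset.connect("sqlite:///:memory:")
tweets = db["tweet"]
tweets.insert({"tweet_id": 1, "text": "hello"})  # first write creates the table
tweets.create_index(["tweet_id"])                # re-running is a no-op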
Example 4
def setup_db(connection_string):
    db = dataset.connect(connection_string)
    pages = db['page']
    users = db['user']
    posts = db['post']
    comments = db['comment']
    interactions = db['interaction']
    users.create_index(['user_id'])
    posts.create_index(['post_id'])
    comments.create_index(['comment_id'])
    comments.create_index(['post_id'])
    interactions.create_index(['comment_id'])
    interactions.create_index(['post_id'])
    interactions.create_index(['user_id'])
    return db
Example 5
def start(self):
    """
    Initialize db and session monitor thread
    """
    global db
    log.info(":SYS:Starting W.I.L.L")
    db_url = self.configuration_data["db_url"]
    log.info(":SYS:Connecting to database")
    db = dataset.connect(db_url, engine_kwargs={"pool_recycle": 1})
    core.db = db
    API.db = db
    web.db = db
    start_time = self.now.strftime("%I:%M %p %A %m/%d/%Y")
    web.start_time = start_time
    log.info(":SYS:Starting W.I.L.L core")
    core.initialize(db)
    log.info(":SYS:Starting sessions parsing thread")
    core.sessions_monitor(db)
    log.info(":SYS:W.I.L.L started")
Example 6
def __init__(self):
    """"""
    self.exchange = ccxt.poloniex()
    self.exchange.load_markets()
    self.delay_seconds = self.exchange.rateLimit / 1000
    self.symbols = self.exchange.markets
    self.timeframe = '1d'
    self.db_url = 'sqlite:///databases/market_prices.db'
    self.deques = dict()
    self.ohlcv = dict()
    self.database = dataset.connect(self.db_url)
    ensure_dir('databases')
    for symbol in self.symbols:
        if self.exchange.has['fetchOHLCV']:
            print('Obtaining OHLCV data')
            data = self.exchange.fetch_ohlcv(symbol, self.timeframe)
            data = list(zip(*data))
            data[0] = [datetime.datetime.fromtimestamp(ms / 1000)
                       for ms in data[0]]
            self.ohlcv[symbol] = data
            time.sleep(self.delay_seconds)
        else:
            print('No OHLCV data available')
        self.deques[symbol] = deque()
        if len(self.database[symbol]):
            for e in self.database[symbol]:
                entry = (e['bid'], e['ask'], e['spread'], e['time'])
                self.deques[symbol].append(entry)
    del self.database
    self.thread = threading.Thread(target=self.__update)
    self.thread.daemon = True
    self.thread.start()
Example 7
def __update(self):
    """
    https://github.com/ccxt-dev/ccxt/wiki/Manual#market-price
    """
    self.database = dataset.connect(self.db_url)
    while True:
        for symbol in self.symbols:
            # time.clock() was removed in Python 3.8; monotonic() measures
            # the elapsed wall time the rate limit needs
            start_time = time.monotonic()
            orders = self.exchange.fetch_order_book(symbol)
            bid = orders['bids'][0][0] if len(orders['bids']) > 0 else None
            ask = orders['asks'][0][0] if len(orders['asks']) > 0 else None
            spread = (ask - bid) if (bid and ask) else None
            dtime = datetime.datetime.now()
            self.deques[symbol].append((bid, ask, spread, dtime))
            self.database.begin()
            try:
                self.database[symbol].insert({
                    'ask': ask,
                    'bid': bid,
                    'spread': spread,
                    'time': dtime
                })
                self.database.commit()
            except Exception:
                self.database.rollback()
            # never pass a negative duration to time.sleep()
            time.sleep(max(0.0, self.delay_seconds - (time.monotonic() - start_time)))
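The manual begin()/commit()/rollback() sequence in Example 7 can also be written with dataset's context-manager form, which commits on a clean exit and rolls back on an exception. A sketch, not the repo's own code:

import dataset

db = dataset.connect("sqlite:///:memory:")
with db as tx:  # entering the block begins a transaction
    tx["quotes"].insert({"bid": 1.0, "ask": 1.1})
# leaving the block commits; an exception inside would roll back instead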
Example 8
def start(cls):
    cls.db = dataset.connect(cls.database_url)
    super().start()
Example 9
def conn():
    warehouse = dataset.connect(config.WAREHOUSE_URL)
    for table in warehouse.tables:
        warehouse[table].delete()
    return {'warehouse': warehouse}
Example 10
def open_spider(self, spider):
    if spider.conf and spider.conn:
        self.__conf = spider.conf
        self.__conn = spider.conn
    else:
        # For runs triggered by the scrapy CLI utility
        self.__conf = helpers.get_variables(config, str.isupper)
        self.__conn = {'warehouse': dataset.connect(config.WAREHOUSE_URL)}
Example 11
def cli(argv):
    # Prepare conf dict
    conf = helpers.get_variables(config, str.isupper)
    # Prepare conn dict
    conn = {
        'warehouse': dataset.connect(config.WAREHOUSE_URL),
    }
    # Get and call collector
    collect = importlib.import_module('collectors.%s' % argv[1]).collect
    collect(conf, conn, *argv[2:])
Example 12
def run(consumer_key, consumer_secret, access_key, access_secret,
        connection_string, threshold=5000, seed_only=True):
    db = dataset.connect(connection_string)
    api = get_api(consumer_key, consumer_secret, access_key, access_secret)
    if seed_only:
        is_seed = 1
    else:
        is_seed = 0
    user_table = db['user']
    users = user_table.find(user_table.table.columns.friends_count < threshold,
                            friends_collected=0, is_seed=is_seed)
    users = [u for u in users]
    all_users = len(users)
    remaining = all_users
    for u in users:
        try:
            print('Getting friend ids for ' + u['screen_name'])
            next, prev, friend_ids = get_friend_ids(
                api, screen_name=u['screen_name'])
            print('Adding ' + str(len(friend_ids)) + ' user ids to db')
            insert_if_missing(db, user_ids=friend_ids)
            print('Creating relationships for ' + str(u['user_id']))
            create_connections(db, u['user_id'], friend_ids=friend_ids)
            update_dict = dict(id=u['id'], friends_collected=1)
            user_table.update(update_dict, ['id'])
            # Can only make 15 calls in a 15 minute window to this endpoint
            remaining -= 1
            time_left = remaining / 60.0
            print(str(time_left) + ' hours to go')
            print('Sleeping for 1 minute, timestamp: ' + str(datetime.now()))
            time.sleep(60)
        except Exception:  # a bare except would also swallow KeyboardInterrupt
            continue
Example 13
def run(consumer_key, consumer_secret, access_key, access_secret,
        connection_string):
    db = dataset.connect(connection_string)
    api = get_api(consumer_key, consumer_secret, access_key, access_secret)
    user_table = db['user']
    users = user_table.find(user_table.table.columns.user_id != 0,
                            profile_collected=0)
    users = [u for u in users]
    if len(users) == 0:
        print('No users without profiles')
        return None
    ids_to_lookup = []
    for user in users:
        ids_to_lookup.append(user['user_id'])
        if len(ids_to_lookup) == 100:
            print('Getting profiles')
            profiles = get_profiles(api, user_ids=ids_to_lookup)
            print('Updating 100 profiles')
            upsert_profiles(db, profiles)
            ids_to_lookup = []
            print('Sleeping, timestamp: ' + str(datetime.now()))
            time.sleep(5)
    if ids_to_lookup:  # skip the final lookup when the last batch is empty
        print('Getting profiles')
        profiles = get_profiles(api, user_ids=ids_to_lookup)
        print('Updating ' + str(len(ids_to_lookup)) + ' profiles')
        upsert_profiles(db, profiles)
    print('Finished getting profiles')
Example 14
def help(bot, update):
    '''Echo the help string'''
    bot.sendMessage(update.message.chat_id, help_str)

#def socket_io_thread(bot, session_id, chat_id):
#    socketIO = SocketIO(SERVER_URL, 80)
#    log.info("In socket_io thread")
#    socketIO.on('connect', lambda: socketIO.emit("get_updates", session_id))
#    socketIO.on('update', lambda x: bot.sendMessage(chat_id, (x["value"])))
#    socketIO.on('disconnect', lambda x: bot.sendMessage(chat_id, "Update server has disconnected"))
#    socketIO.on('debug', lambda x: log.info("Got debug message {0} from socketIO".format(x["value"])))
#    socketIO.wait()
Example 15
def __connect_alarms(self):
    """ Connect to the 'alarms' table of a SQLite database. """
    alarms_table = dataset.connect(self.db_file)['alarms']
    return alarms_table
Example 16
def __connect_settings(self):
    """ Connect to the 'settings' table of a SQLite database. """
    settings_table = dataset.connect(self.db_file)['settings']
    return settings_table

#
# member functions to set settings
#
Example 17
def __init__(self, url, result_table="results", complementary_table="complementary", space_table="space"):
    super(SQLiteConnection, self).__init__()
    if url.endswith("/"):
        raise RuntimeError("Empty database name {}".format(url))
    if url.endswith((" ", "\t")):
        raise RuntimeError("Database name ends with space {}".format(url))
    if not url.startswith("sqlite://"):
        raise RuntimeError("Missing 'sqlite:///' at the beginning of url {}".format(url))
    if url == "sqlite://" or url == "sqlite:///:memory:":
        raise RuntimeError("Cannot use memory database as it exists only for the time of the connection")
    match = re.search("sqlite:///(.*)", url)
    if match is not None:
        db_path = match.group(1)
    else:
        raise RuntimeError("Cannot find sqlite db path in {}".format(url))
    self.url = url
    self.result_table_name = result_table
    self.complementary_table_name = complementary_table
    self.space_table_name = space_table
    self._lock = filelock.FileLock("{}.lock".format(db_path))
    self.hold_lock = False

    # with self.lock():
    #     db = dataset.connect(self.url)
    #     # Initialize a result table and ensure float for loss
    #     results = db[self.result_table_name]
    #     results.create_column("_loss", sqlalchemy.Float)
Example 18
def all_results(self):
    """Get a list of all entries of the result table. The order is
    undefined.
    """
    # The only way to ensure old db instances are closed is to force
    # garbage collection. See the dataset note:
    # https://dataset.readthedocs.io/en/latest/api.html#notes
    gc.collect()
    db = dataset.connect(self.url)
    return list(db[self.result_table_name].all())
Example 19
def find_results(self, filter):
    """Get a list of all results associated with *filter*. The order is
    undefined.
    """
    gc.collect()
    db = dataset.connect(self.url)
    return list(db[self.result_table_name].find(**filter))
Example 20
def insert_result(self, document):
    """Insert a new *document* in the result table. The columns need not
    be defined beforehand, nor must all of them be present. Any new
    column will be added to the database and any missing column will get
    value None.
    """
    gc.collect()
    db = dataset.connect(self.url)
    return db[self.result_table_name].insert(document)
Example 21
def update_result(self, filter, values):
    """Update or add *values* of given rows in the result table.

    Args:
        filter: An identifier of the rows to update.
        values: A mapping of values to update or add.
    """
    gc.collect()
    filter = filter.copy()
    keys = list(filter.keys())
    filter.update(values)
    db = dataset.connect(self.url)
    return db[self.result_table_name].update(filter, keys)
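update_result() above leans on Table.update(row, keys): the keys list names the columns used to match rows, and every other key in the row dict becomes an updated value. A hypothetical illustration:

import dataset

db = dataset.connect("sqlite:///:memory:")
results = db["results"]
results.insert({"trial": 1, "_loss": 0.9})
results.update({"trial": 1, "_loss": 0.5}, ["trial"])  # match on `trial`
print(results.find_one(trial=1)["_loss"])              # -> 0.5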
Example 22
def all_complementary(self):
    """Get all entries of the complementary information table as a list.
    The order is undefined.
    """
    gc.collect()
    db = dataset.connect(self.url)
    return list(db[self.complementary_table_name].all())
Example 23
def insert_complementary(self, document):
    """Insert a new document (row) in the complementary information table.
    """
    gc.collect()
    db = dataset.connect(self.url)
    return db[self.complementary_table_name].insert(document)
Example 24
def find_complementary(self, filter):
    """Find a document (row) from the complementary information table.
    """
    gc.collect()
    db = dataset.connect(self.url)
    return db[self.complementary_table_name].find_one(**filter)
Example 25
def get_space(self):
    """Returns the space used for previous experiments.

    Raises:
        AssertionError: If there is more than one space in the database.
    """
    gc.collect()
    db = dataset.connect(self.url)
    entry_count = db[self.space_table_name].count()
    if entry_count == 0:
        return None
    assert entry_count == 1, "Space table unexpectedly contains more than one space."
    return pickle.loads(db[self.space_table_name].find_one()["space"])
Example 26
def insert_space(self, space):
    """Insert a space in the database.

    Raises:
        AssertionError: If a space is already present in the database.
    """
    gc.collect()
    db = dataset.connect(self.url)
    assert db[self.space_table_name].count() == 0, ("Space table cannot contain more than one space, "
                                                    "clear table first.")
    return db[self.space_table_name].insert({"space": pickle.dumps(space)})
Example 27
def load_datastore():
    if not hasattr(settings, '_datastore'):
        # do not pool connections for the datastore
        engine_kwargs = {'poolclass': NullPool}
        settings._datastore = dataset.connect(settings.DATASTORE_URI,
                                              engine_kwargs=engine_kwargs)
        # Use bigint to store integers by default
        settings._datastore.types.integer = settings._datastore.types.bigint
    return settings._datastore
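Two things Example 27 relies on: engine_kwargs is forwarded to SQLAlchemy's create_engine(), so any engine option (here NullPool, to disable pooling) can be set at connect time; and reassigning db.types.integer changes the column type dataset picks for newly created integer columns. A minimal sketch of the first point:

import dataset
from sqlalchemy.pool import NullPool

db = dataset.connect("sqlite:///:memory:",
                     engine_kwargs={"poolclass": NullPool})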
Example 28
def __init__(self, connection_string):
    db = dataset.connect(connection_string)
    self.rooms = db.get_table('rooms', primary_id='role_id', primary_type=db.types.bigint)
    self.settings = db.get_table('settings', primary_id='guild_id', primary_type=db.types.bigint)
    self.invites = db.get_table('invites')
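db.get_table() in Example 28 is the explicit counterpart of db['name']: it lets you choose the primary-key column and its type instead of the default auto-incrementing id. A sketch with illustrative names:

import dataset

db = dataset.connect("sqlite:///:memory:")
rooms = db.get_table("rooms", primary_id="role_id",
                     primary_type=db.types.bigint)
rooms.insert({"role_id": 123456789012345678, "name": "lobby"})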
Example 29
def connect_to_db(db_name=None, db_engine="sqlite"):
    """Connect to a database. Create the database if there isn't one yet.

    The database can be a SQLite DB (either a DB file or an in-memory DB),
    or a PostgreSQL DB. In order to connect to a PostgreSQL DB you first
    have to create a database, create a user, and grant that user all
    necessary privileges on the database and tables.

    'postgresql://<username>:<password>@localhost:<PostgreSQL port>/<db name>'

    Note: at the moment it seems it is not possible to close a connection
    manually (e.g. like calling conn.close() in sqlite3).

    Parameters
    ----------
    db_name : str or None
        database name (without the file extension .db)
    db_engine : str
        database engine ('sqlite' or 'postgres')

    Returns
    -------
    dataset.persistence.database.Database
        connection to a database
    """
    engines = {"sqlite", "postgres"}
    if db_name is None:
        db_string = "sqlite:///:memory:"
        print("New connection to in-memory SQLite DB...")
    else:
        if db_engine == "sqlite":
            db_string = "sqlite:///{}.db".format(db_name)
            print("New connection to SQLite DB...")
        elif db_engine == "postgres":
            db_string = "postgresql://test_user:test_password@localhost:5432/testdb"
            # db_string = \
            #     'postgresql://test_user2:test_password2@localhost:5432/testdb'
            print("New connection to PostgreSQL DB...")
        else:
            raise UnsupportedDatabaseEngine(
                "No database engine with this name. "
                "Choose one of the following: {}".format(engines)
            )
    return dataset.connect(db_string)
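Assuming connect_to_db() above is importable, calling it with no arguments yields a throwaway in-memory SQLite database, which is handy in tests:

db = connect_to_db()                  # in-memory SQLite
db["test"].insert({"value": 42})
print(db["test"].find_one(value=42))  # one row, with an auto `id` added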
Example 30
def export_ctf():
    # TODO: For some unknown reason dataset is only able to see alembic_version
    # during tests, even when using a real sqlite database. This makes this
    # test impossible to pass in sqlite.
    db = dataset.connect(get_app_config("SQLALCHEMY_DATABASE_URI"))

    # Backup database
    backup = tempfile.NamedTemporaryFile()
    backup_zip = zipfile.ZipFile(backup, "w")

    tables = db.tables
    for table in tables:
        result = db[table].all()
        result_file = BytesIO()
        freeze_export(result, fileobj=result_file)
        result_file.seek(0)
        backup_zip.writestr("db/{}.json".format(table), result_file.read())

    # Guarantee that alembic_version is saved into the export
    if "alembic_version" not in tables:
        result = {
            "count": 1,
            "results": [{"version_num": get_current_revision()}],
            "meta": {},
        }
        result_file = BytesIO()
        # json.dump writes str, but BytesIO expects bytes, so encode explicitly
        result_file.write(json.dumps(result).encode("utf-8"))
        result_file.seek(0)
        backup_zip.writestr("db/alembic_version.json", result_file.read())

    # Backup uploads
    uploader = get_uploader()
    uploader.sync()
    upload_folder = os.path.join(
        os.path.normpath(app.root_path), app.config.get("UPLOAD_FOLDER")
    )
    for root, dirs, files in os.walk(upload_folder):
        for file in files:
            parent_dir = os.path.basename(root)
            backup_zip.write(
                os.path.join(root, file),
                arcname=os.path.join("uploads", parent_dir, file),
            )

    backup_zip.close()
    backup.seek(0)
    return backup