Aller au contenu
- Base:
- import duckdb
- duckdb.sql("SELECT 42").show()
- r1 = duckdb.sql("SELECT 42 AS i")
- duckdb.sql("SELECT i * 2 AS k FROM r1").show()
- Data Input
- import duckdb
- duckdb.read_csv("example.csv") # read a CSV file into a Relation
- duckdb.read_parquet("example.parquet") # read a Parquet file into a Relation
- duckdb.read_json("example.json") # read a JSON file into a Relation
- duckdb.sql("SELECT * FROM 'example.csv'") # directly query a CSV file
- duckdb.sql("SELECT * FROM 'example.parquet'") # directly query a Parquet file
- duckdb.sql("SELECT * FROM 'example.json'") # directly query a JSON file
- Pandas
- pandas_df = pd.DataFrame({"a": [42]})
- duckdb.sql("SELECT * FROM pandas_df")
- Polars
- polars_df = pl.DataFrame({"a": [42]})
- duckdb.sql("SELECT * FROM polars_df")
- PyArrow
- arrow_table = pa.Table.from_pydict({"a": [42]})
- duckdb.sql("SELECT * FROM arrow_table")
- Result Conversion
- duckdb.sql("SELECT 42").fetchall() # Python objects
- duckdb.sql("SELECT 42").df() # Pandas DataFrame
- duckdb.sql("SELECT 42").pl() # Polars DataFrame
- duckdb.sql("SELECT 42").arrow() # Arrow Table
- duckdb.sql("SELECT 42").fetchnumpy() # NumPy Arrays
- Writing Data to Disk
- duckdb.sql("SELECT 42").write_parquet("out.parquet") # Write to a Parquet file
- duckdb.sql("SELECT 42").write_csv("out.csv") # Write to a CSV file
- duckdb.sql("COPY (SELECT 42) TO 'out.parquet'") # Copy to a Parquet file
- Using In Memory
- con = duckdb.connect()
- con.sql("SELECT 42 AS x").show()
- Persistent Storage
- # create a connection to a file called 'file.db'
- con = duckdb.connect("file.db")
- # create a table and load data into it
- con.sql("CREATE TABLE test (i INTEGER)")
- con.sql("INSERT INTO test VALUES (42)")
- # query the table
- con.table("test").show()
- # explicitly close the connection
- con.close()
- # Note: connections are also closed implicitly when they go out of scope
- Configuration
- con = duckdb.connect(config = {'threads': 1})