1%%capture
2%pip install pymongo pandasInstall a new package inside the notebook, without logs.
1%load_ext autoreload
2%autoreload 2Auto reload the internal package.
1import os
2from dotenv import load_dotenv
3
4# .env is the same location as the notebook
5load_dotenv(override=True)
6
7# .env in the parent folder of the notebook
8notebook_dir = Path(os.popen('pwd').read().strip())
9env_path = notebook_dir.parent / '.env'
10load_dotenv(dotenv_path=env_path, override=True)Load
.env file.1def get_files_from_json(file_path: str) -> list[str]:
2 with open(file_path, 'r', encoding='utf-8') as f:
3 data = json.load(f)
4 return data1def save_files_to_json(data, file_path: str):
2 with open(file_path, 'w', encoding='utf-8') as f:
3 json.dump(data, f, ensure_ascii=False, indent=4)1def get_files_from_folder(folder_path: str, extension: str = "pdf") -> list[str]:
2 return [f.name for f in Path(folder_path).glob(f"*.{extension}")]1def query_database(query: str, database_url: str = None, json_columns: list[str] = None) -> list[dict]:
2 """
3 Execute a SQL query and return results as a list of dictionaries.
4
5 Args:
6 query: SQL query string to execute
7 database_url: Database connection string. If None, uses CONNECTION_STRING_DB from environment
8 json_columns: List of column names whose content should be parsed as JSON objects
9
10 Returns:
11 List of dictionaries, where each dictionary represents a row with column names as keys
12
13 Example:
14 >>> rows = query_database(
15 ... "SELECT * FROM document WHERE bot = '-Obbm0B_36eDCTtOhHcN' LIMIT 10",
16 ... json_columns=['metadata', 'config']
17 ... )
18 >>> print(rows[0]['metadata']['filename']) # metadata is now a dict, not a string
19 """
20 if database_url is None:
21 database_url = os.getenv('CONNECTION_STRING_DB')
22 if database_url is None:
23 raise ValueError("Database URL not provided and CONNECTION_STRING_DB not set in environment")
24
25 engine = create_engine(database_url)
26
27 with engine.connect() as conn:
28 result = conn.execute(text(query))
29 rows = [dict(row._mapping) for row in result]
30
31 # Parse JSON columns if specified
32 if json_columns:
33 for row in rows:
34 for col in json_columns:
35 if col in row and row[col] is not None:
36 try:
37 # Parse JSON string to Python object
38 if isinstance(row[col], str):
39 row[col] = json.loads(row[col])
40 except (json.JSONDecodeError, TypeError):
41 # Keep original value if parsing fails
42 pass
43
44 return rows