Using ChemParse with Python

ChemParse can be seamlessly integrated into Python scripts, enabling the extraction and manipulation of data from ORCA output files using familiar tools like pandas.
Example Usage

import pychemparse as chp
from IPython.display import HTML

# The main class is File

orca_file = chp.File("example.out")
display(orca_file)

# You can convert orca output to HTML
# For viewing inside Jupyter it is better not to insert the CSS and JS

HTML(orca_file.create_html(insert_css=False, insert_js=False))

# With CSS and JS included, the saved HTML file can show much more information

orca_file.save_as_html("example.html", insert_css=True, insert_js=True)

# Let's look at the blocks in the file
# Position stands for the beginning and final lines of the block. Positions of the spacers are not identified

orca_file.get_blocks()

# Let's extract the raw data. `get_data` returns a pandas DataFrame

orca_file.get_data(extract_only_raw=True)

# Let's extract the processed data

# You will get warnings about unrecognized blocks. The type of ExtractedData is either None or orcaparser.data.Data

orca_file.get_data()

# Let's extract the specific block that has "My value" in it

# The same text may occur in different blocks

orca_file.get_data(raw_data_substring="My value")

# The easiest way to extract the needed data in this case is to add some other text that is present in the block

orca_file.get_data(raw_data_substring=("My value", "My data"))

# Or to exclude the ones that are not yours

orca_file.get_data(raw_data_substring="My value", raw_data_not_substring="Not my match")

# Or we can extract the one we need by its readable name (you can find it in the table of contents of the HTML file)

orca_file.get_data(readable_name="My data")

# You can combine more than one search parameter

orca_file.get_data(raw_data_substring=("My value", "My data"), readable_name="My data")

# Let's extract the data and have a look at what it contains

df = orca_file.get_data(readable_name="My data")

data = df.iloc[0].ExtractedData
print(f"{type(data) = }")
print(f"{str(data) = }")

# Even though no method for the extraction was found, the data can be extracted as text

data["raw data"]

# Let's extract the data that is known

# Time is stored as timedelta, tables as pandas DataFrames, values with units as pint Quantity objects

df = orca_file.get_data(readable_name="TOTAL RUN TIME")
data = df.iloc[0].ExtractedData
print(f"{str(data) = }")
print(f'{data["Run Time"] = }')

df = orca_file.get_data(readable_name="FINAL SINGLE POINT ENERGY")
data = df.iloc[0].ExtractedData
print(f"{str(data) = }")
print(f'{data["Energy"] = }')
print(f'{data["Energy"].magnitude = }')
print(f'{data["Energy"].units = }')