Using ChemParse with Python

ChemParse can be seamlessly integrated into Python scripts, enabling the extraction and manipulation of data from ORCA output files using familiar tools like pandas.

Example Usage

import pychemparse as chp
from IPython.display import HTML


# The main class is File

orca_file = chp.File("example.out")
display(orca_file)


# You can convert orca output to HTML
# It is better not to insert CSS and JS for the Jupyter view

HTML(orca_file.create_html(insert_css=False, insert_js=False))

# CSS and JS can give way more information

orca_file.save_as_html("example.html", insert_css=True, insert_js=True)

# Let's look at the blocks in the file
# Position stands for the beginning and final lines of the block. Positions of the spacers are not identified

orca_file.get_blocks()

# Let's extract the raw data. `get_data` returns a pandas DataFrame

orca_file.get_data(extract_only_raw=True)

# Let's extract the processed data

# You will get warnings about the unrecognized blocks. ExtractedData type is None or orcaparser.data.Data

orca_file.get_data()


# Let's extract the specific block that has "My value" in it

# The same text may occur in different blocks


orca_file.get_data(raw_data_substring="My value")


# The easiest way to extract the needed data in this case is to add some other text that is present in the block

orca_file.get_data(raw_data_substring=("My value", "My data"))


# Or to exclude the ones that are not yours

orca_file.get_data(raw_data_substring="My value", raw_data_not_substring="Not my match")

# Or we can extract the one we need by its readable name (you can find it in the HTML file's TOC)

orca_file.get_data(readable_name="My data")

# You can combine more than one search parameter

orca_file.get_data(raw_data_substring=("My value", "My data"), readable_name="My data")

# Let's extract the data and have a look at what it contains

df = orca_file.get_data(readable_name="My data")

data = df.iloc[0].ExtractedData
print(f"{type(data) = }")
print(f"{str(data) = }")

# Even though no method for the extraction was found, the data can be extracted as text

data["raw data"]

# Let's extract the data that is known

# Time is stored as timedelta, tables as pandas DataFrames, values with units as pint Quantity objects

df = orca_file.get_data(readable_name="TOTAL RUN TIME")
data = df.iloc[0].ExtractedData
print(f"{str(data) = }")
print(f'{data["Run Time"] = }')

df = orca_file.get_data(readable_name="FINAL SINGLE POINT ENERGY")
data = df.iloc[0].ExtractedData
print(f"{str(data) = }")
print(f'{data["Energy"] = }')
print(f'{data["Energy"].magnitude = }')
print(f'{data["Energy"].units = }')
_images/python_pd.png