Open XML SDK Code Examples - Asbjoedt/CLISC GitHub Wiki

Writing code in C# with Open XML SDK.

Check if any cell values exist

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (SpreadsheetDocument spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    bool hascellvalue = false;
    WorkbookPart wbPart = spreadsheet.WorkbookPart;
    DocumentFormat.OpenXml.Spreadsheet.Sheets allSheets = wbPart.Workbook.Sheets;
    if (allSheets != null)
    {
        foreach (Sheet aSheet in allSheets)
        {
            WorksheetPart wsp = (WorksheetPart)spreadsheet.WorkbookPart.GetPartById(aSheet.Id);
            DocumentFormat.OpenXml.Spreadsheet.Worksheet worksheet = wsp.Worksheet;
            var rows = worksheet.GetFirstChild<SheetData>().Elements<Row>();
            int row_count = rows.Count();
            if (row_count > 0)
            {
                hascellvalue = true;
            }
        }
        if (hascellvalue == true)
        {
            Console.WriteLine("--> Cell value detected");
        }
    }
}

Find IDs and names of all worksheets

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (SpreadsheetDocument spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    Sheets allSheets = spreadsheet.WorkbookPart.Workbook.Sheets;
    foreach (Sheet aSheet in allSheets)
    {
        Console.WriteLine($"--> ID: {aSheet.SheetId.Value}, Name: {aSheet.Name}");
    }
    if (allSheets == null)
    {
        Console.WriteLine("--> No worksheets exist");
    }
}

Find conformance class

The OOXML standard specifies, that strict conformance must be specified in a conformance class, but if the spreadsheet has Transitional conformance it may be specified in the same class, but the conformance class may be left out alltogether and if so, then the spreadsheet defaults to Transitional conformance. Therefore, we have no sure way of determining Transitional conformance. It may only be determined through logic.

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (var spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    var info = spreadsheet.WorkbookPart.Workbook;
    if (info != null)
    {
        Console.WriteLine(info.Conformance); // Value may be "strict" or "transitional"
    }
    else
    {
        Console.WriteLine(transitional); // By logic, value is "transitional"
    }
}

Another method to find conformance is through the boolean property "StrictRelationshipFound".

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (var spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    bool? strict = spreadsheet.StrictRelationshipFound; // a boolean
    if (strict == true)
    {
        Console.WriteLine("strict");
    }
    else
    {
        Console.WriteLine("transitional");
    }
}

Change conformance class

Change conformance class from Transitional to Strict.

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (var spreadsheet = SpreadsheetDocument.Open(input_filepath, true))
{
    Workbook workbook = spreadsheet.WorkbookPart.Workbook;
    if (workbook.Conformance == null || workbook.Conformance != "strict")
    {
        workbook.Conformance.Value = ConformanceClass.Enumstrict;
    }
}

Find namespaces

Workbook

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (var spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    var info = spreadsheet.WorkbookPart.Workbook.NamespaceDeclarations;
    foreach (var item in info)
    {
        Console.WriteLine(item);
    }
}

Worksheets

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (var spreadsheet = SpreadsheetDocument.Open(file, false))
{
    var allSheets = spreadsheet.WorkbookPart.Workbook.Sheets;
    foreach (Sheet aSheet in allSheets)
    {
        Console.WriteLine(aSheet.Name);
        Worksheet sheet = ((WorksheetPart)spreadsheet.WorkbookPart.GetPartById($"rId{aSheet.SheetId}")).Worksheet;

        var names = sheet.NamespaceDeclarations.ToList();
        foreach (var name in names)
        {
            Console.WriteLine($"Namespace: {name.Value} --> XML Prefix: {name.Key}");
        }
    }
}

Use AddNamespaceDeclaration(String, String) and RemoveNamespaceDeclaration(String) to change namespaces.

Find embedded objects (OLE, images and 3D models)

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (var spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    var list = spreadsheet.WorkbookPart.WorksheetParts.ToList();
    foreach (var item in list)
    {
        int count_ole = item.EmbeddedObjectParts.Count(); // Register the number of OLE
        int count_image = item.ImageParts.Count(); // Register number of images
        int count_3d = item.Model3DReferenceRelationshipParts.Count(); // Register number of 3D models
        int count_embedobj = count_ole + count_image + count_3d; // Sum

        if (count_embedobj > 0) // If embedded objects
        {
            Console.WriteLine($"--> {count_embedobj} embedded objects detected");
            var embed_ole = item.EmbeddedObjectParts.ToList(); // Register each OLE to a list
            var embed_image = item.ImageParts.ToList(); // Register each image to a list
            var embed_3d = item.Model3DReferenceRelationshipParts.ToList(); // Register each 3D model to a list
            int embedobj_number = 0;
            foreach (var part in embed_ole) // Inform user of each OLE object
            {
                embedobj_number++;
                Console.WriteLine($"--> Embedded object #{embedobj_number}");
                Console.WriteLine($"----> Content Type: {part.ContentType.ToString()}");
                Console.WriteLine($"----> URI: {part.Uri.ToString()}");

            }
            foreach (var part in embed_image) // Inform user of each image object
            {
                embedobj_number++;
                Console.WriteLine($"--> Embedded object #{embedobj_number}");
                Console.WriteLine($"----> Content Type: {part.ContentType.ToString()}");
                Console.WriteLine($"----> URI: {part.Uri.ToString()}");
            }
            foreach (var part in embed_3d) // Inform user of each 3D object
            {
                embedobj_number++;
                Console.WriteLine($"--> Embedded object #{embedobj_number}");
                Console.WriteLine($"----> Content Type: {part.ContentType.ToString()}");
                Console.WriteLine($"----> URI: {part.Uri.ToString()}");
            }
        }
    }
}

Find external relationships

Check for links to other spreadsheets such as linked values in cells and linked OLE objects (unembedded). It does not check for external data connections. Check for RTD functions is unknown, because I do not have a test sample with RTD functions.

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (SpreadsheetDocument spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    List<ExternalRelationship> extRels = spreadsheet // Find all external relationships
    .GetAllParts()
    .SelectMany(p => p.ExternalRelationships)
    .ToList();

    if (extRels.Count > 0) // If external relationships
    {
        int extrel_number = 0;
        Console.WriteLine($"--> {extRels.Count} external relationships detected");
        foreach (ExternalRelationship rel in extRels)
        {
            extrel_number++;
            Console.WriteLine($"--> #{extrel_number} external relationship detected");
            Console.WriteLine($"----> ID: {rel.Id}");
            Console.WriteLine($"----> Target URI: {rel.Uri}");
            Console.WriteLine($"----> Relationship type: {rel.RelationshipType}");
            Console.WriteLine($"----> External: {rel.IsExternal}");
            Console.WriteLine($"----> Container: {rel.Container}");
        }
    }
    else // If no external relationships, inform user
    {
        Console.WriteLine($"--> {extRels.Count} external relationships");
    }
}

Find RTD functions

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

using (SpreadsheetDocument spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    WorkbookPart wbPart = spreadsheet.WorkbookPart;
    DocumentFormat.OpenXml.Spreadsheet.Sheets allSheets = wbPart.Workbook.Sheets;
    foreach (Sheet aSheet in allSheets)
    {
        WorksheetPart wsp = (WorksheetPart)spreadsheet.WorkbookPart.GetPartById(aSheet.Id);
        DocumentFormat.OpenXml.Spreadsheet.Worksheet worksheet = wsp.Worksheet;
        var rows = worksheet.GetFirstChild<SheetData>().Elements<Row>(); // Find all rows
        foreach (var row in rows)
        {
            var cells = row.Elements<Cell>();
            foreach (Cell cell in cells)
            {
                if (cell.CellFormula != null)
                {
                    string formula = cell.CellFormula.InnerText;
                    string hit = formula.Substring(0, 3); // Transfer first 3 characters to string
                    if (hit == "RTD")
                    {
                        rtd_functions++;
                        Console.WriteLine($"--> RTD function in sheet \"{aSheet.Name}\" cell {cell.CellReference} detected");
                    }
                }
            }
        }
    }
}

Validate file format

using System.Collections.Generic;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using DocumentFormat.OpenXml.Validation;

using (var spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    var validator = new OpenXmlValidator();
    var validation_errors = validator.Validate(spreadsheet).ToList();
    int error_count = validation_errors.Count;
    int error_number = 0;

    if (validation_errors.Any()) // If errors
    {
        Console.WriteLine($"--> File format is invalid - Spreadsheet has {error_count} validation errors");
        foreach (var error in validation_errors)
        {
            error_number++;
            Console.WriteLine("--> Error " + error_number);
            Console.WriteLine("----> Id: " + error.Id);
            Console.WriteLine("----> Description: " + error.Description);
            Console.WriteLine("----> ErrorType: " + error.ErrorType);
            Console.WriteLine("----> Node: " + error.Node);
            Console.WriteLine("----> Path: " + error.Path.XPath);
            Console.WriteLine("----> Part: " + error.Part.Uri);
            if (error.RelatedNode != null)
            {
                Console.WriteLine("----> Related Node: " + error.RelatedNode);
                Console.WriteLine("----> Related Node Inner Text: " + error.RelatedNode.InnerText);
            }
        }
    }
    else // If no errors
    {
        Console.WriteLine("--> File format is valid");
    }
}

Find hyperlinks

using (SpreadsheetDocument spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    List<HyperlinkRelationship> hyperlinks = spreadsheet
        .GetAllParts()
        .SelectMany(p => p.HyperlinkRelationships)
        .ToList();
    int hyperlinks_count = hyperlinks.Count;

    if (hyperlinks.Count > 0) // If hyperlinks
    {
        Console.WriteLine($"--> {hyperlinks_count} hyperlinks detected");
        int hyperlink_number = 0;
        foreach (HyperlinkRelationship hyperlink in hyperlinks)
        {
            hyperlink_number++;
            Console.WriteLine($"--> Hyperlink: {hyperlink_number}");
            Console.WriteLine($"----> Address: {hyperlink.Uri}");
        }
    }
}

Find data connections

using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

int conn_count = 0;

using (SpreadsheetDocument spreadsheet = SpreadsheetDocument.Open(filepath, false))
{
    ConnectionsPart conn = spreadsheet.WorkbookPart.ConnectionsPart;
    if (conn != null)
    {
        conn_count = conn.Connections.Count();
        Console.WriteLine($"--> {conn_count} data connections detected");
    }
}
⚠️ **GitHub.com Fallback** ⚠️