From e66499cfe2853f73d1d7241bb6feeafceace79a3 Mon Sep 17 00:00:00 2001
From: Enrico Speranza
Date: Tue, 5 Sep 2023 13:23:23 +0000
Subject: [PATCH] Add ODTFileMetadata

---
 README.md            |  2 +-
 go.mod               |  1 +
 go.sum               |  2 ++
 opencrucible.go      | 21 +++++++++++++++++++++
 opencrucible_test.go | 17 +++++++++++++++++
 5 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 391631e..90cc5cc 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ List of formats read:
 | ------------- | ------------- | ------------- | ------------- |
 | TXT | X | text/plain; charset=utf-8 | |
 | RTF | X | text/rtf | |
-| ODT | X | application/vnd.oasis.opendocument.text | |
+| ODT | X | application/vnd.oasis.opendocument.text | X |
 | DOCX | X | application/vnd.openxmlformats-officedocument.wordprocessingml.document | |
 | PPTX | X | application/vnd.openxmlformats-officedocument.presentationml.presentation | |
 | PDF | X | application/pdf | X |
diff --git a/go.mod b/go.mod
index 35815f5..447f6ff 100644
--- a/go.mod
+++ b/go.mod
@@ -13,6 +13,7 @@ require (
 require (
 	github.com/Tulip-Data/pdf v1.0.2
 	github.com/flotzilla/pdf_parser v0.1.96
+	github.com/gocaio/metagopenoffice v0.0.0-20190424182207-bbc961c10caf
 	github.com/h2non/filetype v1.1.3
 	github.com/lu4p/cat v0.1.5
 	golang.org/x/net v0.10.0 // indirect
diff --git a/go.sum b/go.sum
index 27dd7a1..a4afeae 100644
--- a/go.sum
+++ b/go.sum
@@ -8,6 +8,8 @@ github.com/flotzilla/pdf_parser v0.1.96/go.mod h1:/CPB1OWEeFqRbtnFWXgArmOnA3u7sm
 github.com/gabriel-vasile/mimetype v1.1.1/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To=
 github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
 github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
+github.com/gocaio/metagopenoffice v0.0.0-20190424182207-bbc961c10caf h1:ZGa9zKy3lONMNzsJzgP9LEECjjzFzTSTJllM3GEIq7c=
+github.com/gocaio/metagopenoffice v0.0.0-20190424182207-bbc961c10caf/go.mod h1:RySwFSDrnowBfrD7tQP1DucjTiKwCMdI+67mMy8YaaM=
 github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg=
 github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY=
 github.com/lu4p/cat v0.1.5 h1:s51Bp/ns3u6n+hjjL2F77ySY6j/GD5SJG/t6Ok4Y1S0=
diff --git a/opencrucible.go b/opencrucible.go
index 28f3c4c..9d51d9b 100644
--- a/opencrucible.go
+++ b/opencrucible.go
@@ -12,6 +12,7 @@ import (
 	"github.com/h2non/filetype"
 	"github.com/lu4p/cat"
 	"github.com/flotzilla/pdf_parser"
+	"github.com/gocaio/metagopenoffice"
 )
 
 // Version exposes the current package version.
@@ -151,4 +152,24 @@ func PPTXFileParseToString(FileToParse string) (string, error) {
 func PDFFileMetadata(FileToParse string) (*pdf_parser.PdfInfo, error) {
 	pdf_parsed, errors := pdf_parser.ParsePdf(FileToParse)
 	return pdf_parsed, errors
+}
+
+// ODTFileMetadata opens the ODT document at FileToParse and returns the
+// metadata that metagopenoffice.GetMetada extracts from it. If the file cannot
+// be opened or GetMetada fails, the returned pointer is nil and the error is set.
+//
+// See for return: https://stackoverflow.com/questions/50697914/return-nil-for-a-struct-in-go
+func ODTFileMetadata(FileToParse string) (*metagopenoffice.OpenOfficeXML, error) {
+	file, err := os.Open(FileToParse)
+	if err != nil {
+		return nil, fmt.Errorf("error opening file: %w", err)
+	}
+	// GetMetada is handed the open handle, so close it only when we return.
+	defer file.Close()
+
+	content, err := metagopenoffice.GetMetada(file)
+	if err != nil {
+		return nil, err
+	}
+	return &content, nil
 }
\ No newline at end of file
diff --git a/opencrucible_test.go b/opencrucible_test.go
index be96040..b4a3b64 100644
--- a/opencrucible_test.go
+++ b/opencrucible_test.go
@@ -62,6 +62,23 @@ func TestODTFileParser(t *testing.T) {
 	}
 }
 
+// TestODTMetadata checks that ODTFileMetadata reports the Generator recorded
+// in the sample ODT document.
+func TestODTMetadata(t *testing.T) {
+	got, err := ODTFileMetadata(filepath.Join("test_file", "test_file_odt.odt"))
+	if err != nil {
+		// Stop here: got is nil when loading fails, so the checks below would panic.
+		t.Fatalf("error loading file \n %s", err)
+	}
+	want := "LibreOffice/7.0.5.2$Windows_X86_64 LibreOffice_project/64390860c6cd0aca4beafafcfd84613dd9dfb63a"
+	t.Logf("Parsed: %s", got.Meta.Generator)
+	t.Logf("Parsed: %s", got.Meta.Title)
+	t.Logf("Parsed: %s", got.Meta.CreationDate)
+	if got.Meta.Generator != want {
+		t.Errorf("got %q, wanted %q", got.Meta.Generator, want)
+	}
+}
+
 func TestRTFParser(t *testing.T) {
 	rtf, err := os.ReadFile(filepath.Join("test_file", "test_file_rtf.rtf"))
 	if err != nil {