/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.microsoft;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.AbstractPOIFSExtractor;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;

public class WordExtractor
extends AbstractPOIFSExtractor {
    /**
     * Creates an extractor bound to the given parse context.
     *
     * @param context parse context, handed unchanged to the
     *                {@link AbstractPOIFSExtractor} constructor
     */
    public WordExtractor(ParseContext context) {
        super(context);
    }

    /**
     * Writes the content of the Word document held in {@code filesystem} to
     * {@code xhtml}.
     * <p>
     * Output order: header text, body paragraphs (tables included), footnotes,
     * comments, endnotes, footer text, every picture that no paragraph claimed,
     * and finally the embedded documents found below the "ObjectPool" entry.
     * When the document is rejected as an old Word format, the work is handed
     * to {@link #parseWord6} instead.
     *
     * @param filesystem POIFS container holding the document
     * @param xhtml      handler receiving the generated XHTML events
     * @throws IOException   if reading the container fails
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException if an embedded resource cannot be processed
     */
    protected void parse(POIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
        HWPFDocument doc;
        try {
            doc = new HWPFDocument(filesystem);
        }
        catch (OldWordFileFormatException oldFormat) {
            // Not readable as a current-format document; use the old-format path.
            this.parseWord6(filesystem, xhtml);
            return;
        }

        org.apache.poi.hwpf.extractor.WordExtractor poiExtractor = new org.apache.poi.hwpf.extractor.WordExtractor(doc);
        this.addTextIfAny(xhtml, "header", poiExtractor.getHeaderText());

        PicturesTable pictureTable = doc.getPicturesTable();
        PicturesSource pictures = new PicturesSource(doc);
        Range body = doc.getRange();

        // handleParagraph reports how many extra paragraphs it consumed (a whole
        // table is emitted in one call), so advance past those as well.
        int index = 0;
        while (index < body.numParagraphs()) {
            Paragraph current = body.getParagraph(index);
            index += 1 + this.handleParagraph(current, 0, body, doc, pictures, pictureTable, xhtml);
        }

        for (String footnote : poiExtractor.getFootnoteText()) {
            xhtml.element("p", footnote);
        }
        for (String comment : poiExtractor.getCommentsText()) {
            xhtml.element("p", comment);
        }
        for (String endnote : poiExtractor.getEndnoteText()) {
            xhtml.element("p", endnote);
        }
        this.addTextIfAny(xhtml, "footer", poiExtractor.getFooterText());

        // Emit whatever pictures were not claimed while walking the paragraphs.
        for (Picture leftover = pictures.nextUnclaimed(); leftover != null; leftover = pictures.nextUnclaimed()) {
            this.handlePictureCharacterRun(null, leftover, pictures, xhtml);
        }

        try {
            DirectoryEntry objectPool = (DirectoryEntry)filesystem.getRoot().getEntry("ObjectPool");
            for (Entry child : objectPool) {
                // Only directories whose name starts with "_" are handed on.
                if (child.getName().startsWith("_") && child instanceof DirectoryEntry) {
                    this.handleEmbededOfficeDoc((DirectoryEntry)child, xhtml);
                }
            }
        }
        catch (FileNotFoundException ignored) {
            // Deliberately ignored: treated as "no embedded objects to extract".
        }
    }

    /**
     * Emits one paragraph, or the whole table that starts at the paragraph.
     * <p>
     * A paragraph that sits in a table while the caller is at table level 0 is
     * expanded into {@code table/tbody/tr/td} markup, with every cell paragraph
     * handled recursively. Any other paragraph becomes a single element whose
     * tag and class come from {@link #buildParagraphTagAndStyle}. A paragraph
     * whose style description or style name is missing is emitted as a plain
     * {@code <p>} rather than failing the parse.
     *
     * @param p                paragraph to emit
     * @param parentTableLevel table level of the caller; 0 outside any table
     * @param r                range that contains {@code p}, used to look up its table
     * @param document         document that supplies the style sheet
     * @param pictures         picture bookkeeping shared across the parse
     * @param pictureTable     table used to test whether a run carries a picture
     * @param xhtml            handler receiving the generated XHTML events
     * @return number of additional paragraphs consumed beyond {@code p}:
     *         {@code t.numParagraphs() - 1} for a table, otherwise 0
     */
    private int handleParagraph(Paragraph p, int parentTableLevel, Range r, HWPFDocument document, PicturesSource pictures, PicturesTable pictureTable, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException {
        if (p.isInTable() && p.getTableLevel() > parentTableLevel && parentTableLevel == 0) {
            Table t = r.getTable(p);
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            for (int rn = 0; rn < t.numRows(); ++rn) {
                TableRow row = t.getRow(rn);
                xhtml.startElement("tr");
                for (int cn = 0; cn < row.numCells(); ++cn) {
                    TableCell cell = row.getCell(cn);
                    xhtml.startElement("td");
                    for (int pn = 0; pn < cell.numParagraphs(); ++pn) {
                        Paragraph cellP = cell.getParagraph(pn);
                        // Passing the table level stops the cell paragraph from
                        // being expanded as a table again; its return value is unused.
                        this.handleParagraph(cellP, p.getTableLevel(), cell, document, pictures, pictureTable, xhtml);
                    }
                    xhtml.endElement("td");
                }
                xhtml.endElement("tr");
            }
            xhtml.endElement("tbody");
            xhtml.endElement("table");
            return t.numParagraphs() - 1;
        }

        // The style description or its name may be absent; fall back to a plain
        // paragraph instead of dereferencing null.
        StyleDescription style = document.getStyleSheet().getStyleDescription(p.getStyleIndex());
        String styleName = style == null ? null : style.getName();
        TagAndStyle tas;
        if (styleName == null || styleName.length() == 0) {
            tas = new TagAndStyle("p", null);
        } else {
            tas = WordExtractor.buildParagraphTagAndStyle(styleName, parentTableLevel > 0);
        }

        if (tas.getStyleClass() != null) {
            xhtml.startElement(tas.getTag(), "class", tas.getStyleClass());
        } else {
            xhtml.startElement(tas.getTag());
        }
        for (int j = 0; j < p.numCharacterRuns(); ++j) {
            CharacterRun cr = p.getCharacterRun(j);
            String runText = cr.text();
            if (runText.equals("\u0013")) {
                // Character 0x13 opens a special run sequence; the helper reports
                // how many runs it consumed so they are skipped here.
                j += this.handleSpecialCharacterRuns(p, j, tas.isHeading(), pictures, xhtml);
                continue;
            }
            if (runText.startsWith("\b")) {
                // One not-yet-claimed picture is taken per character of the run.
                for (int pn = 0; pn < runText.length(); ++pn) {
                    Picture picture = pictures.nextUnclaimed();
                    this.handlePictureCharacterRun(cr, picture, pictures, xhtml);
                }
                continue;
            }
            if (pictureTable.hasPicture(cr)) {
                Picture picture = pictures.getFor(cr);
                this.handlePictureCharacterRun(cr, picture, pictures, xhtml);
                continue;
            }
            // Headings are emitted without bold/italic/strike wrappers.
            this.handleCharacterRun(cr, tas.isHeading(), xhtml);
        }
        xhtml.endElement(tas.getTag());
        return 0;
    }

    /**
     * Emits the text of a single character run, wrapped in {@code b}, {@code i}
     * and {@code s} elements for bold, italic and strike-through unless styling
     * is suppressed.
     * <p>
     * A run consisting only of a carriage return produces no output. Otherwise
     * carriage returns become line feeds and one trailing 0x07 character is
     * dropped before the text is written.
     *
     * @param cr          run to emit
     * @param skipStyling when true, no formatting elements are generated
     * @param xhtml       handler receiving the generated XHTML events
     * @throws SAXException if the handler rejects an event
     */
    private void handleCharacterRun(CharacterRun cr, boolean skipStyling, XHTMLContentHandler xhtml) throws SAXException {
        String content = cr.text();
        if (content.equals("\r")) {
            return;
        }

        List<String> openTags = new ArrayList<String>();
        if (!skipStyling) {
            if (cr.isBold()) {
                openTags.add("b");
            }
            if (cr.isItalic()) {
                openTags.add("i");
            }
            if (cr.isStrikeThrough()) {
                openTags.add("s");
            }
            for (int k = 0; k < openTags.size(); k++) {
                xhtml.startElement(openTags.get(k));
            }
        }

        content = content.replace('\r', '\n');
        if (content.endsWith("\u0007")) {
            content = content.substring(0, content.length() - 1);
        }
        xhtml.characters(content);

        // Close in reverse order so the elements nest properly.
        for (int k = openTags.size(); k-- > 0; ) {
            xhtml.endElement(openTags.get(k));
        }
    }

    /**
     * Handles the run sequence that follows a 0x13 character at position
     * {@code index} of the paragraph.
     * <p>
     * Runs are collected until a 0x15 character or the end of the paragraph.
     * Runs seen before a 0x14 character are treated as control runs, runs after
     * it as display text. If no 0x14 was seen before the 0x15, everything
     * collected is treated as display text. When the concatenated control text
     * starts with {@code HYPERLINK} and holds a properly quoted target, the
     * display text is wrapped in an {@code <a href>} element. A HYPERLINK
     * instruction with fewer than two quote characters is emitted without a
     * link instead of raising an exception.
     *
     * @param p           paragraph being processed
     * @param index       position of the opening 0x13 run within {@code p}
     * @param skipStyling passed through to {@link #handleCharacterRun}
     * @param pictures    picture bookkeeping shared across the parse
     * @param xhtml       handler receiving the generated XHTML events
     * @return distance from {@code index} to the run at which scanning stopped;
     *         callers add it to their own run index
     */
    private int handleSpecialCharacterRuns(Paragraph p, int index, boolean skipStyling, PicturesSource pictures, XHTMLContentHandler xhtml) throws SAXException, TikaException, IOException {
        int i;
        List<CharacterRun> controls = new ArrayList<CharacterRun>();
        List<CharacterRun> texts = new ArrayList<CharacterRun>();
        boolean has14 = false;
        for (i = index + 1; i < p.numCharacterRuns(); ++i) {
            CharacterRun cr = p.getCharacterRun(i);
            String runText = cr.text();
            if (runText.equals("\u0013")) {
                // NOTE(review): the nested call receives i + 1 while its own scan
                // starts at index + 1, so the run directly after the nested 0x13
                // is never inspected. Kept as-is; confirm whether intended.
                int increment = this.handleSpecialCharacterRuns(p, i + 1, skipStyling, pictures, xhtml);
                i += increment;
                continue;
            }
            if (runText.equals("\u0014")) {
                has14 = true;
                continue;
            }
            if (runText.equals("\u0015")) {
                if (!has14) {
                    // No separator seen: what was collected is display text.
                    texts = controls;
                    controls = new ArrayList<CharacterRun>();
                }
                break;
            }
            if (has14) {
                texts.add(cr);
            } else {
                controls.add(cr);
            }
        }

        // Work out the link target, if the control runs describe a hyperlink.
        String url = null;
        if (!controls.isEmpty()) {
            StringBuilder instruction = new StringBuilder();
            for (CharacterRun control : controls) {
                instruction.append(control.text());
            }
            String text = instruction.toString();
            int firstQuote = text.indexOf('"');
            int lastQuote = text.lastIndexOf('"');
            // Two distinct quotes are required; a single quote would make the
            // substring bounds invalid.
            if (text.startsWith("HYPERLINK") && lastQuote > firstQuote) {
                url = text.substring(firstQuote + 1, lastQuote);
            }
        }

        if (url != null) {
            xhtml.startElement("a", "href", url);
            for (CharacterRun cr : texts) {
                this.handleCharacterRun(cr, skipStyling, xhtml);
            }
            xhtml.endElement("a");
        } else if (controls.isEmpty()) {
            for (CharacterRun cr : texts) {
                this.handleCharacterRun(cr, skipStyling, xhtml);
            }
        } else {
            // Control runs that are not a usable hyperlink: display runs may carry pictures.
            for (CharacterRun cr : texts) {
                if (pictures.hasPicture(cr)) {
                    Picture picture = pictures.getFor(cr);
                    this.handlePictureCharacterRun(cr, picture, pictures, xhtml);
                    continue;
                }
                this.handleCharacterRun(cr, skipStyling, xhtml);
            }
        }
        return i - index;
    }

    /**
     * Emits an {@code <img>} reference for a picture and, the first time the
     * picture is seen, passes its content on as an embedded resource.
     * <p>
     * The file name is {@code image<N>} plus the suggested extension when one
     * exists, where N is the number reported by {@code pictures}. A
     * {@code null} picture is ignored.
     *
     * @param cr       run the picture was found in; not used by this method
     * @param picture  picture to emit, may be {@code null}
     * @param pictures bookkeeping used for numbering and output tracking
     * @param xhtml    handler receiving the generated XHTML events
     */
    private void handlePictureCharacterRun(CharacterRun cr, Picture picture, PicturesSource pictures, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException {
        if (picture == null) {
            return;
        }

        String suffix = picture.suggestFileExtension();
        int number = pictures.pictureNumber(picture);
        StringBuilder nameBuilder = new StringBuilder("image").append(number);
        if (suffix.length() > 0) {
            nameBuilder.append(".").append(suffix);
        }
        String filename = nameBuilder.toString();
        String mimeType = picture.getMimeType();

        xhtml.startElement("img", "src", "embedded:" + filename);
        xhtml.endElement("img");

        // The image reference is always written; the content only once.
        if (pictures.hasOutput(picture)) {
            return;
        }
        TikaInputStream content = TikaInputStream.get(picture.getContent());
        this.handleEmbeddedResource(content, filename, mimeType, xhtml, false);
        pictures.recordOutput(picture);
    }

    /**
     * Writes {@code text} as a paragraph inside a {@code div} whose class is
     * {@code section}. Nothing is written for {@code null} or empty text.
     *
     * @param xhtml   handler receiving the generated XHTML events
     * @param section value for the {@code class} attribute of the wrapper
     * @param text    text to emit, may be {@code null}
     * @throws SAXException if the handler rejects an event
     */
    private void addTextIfAny(XHTMLContentHandler xhtml, String section, String text) throws SAXException {
        if (text == null || text.length() == 0) {
            return;
        }
        xhtml.startElement("div", "class", section);
        xhtml.element("p", text);
        xhtml.endElement("div");
    }

    /**
     * Extracts an old-format Word document by writing each paragraph returned
     * by {@link Word6Extractor} as a {@code <p>} element.
     *
     * @param filesystem POIFS container holding the document
     * @param xhtml      handler receiving the generated XHTML events
     */
    protected void parseWord6(POIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
        HWPFOldDocument oldDocument = new HWPFOldDocument(filesystem);
        Word6Extractor word6 = new Word6Extractor(oldDocument);
        for (String paragraphText : word6.getParagraphText()) {
            xhtml.element("p", paragraphText);
        }
    }

    /**
     * Maps a Word paragraph style name to the XHTML tag and CSS class used for
     * the paragraph.
     * <ul>
     *   <li>{@code null}, empty, "Default", "Normal", and "Table Contents"
     *       inside a table: {@code p} without a class</li>
     *   <li>"heading"/"Heading": {@code h1}</li>
     *   <li>names starting with "heading"/"Heading": {@code h<N>}, where N is the
     *       trailing digit limited to 1..6 (1 when there is no trailing digit)</li>
     *   <li>"Title": {@code h1} with class "title"; "Subtitle": {@code h2} with
     *       class "subtitle"</li>
     *   <li>"HTML Preformatted": {@code pre}</li>
     *   <li>anything else: {@code p} with a class derived from the name (spaces
     *       become underscores, first character lower-cased)</li>
     * </ul>
     *
     * @param styleName name of the paragraph style, may be {@code null} or empty
     * @param isTable   whether the paragraph sits inside a table
     * @return the tag and optional class for the paragraph, never {@code null}
     */
    public static TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable) {
        // A missing name carries no style information: plain paragraph.
        if (styleName == null || styleName.length() == 0) {
            return new TagAndStyle("p", null);
        }
        if (styleName.equals("Default") || styleName.equals("Normal") || (styleName.equals("Table Contents") && isTable)) {
            return new TagAndStyle("p", null);
        }
        if (styleName.equals("heading") || styleName.equals("Heading")) {
            return new TagAndStyle("h1", null);
        }
        if (styleName.startsWith("heading") || styleName.startsWith("Heading")) {
            int level = 1;
            try {
                level = Integer.parseInt(styleName.substring(styleName.length() - 1));
            }
            catch (NumberFormatException ignored) {
                // No trailing digit: keep the default level of 1.
            }
            // Only h1..h6 exist in HTML, so keep the level inside that range.
            level = Math.max(1, Math.min(level, 6));
            return new TagAndStyle("h" + level, null);
        }
        if (styleName.equals("Title")) {
            return new TagAndStyle("h1", "title");
        }
        if (styleName.equals("Subtitle")) {
            return new TagAndStyle("h2", "subtitle");
        }
        if (styleName.equals("HTML Preformatted")) {
            return new TagAndStyle("pre", null);
        }
        String styleClass = styleName.replace(' ', '_');
        // Locale.ROOT keeps the generated class name independent of the JVM's
        // default locale.
        styleClass = styleClass.substring(0, 1).toLowerCase(java.util.Locale.ROOT) + styleClass.substring(1);
        return new TagAndStyle("p", styleClass);
    }

    /**
     * Bookkeeping for the pictures of one document: lookup by the picture
     * offset of a character run, numbering, tracking of which pictures were
     * already written out, and iteration over the pictures that no character
     * run references through the pictures table.
     */
    private static class PicturesSource {
        private final PicturesTable picturesTable;
        /** Pictures whose content has already been emitted. */
        private final Set<Picture> output = new HashSet<Picture>();
        /** Pictures keyed by the offset parsed from their suggested file name. */
        private final Map<Integer, Picture> lookup;
        /**
         * Copy of {@link #all} in which every picture referenced by a character
         * run has been replaced with {@code null}.
         */
        private final List<Picture> nonU1based;
        private final List<Picture> all;
        /** Read position within {@link #nonU1based} for {@link #nextUnclaimed()}. */
        private int pn = 0;

        /**
         * Indexes all pictures of {@code doc} and marks those that a character
         * run of the document range refers to.
         */
        private PicturesSource(HWPFDocument doc) {
            this.picturesTable = doc.getPicturesTable();
            this.all = this.picturesTable.getAllPictures();
            this.lookup = new HashMap<Integer, Picture>();
            for (Picture p : this.all) {
                // The suggested name is parsed as "<hex offset>[.<extension>]".
                String name = p.suggestFullFileName();
                int dot = name.indexOf('.');
                if (dot > -1) {
                    name = name.substring(0, dot);
                }
                int offset = Integer.parseInt(name, 16);
                this.lookup.put(offset, p);
            }
            this.nonU1based = new ArrayList<Picture>(this.all);
            Range r = doc.getRange();
            for (int i = 0; i < r.numCharacterRuns(); ++i) {
                CharacterRun cr = r.getCharacterRun(i);
                if (!this.picturesTable.hasPicture(cr)) {
                    continue;
                }
                Picture p = this.getFor(cr);
                if (p == null) {
                    // No picture is registered under this run's offset.
                    continue;
                }
                int at = this.nonU1based.indexOf(p);
                // -1 when another run already claimed the same picture.
                if (at >= 0) {
                    this.nonU1based.set(at, null);
                }
            }
        }

        /** Returns whether the pictures table reports a picture for the run. */
        private boolean hasPicture(CharacterRun cr) {
            return this.picturesTable.hasPicture(cr);
        }

        /** Remembers that the content of {@code picture} has been emitted. */
        private void recordOutput(Picture picture) {
            this.output.add(picture);
        }

        /** Returns whether {@link #recordOutput} was called for {@code picture}. */
        private boolean hasOutput(Picture picture) {
            return this.output.contains(picture);
        }

        /** Returns the 1-based position of the picture in the document's picture list (0 if absent). */
        private int pictureNumber(Picture picture) {
            return this.all.indexOf(picture) + 1;
        }

        /** Returns the picture registered under the run's picture offset, or {@code null}. */
        private Picture getFor(CharacterRun cr) {
            return this.lookup.get(cr.getPicOffset());
        }

        /**
         * Returns the next picture that no character run claimed, advancing the
         * internal position, or {@code null} once all have been handed out.
         */
        private Picture nextUnclaimed() {
            while (this.pn < this.nonU1based.size()) {
                Picture candidate = this.nonU1based.get(this.pn);
                ++this.pn;
                if (candidate != null) {
                    return candidate;
                }
            }
            return null;
        }
    }

    /**
     * Immutable pair of an XHTML tag name and an optional CSS class name.
     */
    public static class TagAndStyle {
        private final String tag;
        private final String styleClass;

        /**
         * @param tag        element name, e.g. "p" or "h1"
         * @param styleClass class attribute value, or {@code null} for none
         */
        public TagAndStyle(String tag, String styleClass) {
            this.styleClass = styleClass;
            this.tag = tag;
        }

        /** Returns the element name. */
        public String getTag() {
            return tag;
        }

        /** Returns the class attribute value, or {@code null} when there is none. */
        public String getStyleClass() {
            return styleClass;
        }

        /** Returns true when the tag is two characters long and begins with "h". */
        public boolean isHeading() {
            if (!tag.startsWith("h")) {
                return false;
            }
            return tag.length() == 2;
        }
    }
}

