package org.apache.tika.parser.microsoft;

import java.io.FileNotFoundException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import nxt.gg;
import nxt.z70;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.model.FieldsDocumentPart;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Field;
import org.apache.poi.hwpf.usermodel.HeaderStories;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: classes.dex */
public class WordExtractor extends AbstractPOIFSExtractor {
    // Lookup from a Word paragraph style name to the tag/class pair emitted for it.
    // Filled once by the static initializer and consulted first by e(String, boolean).
    public static final HashMap j;
    // Shared default mapping: a plain "p" element with no class. Registered in j under
    // "Default" and "Normal" and returned by e(...) for "Table Contents" inside a table.
    public static final TagAndStyle k;
    // True while an "s" (strike-through) element started by g(...) has not been ended yet.
    public boolean g;
    // True while a "b" (bold) element started by g(...) has not been ended yet.
    public boolean h;
    // True while an "i" (italic) element started by g(...) has not been ended yet.
    public boolean i;

    /* loaded from: classes.dex */
    /**
     * Bookkeeping for the pictures of one HWPF document: looks pictures up by character run,
     * remembers which ones have been recorded as output, and hands out the pictures that no
     * character run of the document range refers to.
     */
    public static class PicturesSource {
        /** Picture table of the document; used to test whether a run carries a picture. */
        public final PicturesTable a;
        /** Pictures recorded through {@link #f(PicturesSource, Picture)}. */
        public final HashSet<Picture> b;
        /** Every picture keyed by its start offset, for lookup by a run's picture offset. */
        public final HashMap<Integer, Picture> c;
        /**
         * Copy of {@link #e} in which each picture reachable from a character run of
         * {@code getRange()} has been replaced by {@code null}; only unreferenced pictures remain.
         */
        public final ArrayList<Picture> d;
        /** All pictures in table order; a picture's 1-based position here is its image number. */
        public final List<Picture> e;
        /** Read cursor into {@link #d} used by {@link #i()}. */
        public int f;

        /**
         * Indexes all pictures of the document and works out which ones are not referenced by
         * any character run.
         *
         * @param hWPFDocument document whose picture table and range are scanned
         */
        public PicturesSource(HWPFDocument hWPFDocument) {
            this.b = new HashSet<>();
            this.f = 0;
            this.a = hWPFDocument.getPicturesTable();
            this.e = this.a.getAllPictures();
            this.c = new HashMap<>();
            for (Picture picture : this.e) {
                this.c.put(picture.getStartOffset(), picture);
            }
            this.d = new ArrayList<>(this.e);
            Range range = hWPFDocument.getRange();
            for (int runIndex = 0; runIndex < range.numCharacterRuns(); runIndex++) {
                CharacterRun characterRun = range.getCharacterRun(runIndex);
                if (!this.a.hasPicture(characterRun)) {
                    continue;
                }
                Picture referenced = g(characterRun);
                // The lookup can miss (no picture at that offset) and two runs can point at the
                // same picture; in both cases there is nothing left to clear, and calling
                // set(-1, ...) would throw.
                int position = referenced == null ? -1 : this.d.indexOf(referenced);
                if (position >= 0) {
                    this.d.set(position, null);
                }
            }
        }

        /** Compiler-generated accessor constructor; the int argument is ignored. */
        public /* synthetic */ PicturesSource(HWPFDocument hWPFDocument, int i) {
            this(hWPFDocument);
        }

        /**
         * Returns the 1-based position of the picture in the document's picture list
         * (0 when the picture is not in the list).
         */
        public static int d(PicturesSource picturesSource, Picture picture) {
            return picturesSource.e.indexOf(picture) + 1;
        }

        /** Tells whether the picture was already recorded via {@link #f(PicturesSource, Picture)}. */
        public static boolean e(PicturesSource picturesSource, Picture picture) {
            return picturesSource.b.contains(picture);
        }

        /** Records the picture so that {@link #e(PicturesSource, Picture)} reports it afterwards. */
        public static void f(PicturesSource picturesSource, Picture picture) {
            picturesSource.b.add(picture);
        }

        /**
         * Looks up the picture whose start offset equals the run's picture offset.
         *
         * @return the picture, or {@code null} when none is stored at that offset
         */
        public final Picture g(CharacterRun characterRun) {
            return this.c.get(characterRun.getPicOffset());
        }

        /** Delegates to the picture table to test whether the run carries a picture. */
        public final boolean h(CharacterRun characterRun) {
            return this.a.hasPicture(characterRun);
        }

        /**
         * Returns the next picture that no character run referred to, advancing the cursor past
         * it, or {@code null} once the list is exhausted.
         */
        public final Picture i() {
            while (this.f < this.d.size()) {
                Picture candidate = this.d.get(this.f);
                this.f++;
                if (candidate != null) {
                    return candidate;
                }
            }
            return null;
        }
    }

    /* loaded from: classes.dex */
    /**
     * Immutable pair of an XHTML element name and an optional value for its {@code class}
     * attribute. Note that the accessor names are crossed relative to the field names.
     */
    public static class TagAndStyle {
        /** Element name, e.g. "p" or "h1". */
        public final String a;
        /** Class attribute value; may be {@code null} when no class is emitted. */
        public final String b;

        /**
         * @param str  element name
         * @param str2 class attribute value, or {@code null} for none
         */
        public TagAndStyle(String str, String str2) {
            this.b = str2;
            this.a = str;
        }

        /** Returns the class attribute value (field {@code b}); may be {@code null}. */
        public final String a() {
            return b;
        }

        /** Returns the element name (field {@code a}). */
        public final String b() {
            return a;
        }

        /** True when the element name is exactly two characters long and begins with 'h'. */
        public final boolean c() {
            if (a.length() != 2) {
                return false;
            }
            return a.charAt(0) == 'h';
        }
    }

    // Builds the fixed style-name lookup table. "Default" and "Normal" share the single default
    // paragraph mapping that is also exposed as k; every other entry gets its own instance.
    static {
        k = new TagAndStyle("p", null);
        j = new HashMap();
        j.put("Default", k);
        j.put("Normal", k);
        for (String headingName : new String[] {"heading", "Heading"}) {
            j.put(headingName, new TagAndStyle("h1", null));
        }
        j.put("Title", new TagAndStyle("h1", "title"));
        j.put("Subtitle", new TagAndStyle("h2", "subtitle"));
        j.put("HTML Preformatted", new TagAndStyle("pre", null));
    }

    /**
     * Creates an extractor for the given parse context.
     *
     * <p>The first superclass constructor argument is passed as {@code null}; what that argument
     * means is defined by {@code AbstractPOIFSExtractor}, which is not visible in this file.
     *
     * @param parseContext context forwarded unchanged to the superclass
     */
    public WordExtractor(ParseContext parseContext) {
        super(null, parseContext);
    }

    /**
     * Maps a Word paragraph style name to the element name and class to emit for it.
     *
     * <p>Resolution order: an exact match in the lookup table {@code j}; "Table Contents" inside
     * a table becomes the default paragraph; names starting with "heading"/"Heading" become
     * {@code h1}..{@code h6} according to their last character; anything else becomes a
     * {@code p} whose class is the style name with spaces turned into underscores and the first
     * character lower-cased.
     *
     * @param str style name; must not be {@code null}
     * @param z   whether the paragraph is being rendered inside a table
     * @return the mapping to use, never {@code null}
     */
    public static TagAndStyle e(String str, boolean z) {
        TagAndStyle known = (TagAndStyle) j.get(str);
        if (known != null) {
            return known;
        }
        if (str.equals("Table Contents") && z) {
            return k;
        }
        if (str.isEmpty()) {
            // No name to derive a class from; substring(0, 1) below would throw.
            return k;
        }
        if (str.startsWith("heading") || str.startsWith("Heading")) {
            int level = 1;
            try {
                level = Integer.parseInt(str.substring(str.length() - 1));
            } catch (NumberFormatException ignored) {
                // No trailing digit: keep the default level 1.
            }
            // Only h1..h6 exist. A trailing '0' (e.g. "Heading 10") must not produce "h0".
            int clamped = Math.max(1, Math.min(level, 6));
            return new TagAndStyle("h" + clamped, null);
        }
        String underscored = str.replace(' ', '_');
        String styleClass = underscored.substring(0, 1).toLowerCase(Locale.ROOT) + underscored.substring(1);
        return new TagAndStyle("p", styleClass);
    }

    /**
     * Adds up the paragraph counts of the given ranges, skipping {@code null} entries.
     *
     * @param rangeArr ranges to count; individual elements may be {@code null}
     * @return total number of paragraphs over all non-null ranges
     */
    public static int f(Range... rangeArr) {
        int total = 0;
        for (int index = 0; index < rangeArr.length; index++) {
            Range current = rangeArr[index];
            total += (current == null) ? 0 : current.numParagraphs();
        }
        return total;
    }

    /**
     * Decides whether a run should be rendered: a {@code null} run counts as renderable, any
     * other run is renderable unless it is marked as deleted.
     */
    public static boolean l(CharacterRun characterRun) {
        if (characterRun == null) {
            return true;
        }
        return !characterRun.isMarkedDeleted();
    }

    /**
     * Fallback for old-format Word files: reads the node as an {@code HWPFOldDocument} and passes
     * each paragraph text from the {@code Word6Extractor} to the handler under the name "p".
     *
     * @param directoryNode       storage node holding the old-format document
     * @param xHTMLContentHandler handler receiving one {@code g("p", text)} call per paragraph
     */
    public static void n(DirectoryNode directoryNode, XHTMLContentHandler xHTMLContentHandler) {
        HWPFOldDocument oldDocument = new HWPFOldDocument(directoryNode);
        Word6Extractor oldExtractor = new Word6Extractor(oldDocument);
        for (String paragraphText : oldExtractor.getParagraphText()) {
            xHTMLContentHandler.g("p", paragraphText);
        }
    }

    /**
     * Emits the text of one character run, first bringing the open "b"/"i"/"s" elements in line
     * with the run's bold, italic and strike-through flags.
     *
     * <p>Runs that are marked deleted, or that consist solely of a carriage return, produce no
     * output. The open-element state lives in the fields {@code h} (bold), {@code i} (italic)
     * and {@code g} (strike-through). Elements are nested b, then i, then s, so changing an
     * outer one first ends the inner ones that are still open.
     *
     * @param characterRun        run to render
     * @param z                   when true, formatting elements are left untouched and only the
     *                            text is emitted
     * @param xHTMLContentHandler output handler; {@code l}/{@code h} appear to start/end an
     *                            element and {@code f} to write character data
     */
    public final void g(CharacterRun characterRun, boolean z, XHTMLContentHandler xHTMLContentHandler) {
        if (!l(characterRun)) {
            return;
        }
        if (characterRun.text().equals("\r")) {
            return;
        }

        if (!z) {
            final boolean bold = characterRun.isBold();
            if (bold != this.h) {
                // Bold is the outermost element: end strike-through and italic before toggling.
                if (this.g) {
                    xHTMLContentHandler.h("s");
                    this.g = false;
                }
                if (this.i) {
                    xHTMLContentHandler.h("i");
                    this.i = false;
                }
                if (bold) {
                    xHTMLContentHandler.l("b");
                } else {
                    xHTMLContentHandler.h("b");
                }
                this.h = bold;
            }

            final boolean italic = characterRun.isItalic();
            if (italic != this.i) {
                // Italic encloses strike-through only.
                if (this.g) {
                    xHTMLContentHandler.h("s");
                    this.g = false;
                }
                if (italic) {
                    xHTMLContentHandler.l("i");
                } else {
                    xHTMLContentHandler.h("i");
                }
                this.i = italic;
            }

            final boolean struck = characterRun.isStrikeThrough();
            if (struck != this.g) {
                if (struck) {
                    xHTMLContentHandler.l("s");
                } else {
                    xHTMLContentHandler.h("s");
                }
                this.g = struck;
            }
        }

        String cleaned = characterRun.text().replace('\r', '\n');
        if (cleaned.endsWith("\u0007")) {
            // Opaque helper; presumably drops the trailing U+0007 character - confirm.
            cleaned = gg.q(cleaned, 1, 0);
        }
        // Char 30 becomes U+2011 (8209) and char 31 becomes U+200B (8203).
        cleaned = cleaned.replace((char) 30, (char) 8209);
        cleaned = cleaned.replace((char) 31, (char) 8203);
        // Every remaining control character is turned into a newline.
        cleaned = cleaned.replaceAll("[\u0000-\u001f]", "\n");
        xHTMLContentHandler.f(cleaned);
    }

    /**
     * Renders a group of header or footer ranges inside one {@code div} whose class is
     * {@code str}. Nothing is emitted when the ranges contain no paragraphs at all.
     *
     * @param rangeArr            ranges to render in order; elements may be {@code null}
     * @param str                 value of the wrapping div's class attribute
     * @param hWPFDocument        owning document
     * @param picturesSource      picture bookkeeping passed through to paragraph handling
     * @param picturesTable       picture table passed through to paragraph handling
     * @param xHTMLContentHandler output handler
     */
    public final void h(Range[] rangeArr, String str, HWPFDocument hWPFDocument, PicturesSource picturesSource, PicturesTable picturesTable, XHTMLContentHandler xHTMLContentHandler) {
        if (f(rangeArr) <= 0) {
            return;
        }
        xHTMLContentHandler.m("div", "class", str);
        ListManager listManager = new ListManager(hWPFDocument);
        for (Range section : rangeArr) {
            if (section == null) {
                continue;
            }
            for (int paragraphIndex = 0; paragraphIndex < section.numParagraphs(); paragraphIndex++) {
                // The paragraph handler reports how many extra paragraphs it consumed.
                paragraphIndex += i(section.getParagraph(paragraphIndex), 0, section, hWPFDocument, FieldsDocumentPart.HEADER, picturesSource, picturesTable, listManager, xHTMLContentHandler);
            }
        }
        xHTMLContentHandler.h("div");
    }

    /* JADX WARN: Type inference failed for: r0v10 */
    /* JADX WARN: Type inference failed for: r0v7 */
    /* JADX WARN: Type inference failed for: r0v8, types: [boolean, int] */
    public final int i(Paragraph paragraph, int i, Range range, HWPFDocument hWPFDocument, FieldsDocumentPart fieldsDocumentPart, PicturesSource picturesSource, PicturesTable picturesTable, ListManager listManager, XHTMLContentHandler xHTMLContentHandler) {
        TagAndStyle tagAndStyle;
        ?? r0;
        int i2;
        Field fieldByStartOffset;
        if (paragraph.isInTable() && paragraph.getTableLevel() > i && i == 0) {
            Table table = range.getTable(paragraph);
            String str = "table";
            xHTMLContentHandler.l("table");
            String str2 = "tbody";
            xHTMLContentHandler.l("tbody");
            int i3 = 0;
            while (i3 < table.numRows()) {
                TableRow row = table.getRow(i3);
                String str3 = "tr";
                xHTMLContentHandler.l("tr");
                int i4 = 0;
                while (i4 < row.numCells()) {
                    TableCell cell = row.getCell(i4);
                    String str4 = "td";
                    xHTMLContentHandler.l("td");
                    int i5 = 0;
                    while (i5 < cell.numParagraphs()) {
                        TableCell tableCell = cell;
                        i(cell.getParagraph(i5), paragraph.getTableLevel(), tableCell, hWPFDocument, fieldsDocumentPart, picturesSource, picturesTable, listManager, xHTMLContentHandler);
                        i5++;
                        cell = tableCell;
                        str2 = str2;
                        str = str;
                        str4 = str4;
                        str3 = str3;
                        i4 = i4;
                        row = row;
                        i3 = i3;
                        table = table;
                    }
                    xHTMLContentHandler.h(str4);
                    i4++;
                }
                xHTMLContentHandler.h(str3);
                i3++;
            }
            xHTMLContentHandler.h(str2);
            xHTMLContentHandler.h(str);
            return table.numParagraphs() - 1;
        }
        if (paragraph.text().replaceAll("[\\r\\n\\s]+", "").isEmpty()) {
            return 0;
        }
        if (hWPFDocument.getStyleSheet().numStyles() > paragraph.getStyleIndex()) {
            StyleDescription styleDescription = hWPFDocument.getStyleSheet().getStyleDescription(paragraph.getStyleIndex());
            if (styleDescription == null || styleDescription.getName() == null || styleDescription.getName().length() <= 0) {
                tagAndStyle = new TagAndStyle("p", null);
            } else {
                r4 = paragraph.isInList() ? listManager.c(paragraph) : null;
                tagAndStyle = e(styleDescription.getName(), i > 0);
            }
        } else {
            tagAndStyle = new TagAndStyle("p", null);
        }
        TagAndStyle tagAndStyle2 = tagAndStyle;
        if (tagAndStyle2.a() != null) {
            xHTMLContentHandler.m(tagAndStyle2.b(), "class", tagAndStyle2.a());
        } else {
            xHTMLContentHandler.l(tagAndStyle2.b());
        }
        if (r4 != null) {
            xHTMLContentHandler.f(r4);
        }
        int i6 = 0;
        while (i6 < paragraph.numCharacterRuns()) {
            CharacterRun characterRun = paragraph.getCharacterRun(i6);
            if (characterRun.text().getBytes(StandardCharsets.UTF_8)[0] == 19 && (fieldByStartOffset = hWPFDocument.getFields().getFieldByStartOffset(fieldsDocumentPart, characterRun.getStartOffset())) != null && (fieldByStartOffset.getType() == 58 || fieldByStartOffset.getType() == 56)) {
                CharacterRun markSeparatorCharacterRun = fieldByStartOffset.getMarkSeparatorCharacterRun(range);
                String str5 = markSeparatorCharacterRun != null ? "_" + markSeparatorCharacterRun.getPicOffset() : "_unknown_id";
                AttributesImpl attributesImpl = new AttributesImpl();
                attributesImpl.addAttribute("", "class", "class", "CDATA", "embedded");
                attributesImpl.addAttribute("", "id", "id", "CDATA", str5);
                xHTMLContentHandler.n("div", attributesImpl);
                xHTMLContentHandler.h("div");
            }
            if (characterRun.text().equals("\u0013")) {
                i6 += k(paragraph, i6, tagAndStyle2.c(), picturesSource, xHTMLContentHandler);
                i2 = 1;
            } else {
                if (characterRun.text().startsWith("\b")) {
                    for (int i7 = 0; i7 < characterRun.text().length(); i7++) {
                        j(characterRun, picturesSource.i(), picturesSource, xHTMLContentHandler);
                    }
                } else if (picturesTable.hasPicture(characterRun)) {
                    j(characterRun, picturesSource.g(characterRun), picturesSource, xHTMLContentHandler);
                } else {
                    g(characterRun, tagAndStyle2.c(), xHTMLContentHandler);
                }
                i2 = 1;
            }
            i6 += i2;
        }
        if (this.g) {
            xHTMLContentHandler.h("s");
            r0 = 0;
            this.g = false;
        } else {
            r0 = 0;
        }
        if (this.i) {
            xHTMLContentHandler.h("i");
            this.i = r0;
        }
        if (this.h) {
            xHTMLContentHandler.h("b");
            this.h = r0;
        }
        xHTMLContentHandler.h(tagAndStyle2.b());
        return r0;
    }

    /**
     * Emits an {@code img} element for a picture and, the first time the picture is seen, passes
     * its content to the inherited embedded-resource method {@code c(...)}.
     *
     * <p>The generated name is "image" + the picture's 1-based index + "." + the suggested
     * extension (the dot and extension are omitted when the extension is empty). Nothing happens
     * when the picture is {@code null} or the run is marked deleted.
     *
     * @param characterRun        run that refers to the picture; may be {@code null}
     * @param picture             picture to output; may be {@code null}
     * @param picturesSource      bookkeeping used for numbering and de-duplication
     * @param xHTMLContentHandler output handler
     */
    public final void j(CharacterRun characterRun, Picture picture, PicturesSource picturesSource, XHTMLContentHandler xHTMLContentHandler) {
        if (!l(characterRun) || picture == null) {
            return;
        }

        String extension = picture.suggestFileExtension();
        String fileName = "image" + PicturesSource.d(picturesSource, picture);
        if (extension.length() > 0) {
            fileName = fileName + "." + extension;
        }
        String mimeType = picture.getMimeType();

        AttributesImpl imageAttributes = new AttributesImpl();
        // z70.u is an opaque helper; presumably it concatenates its two arguments - confirm.
        imageAttributes.addAttribute("", "src", "src", "CDATA", z70.u("embedded:", fileName));
        imageAttributes.addAttribute("", "alt", "alt", "CDATA", fileName);
        xHTMLContentHandler.n("img", imageAttributes);
        xHTMLContentHandler.h("img");

        // The img reference is written every time; the content is handed over only once.
        if (!PicturesSource.e(picturesSource, picture)) {
            c(TikaInputStream.o(picture.getContent()), fileName, null, mimeType, xHTMLContentHandler, false);
            PicturesSource.f(picturesSource, picture);
        }
    }

    /**
     * Handles a field, i.e. the runs between a field-begin mark (0x13) and its field-end mark
     * (0x15), optionally split by a separator mark (0x14) into instruction runs and result runs.
     *
     * <p>Scanning starts at run {@code i + 1} and stops at the field-end mark, so runs after the
     * field stay with the caller. When no separator was seen, the collected runs are treated as
     * the result text. A field whose instruction starts with HYPERLINK and contains a double
     * quote is rendered as an {@code a} element whose href is the quoted target; any other field
     * just has its result runs rendered, as pictures where the run carries one.
     *
     * @param paragraph           paragraph containing the field
     * @param i                   index of the field-begin run
     * @param z                   forwarded to {@code g(...)}; true suppresses formatting elements
     * @param picturesSource      picture bookkeeping
     * @param xHTMLContentHandler output handler
     * @return how many runs after index {@code i} were consumed; the caller adds this to its
     *         own run index
     */
    public final int k(Paragraph paragraph, int i, boolean z, PicturesSource picturesSource, XHTMLContentHandler xHTMLContentHandler) {
        List<CharacterRun> instructionRuns = new ArrayList<>();
        List<CharacterRun> resultRuns = new ArrayList<>();
        boolean separatorSeen = false;

        int position;
        for (position = i + 1; position < paragraph.numCharacterRuns(); position++) {
            CharacterRun characterRun = paragraph.getCharacterRun(position);
            String runText = characterRun.text();
            if (runText.equals("\u0013")) {
                // Nested field: render it and skip the runs it consumed.
                position += k(paragraph, position + 1, z, picturesSource, xHTMLContentHandler);
            } else if (runText.equals("\u0014")) {
                separatorSeen = true;
            } else if (runText.equals("\u0015")) {
                if (!separatorSeen) {
                    // No separator: everything collected so far is the visible text.
                    resultRuns = instructionRuns;
                    instructionRuns = new ArrayList<>();
                }
                // End of this field. Without leaving the loop here, every remaining run of the
                // paragraph would be swallowed into the field.
                break;
            } else if (separatorSeen) {
                resultRuns.add(characterRun);
            } else {
                instructionRuns.add(characterRun);
            }
        }

        if (instructionRuns.isEmpty()) {
            for (CharacterRun resultRun : resultRuns) {
                g(resultRun, z, xHTMLContentHandler);
            }
            return position - i;
        }

        StringBuilder instructionText = new StringBuilder();
        for (CharacterRun instructionRun : instructionRuns) {
            instructionText.append(instructionRun.text());
        }
        String text = instructionText.toString();

        boolean hyperlink = (text.startsWith("HYPERLINK") || text.startsWith(" HYPERLINK")) && text.indexOf('"') > -1;
        if (hyperlink) {
            int start = text.indexOf('"') + 1;
            // The target ends at the last straight quote, else the last right curly quote
            // (U+201D), else the last carriage return, else the end of the instruction.
            int end = text.lastIndexOf('"');
            if (end <= start) {
                end = text.lastIndexOf('\u201D');
            }
            if (end <= start) {
                end = text.lastIndexOf('\r');
            }
            if (end <= start) {
                end = text.length();
            }
            String href = (start < 0 || start >= end || end > text.length()) ? "" : text.substring(start, end);
            xHTMLContentHandler.m("a", "href", href);
            for (CharacterRun resultRun : resultRuns) {
                g(resultRun, z, xHTMLContentHandler);
            }
            xHTMLContentHandler.h("a");
        } else {
            for (CharacterRun resultRun : resultRuns) {
                if (picturesSource.h(resultRun)) {
                    j(resultRun, picturesSource.g(resultRun), picturesSource, xHTMLContentHandler);
                } else {
                    g(resultRun, z, xHTMLContentHandler);
                }
            }
        }
        return position - i;
    }

    /**
     * Entry point: renders a whole Word document stored under the given node.
     *
     * <p>Output order: headers, main-range paragraphs, textbox/footnote/comment/endnote text
     * (one "p" each), footers, pictures that no character run referred to, and finally the
     * directory entries under "ObjectPool" whose names start with "_". If the document turns out
     * to be in the old Word format, extraction is delegated to {@code n(...)}.
     *
     * @param directoryNode       storage node containing the Word document
     * @param xHTMLContentHandler output handler
     */
    public final void m(DirectoryNode directoryNode, XHTMLContentHandler xHTMLContentHandler) {
        try {
            HWPFDocument document = new HWPFDocument(directoryNode);
            org.apache.poi.hwpf.extractor.WordExtractor poiExtractor = new org.apache.poi.hwpf.extractor.WordExtractor(document);
            HeaderStories headerStories = new HeaderStories(document);
            PicturesTable picturesTable = document.getPicturesTable();
            PicturesSource picturesSource = new PicturesSource(document);

            h(new Range[]{headerStories.getFirstHeaderSubrange(), headerStories.getEvenHeaderSubrange(), headerStories.getOddHeaderSubrange()}, "header", document, picturesSource, picturesTable, xHTMLContentHandler);

            Range mainRange = document.getRange();
            ListManager listManager = new ListManager(document);
            for (int paragraphIndex = 0; paragraphIndex < mainRange.numParagraphs(); paragraphIndex++) {
                // The paragraph handler reports how many extra paragraphs (table rows) it consumed.
                paragraphIndex += i(mainRange.getParagraph(paragraphIndex), 0, mainRange, document, FieldsDocumentPart.MAIN, picturesSource, picturesTable, listManager, xHTMLContentHandler);
            }

            for (String textboxText : poiExtractor.getMainTextboxText()) {
                xHTMLContentHandler.g("p", textboxText);
            }
            for (String footnoteText : poiExtractor.getFootnoteText()) {
                xHTMLContentHandler.g("p", footnoteText);
            }
            for (String commentText : poiExtractor.getCommentsText()) {
                xHTMLContentHandler.g("p", commentText);
            }
            for (String endnoteText : poiExtractor.getEndnoteText()) {
                xHTMLContentHandler.g("p", endnoteText);
            }

            h(new Range[]{headerStories.getFirstFooterSubrange(), headerStories.getEvenFooterSubrange(), headerStories.getOddFooterSubrange()}, "footer", document, picturesSource, picturesTable, xHTMLContentHandler);

            // Output every picture that was not referenced from any character run.
            for (Picture unclaimed = picturesSource.i(); unclaimed != null; unclaimed = picturesSource.i()) {
                j(null, unclaimed, picturesSource, xHTMLContentHandler);
            }

            try {
                Entry objectPool = directoryNode.getEntry("ObjectPool");
                // Only a directory can be iterated; any other entry type is skipped.
                if (objectPool instanceof DirectoryEntry) {
                    for (Entry entry : (DirectoryEntry) objectPool) {
                        if (entry.getName().startsWith("_") && (entry instanceof DirectoryEntry)) {
                            b((DirectoryEntry) entry, xHTMLContentHandler);
                        }
                    }
                }
            } catch (FileNotFoundException ignored) {
                // No "ObjectPool" entry: the document has no embedded objects to extract.
            }
        } catch (OldWordFileFormatException oldFormat) {
            // Old-format document: fall back to the plain-text extractor.
            n(directoryNode, xHTMLContentHandler);
        }
    }
}
