package org.docx4j.fonts;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.docx4j.XmlUtils;
import org.docx4j.dml.TextFont;
import org.docx4j.jaxb.Context;
import org.docx4j.model.PropertyResolver;
import org.docx4j.model.properties.Property;
import org.docx4j.model.styles.StyleUtil;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.ThemePart;
import org.docx4j.wml.CTLanguage;
import org.docx4j.wml.PPr;
import org.docx4j.wml.RFonts;
import org.docx4j.wml.RPr;
import org.docx4j.wml.STHint;
import org.docx4j.wml.Style;
import org.docx4j.wml.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;

/**
 * Apply the appropriate font to the characters in the run,
 * following the rules specified in
 * http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/rFonts.html
 * and http://msdn.microsoft.com/en-us/library/ff533743.aspx
 * ([MS-OI29500] 2.1.87)
 * 
 * See also http://blogs.msdn.com/b/officeinteroperability/archive/2013/04/22/office-open-xml-themes-schemes-and-fonts.aspx
 * 
 * The ASCII font formats all characters in the ASCII range (character values 0-127). 
 * This font is specified using the ascii attribute on the rFonts element.
 * 
 * The East Asian font formats all characters that belong to Unicode sub ranges for East Asian languages. 
 * This font is specified using the eastAsia attribute on the rFonts element.
 * 
 * The complex script font formats all characters that belong to Unicode sub ranges for complex script languages. 
 * This font is specified using the cs attribute on the rFonts element.
 * 
 * The high ANSI font formats all characters that belong to Unicode sub ranges other than those explicitly included 
 * by one of the groups above. This font is specified using the hAnsi attribute on the rFonts element.	
 * 
 * Per Tristan Davis
 * http://openxmldeveloper.org/discussions/formats/f/13/t/150.aspx
 * 
 * First, the characters are classified into the high ansi / east asian / complex script buckets [per above]
 * 
 * Next, we grab *one* theme font from the theme for each bucket - in the settings part, there's an element called themeFontLang
 * The three attributes on that specify the language to use for the characters in each bucket
 * 
 * Then you take the language specified for each attribute and look out for the right language in the theme - and you use that font
 * 
 * See also http://blogs.msdn.com/b/officeinteroperability/archive/2013/04/22/office-open-xml-themes-schemes-and-fonts.aspx
 * regarding what to do if the font is not available on the computer.
 * 
 * @author jharrop
 *
 */
public class RunFontSelector {
	
	/** Logger shared by this class; protected, so also visible to subclasses. */
	protected static Logger log = LoggerFactory.getLogger(RunFontSelector.class);	

	/** Package whose main document part supplies styles, theme, settings and font mappings. */
	private WordprocessingMLPackage wordMLPackage;
	/** Visitor which is told which font applies to which characters. */
	private RunFontCharacterVisitor vis;
		
	/** The kind of output this selector was constructed for. */
	private RunFontActionType outputType;
	/**
	 * The purpose for which font selection is being run; it decides what
	 * (if anything) is created in the DOM document.
	 */
	public enum RunFontActionType {
		/** An fo:inline element carrying a font-family attribute is created. */
		XSL_FO,
		/** A span element carrying a style attribute is created. */
		XHTML,
		/** No element is created; selected font names are only reported to the visitor. */
		DISCOVERY
	}
	
	/**
	 * Create a font selector for the given package.
	 * 
	 * The visitor is handed a reference to this selector and the fallback font.
	 * The fallback font is the physical font mapped to the document default font,
	 * or the default font name itself if that has no mapping.
	 * 
	 * @param wordMLPackage the package whose styles, theme, settings and font mappings are used
	 * @param visitor receives the font choices made by this selector
	 * @param outputType what the selection is for (XSL FO, XHTML, or font discovery)
	 */
	public RunFontSelector(WordprocessingMLPackage wordMLPackage, RunFontCharacterVisitor visitor, 
			RunFontActionType outputType) {
		
		this.wordMLPackage = wordMLPackage;
		this.vis = visitor;
		this.outputType = outputType;
				
		vis.setRunFontSelector(this);
		
		// themeFontLang has to be known before getDefaultFont() is first called:
		// that method passes it to the theme part and caches the answer.
		if (wordMLPackage.getMainDocumentPart().getDocumentSettingsPart()!=null) {
			themeFontLang = wordMLPackage.getMainDocumentPart().getDocumentSettingsPart().getContents().getThemeFontLang();
		}
		
		String defaultFontName = getDefaultFont();
		fallbackFont = getPhysicalFont(defaultFontName);
		if (fallbackFont==null) {
			// No physical mapping; the logical name is the best we can offer
			fallbackFont = defaultFontName;
			if (outputType!= RunFontActionType.DISCOVERY) {
				log.warn(defaultFontName + " is not mapped!");
			}
		} 
		
		vis.setFallbackFont(fallbackFont);
		
	}
	
	/**
	 * Set by the constructor: the physical font mapped to the default font,
	 * or the default font name if it is unmapped.  Used as the font-family
	 * in XSL FO output when the requested font has no physical mapping.
	 */
	String fallbackFont = null;
	
	/** themeFontLang from the document settings part; stays null if there is no settings part. */
	CTLanguage themeFontLang = null;
	
	/** Name of the CSS property which carries the font. */
	public final static String CSS_NAME = "font-family"; 
	/** Name of the XSL FO attribute which carries the font. */
	public final static String FO_NAME  = "font-family"; 

	/**
	 * @return the CSS property name, {@link #CSS_NAME}
	 */
	public String getCssName() {
		return CSS_NAME;
	}
	
	
	/**
	 * Convenience accessor for the theme part of the main document part.
	 * Callers in this class check the result for null before using it.
	 */
	private ThemePart getThemePart() {
		return wordMLPackage.getMainDocumentPart().getThemePart();
	}
	
	/** Cached default paragraph style; null until a lookup has produced one. */
	private Style defaultParagraphStyle;
	
    /**
     * Get the default paragraph style from the style definitions part,
     * remembering it once found.
     * 
     * @return the default paragraph style, or null if there is no style
     * definitions part (or it yields no default paragraph style)
     */
    private Style getDefaultPStyle() {
    	
    	if (defaultParagraphStyle!=null) {
    		// already looked up
    		return defaultParagraphStyle;
    	}
    	
    	if (wordMLPackage.getMainDocumentPart().getStyleDefinitionsPart(false) == null) {
    		defaultParagraphStyle = null;
    	} else {
    		defaultParagraphStyle = 
    				wordMLPackage.getMainDocumentPart().getStyleDefinitionsPart(false).getDefaultParagraphStyle();
    	}
    	return defaultParagraphStyle;
    }
    
    
    /** Cached result of {@link #getDefaultFont()}. */
    private String defaultFont = null;
    
	/**
	 * Work out the document default font from the rFonts in the document
	 * default run properties, caching the answer.
	 * 
	 * An asciiTheme reference is resolved against the theme part (Calibri if
	 * there is no theme part, or it gives no font); otherwise the ascii
	 * attribute is used; if there is nothing to go on, Times New Roman.
	 * 
	 * @return the default font name; never null
	 */
	public String getDefaultFont() {
		
		if (defaultFont != null) {
			return defaultFont;
		}
		
		PropertyResolver resolver = wordMLPackage.getMainDocumentPart().getPropertyResolver();
		org.docx4j.wml.RFonts docDefaultFonts = resolver.getDocumentDefaultRPr().getRFonts();
		
		String chosen;
		if (docDefaultFonts==null) {
			log.info("No styles/docDefaults/rPrDefault/rPr/rFonts - default to Times New Roman");
			// Word 2007 still falls back to Times New Roman here
			chosen = "Times New Roman";
			
		} else if (docDefaultFonts.getAsciiTheme()!=null) {
			// Theme reference
			if (getThemePart()==null) {
				log.info("No theme part - default to Calibri");
				chosen = "Calibri";
			} else {
				String themeFont = getThemePart().getFont(docDefaultFonts.getAsciiTheme(), themeFontLang);
				if (themeFont==null) {
					log.info("No minorFont/latin in theme part - default to Calibri");								
					chosen = "Calibri";
				} else {
					chosen = themeFont;
				}
			}
			
		} else if (docDefaultFonts.getAscii()!=null) {
			// Explicit font name
			log.info("rPrDefault/rFonts referenced " + docDefaultFonts.getAscii());								
			chosen = docDefaultFonts.getAscii();
			
		} else {
			// TODO: rFonts present, but it names neither a font nor a theme font
			log.error("Neither ascii or asciTheme.  What to do? ");
			chosen = "Times New Roman";
		}
		
		defaultFont = chosen;
		return defaultFont;
	}
	
	
    /**
     * Handle a run for which there are no run properties (or no rFonts)
     * to consult: the whole text gets the default font.
     * 
     * @param document the document in which to create the output element
     * @param text the text of the run
     * @return null in DISCOVERY mode (the font is reported to the visitor
     * instead); otherwise a fragment containing the element
     */
    private DocumentFragment nullRPr(Document document, String text) {
    	
    	if (outputType!= RunFontActionType.DISCOVERY) {
    		
    		// TODO: a font is currently set on each and every span / inline; 
    		// setting a default on an ancestor (eg body) would make that unnecessary.
    		Element inline = createElement(document);
    		if (inline!=null) {
    			document.appendChild(inline);  
    			setAttribute(inline, getDefaultFont());
    			inline.setTextContent(text);  
    		}
    		return result(document);
    	}
    	
    	// Discovery: just report the font
    	vis.fontAction(getDefaultFont());
    	return null;
    }
    
    /**
     * Wrap the document element in a DocumentFragment.
     * 
     * @param document document whose root element is to be returned
     * @return the fragment, or null in DISCOVERY mode
     */
    private DocumentFragment result(Document document) {
    	
    	if (outputType!= RunFontActionType.DISCOVERY) {
    		DocumentFragment fragment = document.createDocumentFragment();
    		fragment.appendChild(document.getDocumentElement());
    		return fragment;
    	}
    	
    	// In DISCOVERY mode createElement returns null, so the document has no 
    	// root element; appending that to a fragment gave a NullPointerException 
    	// in Xerces (ParentNode.internalInsertBefore), hence this early exit.
    	return null;
    }
    

    /**
     * Create the element which will hold a run of same-font text:
     * a span for XHTML, or an fo:inline for XSL FO.
     * 
     * The element is deliberately NOT appended to the document here.
     * This method can be called several times for one document, and a second 
     * appendChild on the document fails with HIERARCHY_REQUEST_ERR.
     * 
     * @param document the document used as element factory
     * @return the new, unattached element; null in DISCOVERY mode
     */
    public Element createElement(Document document) {
    	
    	if (outputType==RunFontActionType.XHTML) {
    		return document.createElement("span");
    	}
    	if (outputType==RunFontActionType.XSL_FO) {
    		return document.createElementNS("http://www.w3.org/1999/XSL/Format", "fo:inline");
    	}
    	// DISCOVERY (or anything else): no element
    	return null;
    }
    
    /**
     * Record the font on the element, in the way appropriate to the output type.
     * 
     * For XHTML a style attribute is written; if the text being processed has 
     * xml:space='preserve', "white-space:pre-wrap;" is appended (good for 
     * Firefox 3 and WebKit; not honoured by IE7).
     * For XSL FO the font-family attribute is set to the mapped physical font, 
     * or to the fallback font where there is no mapping.
     * In DISCOVERY mode nothing is done.
     * 
     * @param el the element created by {@link #createElement(Document)}
     * @param fontName the logical font name
     */
    public void setAttribute(Element el, String fontName) {
    	
    	if (outputType==RunFontActionType.XHTML) {
    		
    		String css = getCssProperty(fontName);
    		el.setAttribute("style", spacePreserve ? css + "white-space:pre-wrap;" : css);
    		
    	} else if (outputType==RunFontActionType.XSL_FO) {
    		
    		// Look the font up once only (each lookup is logged)
    		String physicalFont = getPhysicalFont(fontName);
    		
    		// Avoid @font-family="", which FOP doesn't like
    		el.setAttribute("font-family", physicalFont==null ? fallbackFont : physicalFont);
    	} 
    }
    
    /** Whether the Text currently being processed has xml:space='preserve'; set by fontSelector. */
    private boolean spacePreserve;
    
    
    /**
     * Apply font selection algorithm to this Text, based on supplied PPr, RPr
     * (and docDefaults, Theme part etc).
     * 
     * The effective run properties are computed from the paragraph style 
     * (the default paragraph style if none is specified) and the direct rPr.  
     * Then:
     * <ul>
     * <li>with no effective rPr, or no rFonts, the default font is used;</li>
     * <li>if the run has cs or rtl, the cs (or cstheme if defined) font is used 
     *     for the whole run, regardless of its characters;</li>
     * <li>if eastAsia is "Times New Roman" and ascii equals hAnsi, the ascii 
     *     font is used for the whole run;</li>
     * <li>otherwise each character is assigned a font according to its Unicode range.</li>
     * </ul>
     * 
     * @param pPr paragraph properties; may be null
     * @param rPr the run's direct run properties
     * @param wmlText the text to which a font is to be applied
     * @return for the whole-run cases, a DocumentFragment (null in DISCOVERY mode); 
     * for the per-character case, whatever the visitor's getResult() returns
     */
    public Object fontSelector(PPr pPr, RPr rPr, Text wmlText) {
    	
    	String text = wmlText.getValue();
    	log.debug(text);
    	spacePreserve = "preserve".equals(wmlText.getSpace());
    	
    	PropertyResolver propertyResolver = wordMLPackage.getMainDocumentPart().getPropertyResolver();
    	
    	// Which paragraph style?
    	String pStyleId = null;
    	if (pPr!=null && pPr.getPStyle()!=null) {
    		pStyleId = pPr.getPStyle().getVal();
    	} else if (getDefaultPStyle() == null) {
    		log.warn("getDefaultPStyle() returned null");
    	} else {
    		log.debug("using default p style");
    		pStyleId = getDefaultPStyle().getStyleId();
    	}
    		
    	RPr pRPr = null;
    	if (pStyleId!=null && wordMLPackage.getMainDocumentPart().getStyleDefinitionsPart(false) != null) {
    		// apply the rPr in the stack of styles, including documentDefaultRPr
    		log.debug(pStyleId);
    		pRPr = propertyResolver.getEffectiveRPr(pStyleId);
    		if (log.isDebugEnabled()) {
    			// marshalling is expensive; this method runs for every w:t 
    			log.debug("before getEffectiveRPrUsingPStyleRPr\n" + XmlUtils.marshaltoString(pRPr));
    		}
    	}

    	// now apply the direct rPr
    	rPr = propertyResolver.getEffectiveRPrUsingPStyleRPr(rPr, pRPr); 
    	// TODO use effective rPr, but don't inherit theme val,
    	// TODO, add cache?
    	
    	if (rPr!=null && log.isDebugEnabled()) {
    		log.debug("effective\n" + XmlUtils.marshaltoString(rPr));
    	}
    	
		Document document = getDocument();
		
		// No rPr .. only happens if no documentDefaultRPr
		if (rPr==null) {
			log.warn("effective rPr is null");
			return nullRPr(document, text);
		}
		
		RFonts rFonts = rPr.getRFonts();
		if (rFonts==null) {
			return nullRPr(document, text);
		}		
    	
    	/* If the run has the cs element ([ISO/IEC-29500-1] 17.3.2.7) 
    	 * or the rtl element ([ISO/IEC-29500-1] 17.3.2.30), 
    	 * then the cs (or cstheme if defined) font is used, 
    	 * regardless of the Unicode character values of the run's content.
    	 */
    	if (rPr.getCs()!=null || rPr.getRtl()!=null ) {
    		
    		String csFont = null;
    		if (rFonts.getCstheme()!=null) {
    			
    			if (getThemePart()!=null) {
    				csFont = getThemePart().getFont(rFonts.getCstheme(), themeFontLang);
    			}
    			if (csFont==null) {
    				// theme couldn't tell us; try the explicit attribute
    				csFont = rFonts.getCs();
    			} 
    			if (csFont==null) {
    				// Nothing usable, so do what is done below for a missing
    				// ascii or hAnsi font: use the default.
    				log.warn("font name is null, for " + text + "; using default font");
    				log.warn(XmlUtils.marshaltoString(rPr, true, true));
    				csFont = getDefaultFont();
    			}    		
    			
    		} else {
    			csFont = rFonts.getCs();
    		}
    		
    		if (csFont!=null) {
    			return singleFontResult(document, csFont, text);
    		}
    		// Neither cs nor cstheme: carry on with the usual algorithm
    	}

		STHint hint = rFonts.getHint(); 
		
		// For each of the 3 slots, a theme font (where one is referenced and can 
		// be resolved) takes precedence over the explicitly named font.
		String eastAsia;
		if (rFonts.getEastAsiaTheme()!=null
				&& getThemePart()!=null) {
			eastAsia = getThemePart().getFont(rFonts.getEastAsiaTheme(), themeFontLang);
			if (eastAsia==null) {
				log.info("no eastAsia theme font for lang " + themeFontLang + ", so using rFonts/@eastAsia");
	    		eastAsia = rFonts.getEastAsia();
			}
		} else {
    		eastAsia = rFonts.getEastAsia();
		}
		
		String ascii;
		if (rFonts.getAsciiTheme()!=null
				&& getThemePart()!=null) {
			ascii = getThemePart().getFont(rFonts.getAsciiTheme(), themeFontLang);
			if (ascii==null) {
				ascii = rFonts.getAscii();
			}
		} else {
			ascii = rFonts.getAscii();
		}
		
		String hAnsi;
		if (rFonts.getHAnsiTheme()!=null
				&& getThemePart()!=null) {
			hAnsi = getThemePart().getFont(rFonts.getHAnsiTheme(), themeFontLang);
			if (hAnsi==null) {
				hAnsi = rFonts.getHAnsi();
			}
		} else {
			hAnsi = rFonts.getHAnsi();
		}
		
    	/*
    	 * If the eastAsia (or eastAsiaTheme if defined) attribute's value is "Times New Roman"
    	 * and the ascii (or asciiTheme if defined) and hAnsi (or hAnsiTheme if defined) attributes are equal, 
    	 * then the ascii (or asciiTheme if defined) font is used.
    	 */
		if ("Times New Roman".equals(eastAsia)
				&& ascii!=null
				&& ascii.equals(hAnsi)) {
			return singleFontResult(document, ascii, text);
		}
		
		if (ascii==null) {
			log.warn("No value for ascii, using default font");
			ascii = this.getDefaultFont();
		}
    		    	
    	/* Otherwise, the following table is used. For all ranges not listed in the following table, 
    	 * the hAnsi (or hAnsiTheme if defined) font shall be used.
    	 */
		if (hAnsi==null) {
			log.warn("No value for hAnsi, using default font");
			hAnsi = this.getDefaultFont();				
		}
		
		String langEastAsia = null;
		if (rPr.getLang()!=null) {
			langEastAsia = rPr.getLang().getEastAsia();
		}
		
		vis.setDocument(document);
		return unicodeRangeToFont(text,  hint,  langEastAsia,
	    		 eastAsia,  ascii,  hAnsi );
    }
    
    /**
     * Produce the result for a run which is rendered entirely in one font.
     * 
     * @param document the document in which to create the output element
     * @param fontName the font to use for all of the text
     * @param text the text of the run
     * @return null in DISCOVERY mode (the font is reported to the visitor
     * instead); otherwise a fragment containing the element
     */
    private DocumentFragment singleFontResult(Document document, String fontName, String text) {
    	
    	if (outputType== RunFontActionType.DISCOVERY) {
    		vis.fontAction(fontName);
    		return null;
    	}
    	
    	Element span = createElement(document);
    	if (span!=null) {
    		document.appendChild(span); 
    		this.setAttribute(span, fontName);
    		span.setTextContent(text);  
    	}
    	return result(document);
    }
    
    /**
     * Null-safe substring test on the run's East Asian language tag.
     * 
     * @param langEastAsia value of w:lang/@w:eastAsia, eg "zh-CN"; may be null
     * @param lang the language code to look for, eg "zh"
     * @return true if langEastAsia is non-null and contains lang
     */
    private boolean contains(String langEastAsia, String lang) {
    	
    	return langEastAsia!=null && langEastAsia.contains(lang);
    }
        	
    /**
     * Walk the text a char at a time, telling the visitor which font applies, 
     * per the Unicode range table at 
     * http://msdn.microsoft.com/en-us/library/ff533743.aspx ([MS-OI29500] 2.1.87).
     * 
     * A character is added to the visitor's current span if the visitor says 
     * that span is reusable, and the character is a space or lies in the range 
     * recorded for the character which started the span.  Otherwise the current 
     * span is finished, and a new one started with the font for this character.
     * 
     * The ranges in the table are 0000-FFFF, so characters are handled as 
     * UTF-16 chars; the two halves of a surrogate pair both fall in the
     * 2F00-DFFF range and so receive the same font.
     * 
     * @param text the text of the run; null is treated as empty
     * @param hint the rFonts hint; may be null
     * @param langEastAsia the run's East Asian language, eg "zh-CN"; may be null
     * @param eastAsia the East Asian font; if null, hAnsi is used in its place
     * @param ascii the ASCII font
     * @param hAnsi the high ANSI font
     * @return the visitor's result
     */
    private Object unicodeRangeToFont(String text, STHint hint, String langEastAsia,
    		String eastAsia, String ascii, String hAnsi) {
    	
    	final boolean eastAsiaHint = (hint == STHint.EAST_ASIA);
    	// Chinese Traditional or Chinese Simplified
    	final boolean chinese = contains(langEastAsia, "zh");
    	// Never hand the visitor a null font name
    	final String eastAsiaFont = (eastAsia==null ? hAnsi : eastAsia);
    	
    	// The range within which the current span's font is known to apply 
    	char currentRangeLower='\u0000';
    	char currentRangeUpper='\u0000';
    	
    	final int length = (text==null ? 0 : text.length());
    	for (int i = 0; i < length; i++) {
    		
    		char c = text.charAt(i);        
    		if (vis.isReusable() 
    				&& (c==' ' 
    					|| (c>=currentRangeLower && c<=currentRangeUpper))) {
    			// Add it to existing
    			vis.addCharacterToCurrent(c);
    			continue;
    		}
    		
    		// Populate previous span
    		vis.finishPrevious();
    		
    		// Create new span
    		vis.createNew();
    		vis.setMustCreateNewFlag(false);
    		
    		if (c<='\u007F') {
    			// Basic Latin
    			vis.fontAction(ascii); 
    			currentRangeLower = '\u0000';
    			currentRangeUpper = '\u007F';
    			
    		} else if (c>='\u00A0' && c<='\u00FF') {
    			// Latin-1 Supplement: hAnsi, except that with hint eastAsia,
    			// certain characters use eastAsia, and further characters 
    			// do so as well if the language of the run is Chinese.
    			if (eastAsiaHint) {
    				boolean useEastAsia = isLatin1EastAsiaWithHint(c)
    						|| (chinese && isLatin1EastAsiaWithHintForChinese(c));
    				vis.fontAction(useEastAsia ? eastAsiaFont : hAnsi);
    				
    				// Here the font depends on the individual character, 
    				// so only a repeat of this character may share the span.
    				vis.setMustCreateNewFlag(true);
    				currentRangeLower = c;
    				currentRangeUpper = c;
    			} else {
    				vis.fontAction(hAnsi);
    				currentRangeLower = '\u00A0';
    				currentRangeUpper = '\u00FF';
    			}
    			
    		} else if (c>='\u0100' && c<='\u02AF') {
    			// Latin Extended-A, Latin Extended-B, IPA Extensions: 
    			// hAnsi, except that with hint eastAsia and a Chinese run, eastAsia.
    			// TODO: "or the character set of the eastAsia font is Chinese5 or GB2312"
    			if (eastAsiaHint) {
    				vis.fontAction(chinese ? eastAsiaFont : hAnsi);
    				vis.setMustCreateNewFlag(true);
    			} else {
    				// Usual case
    				vis.fontAction(hAnsi);
    			}
    			currentRangeLower = '\u0100';
    			currentRangeUpper = '\u02AF';
    			
    		} else if (c>='\u02B0' && c<='\u04FF') {
    			// Spacing Modifier Letters .. Cyrillic (hAnsi checked with russian/cyrillic)
    			vis.fontAction(eastAsiaHint ? eastAsiaFont : hAnsi);
    			currentRangeLower = '\u02B0';
    			currentRangeUpper = '\u04FF';
    			
    		} else if (c>='\u0590' && c<='\u07BF') {
    			// Hebrew .. Thaana
    			vis.fontAction(ascii); 
    			currentRangeLower = '\u0590';
    			currentRangeUpper = '\u07BF';
    			
    		} else if (c>='\u1100' && c<='\u11FF') {
    			// Hangul Jamo
    			vis.fontAction(eastAsiaFont); 
    			currentRangeLower = '\u1100';
    			currentRangeUpper = '\u11FF';
    			
    		} else if (c>='\u1E00' && c<='\u1EFF') {
    			// Latin Extended Additional
    			vis.fontAction(eastAsiaHint && chinese ? eastAsiaFont : hAnsi);
    			currentRangeLower = '\u1E00';
    			currentRangeUpper = '\u1EFF';
    			
    		} else if (c>='\u2000' && c<='\u2EFF') {
    			// General Punctuation .. CJK Radicals Supplement: 
    			// hAnsi, unless hint is eastAsia
    			vis.fontAction(eastAsiaHint ? eastAsiaFont : hAnsi);
    			currentRangeLower = '\u2000';
    			currentRangeUpper = '\u2EFF';
    			
    		} else if (c>='\u2F00' && c<='\uDFFF') {
    			// Kangxi Radicals onwards: CJK (not necessarily Japanese!)
    			vis.fontAction(eastAsiaFont); 
    			currentRangeLower = '\u2F00';
    			currentRangeUpper = '\uDFFF';
    			
    		} else if (c>='\uE000' && c<='\uF8FF') {
    			// Private Use Area: hAnsi, unless hint is eastAsia
    			vis.fontAction(eastAsiaHint ? eastAsiaFont : hAnsi);
    			currentRangeLower = '\uE000';
    			currentRangeUpper = '\uF8FF';
    			
    		} else if (c>='\uF900' && c<='\uFAFF') {
    			// CJK Compatibility Ideographs
    			vis.fontAction(eastAsiaFont); 
    			currentRangeLower = '\uF900';
    			currentRangeUpper = '\uFAFF';
    			
    		} else if (c>='\uFB00' && c<='\uFB4F') {
    			// Alphabetic Presentation Forms: hAnsi, except that
    			// - with hint eastAsia, eastAsia is used for FB00 - FB1C
    			// - for FB1D - FB4F, ascii is used.
    			// The font is uniform within each sub-range, so record that.
    			if (c>='\uFB1D') {
    				vis.fontAction(ascii);
    				vis.setMustCreateNewFlag(true);
    				currentRangeLower = '\uFB1D';
    				currentRangeUpper = '\uFB4F';
    			} else {
    				if (eastAsiaHint) {
    					vis.fontAction(eastAsiaFont);
    					vis.setMustCreateNewFlag(true);
    				} else {
    					vis.fontAction(hAnsi);
    				}
    				currentRangeLower = '\uFB00';
    				currentRangeUpper = '\uFB1C';
    			}
    			
    		} else if (c>='\uFB50' && c<='\uFDFF') {
    			// Arabic Presentation Forms-A
    			vis.fontAction(ascii); 
    			currentRangeLower = '\uFB50';
    			currentRangeUpper = '\uFDFF';	
    			
    		} else if (c>='\uFE30' && c<='\uFE6F') {
    			// CJK Compatibility Forms, Small Form Variants
    			vis.fontAction(eastAsiaFont); 
    			currentRangeLower = '\uFE30';
    			currentRangeUpper = '\uFE6F';	
    			
    		} else if (c>='\uFE70' && c<='\uFEFE') {
    			// Arabic Presentation Forms-B
    			vis.fontAction(ascii); 
    			currentRangeLower = '\uFE70';
    			currentRangeUpper = '\uFEFE';	
    			
    		} else if (c>='\uFF00' && c<='\uFFEF') {
    			// Halfwidth and Fullwidth Forms.  There may be no eastAsia font, 
    			// eg <w:rFonts w:ascii="SimSun" w:hAnsi="SimSun" w:cs="SimSun"/>
    			// with a fullwidth semicolon (0xff1b) or comma (0xff0c)
    			vis.fontAction(eastAsiaFont);
    			currentRangeLower = '\uFF00';
    			currentRangeUpper = '\uFFEF';	
    			
    		} else {
    			// For all ranges not listed in the table, 
    			// the hAnsi (or hAnsiTheme if defined) font shall be used.
    			if (log.isDebugEnabled()) {
    				log.debug("Defaulting to hAnsi for char " + String.format("%04x", (int) c));
    			}
    			vis.fontAction(hAnsi); 
    			
    			// We don't know the extent of the unlisted range, 
    			// so only a repeat of this character may share the span.
    			// TODO: enhance to allow current to be reused, if font is same
    			currentRangeLower = c;
    			currentRangeUpper = c;
    		}
    		
    		vis.addCharacterToCurrent(c);
    	} 
    	
    	// Handle final span
    	vis.finishPrevious();
    	return vis.getResult();
    }
    
    /**
     * Latin-1 Supplement characters which use the eastAsia font if hint is eastAsia: 
     * A1, A4, A7 - A8, AA, AD, AF, B0 - B4, B6 - BA, BC - BF, D7, F7
     */
    private static boolean isLatin1EastAsiaWithHint(char c) {
    	return c=='\u00A1' || c=='\u00A4' 
    			|| (c>='\u00A7' && c<='\u00A8')         	    					
    			|| c=='\u00AA' || c=='\u00AD' || c=='\u00AF'          	    					
    			|| (c>='\u00B0' && c<='\u00B4')         	    					
    			|| (c>='\u00B6' && c<='\u00BA') 
    			|| (c>='\u00BC' && c<='\u00BF') 
    			|| c=='\u00D7' || c=='\u00F7';
    }
    
    /**
     * Latin-1 Supplement characters which use the eastAsia font if hint is eastAsia 
     * and the language of the run is Chinese: 
     * E0 - E1, E8 - EA, EC - ED, F2 - F3, F9 - FA, FC
     */
    private static boolean isLatin1EastAsiaWithHintForChinese(char c) {
    	return (c>='\u00E0' && c<='\u00E1')         	    					
    			|| (c>='\u00E8' && c<='\u00EA')         	    					
    			|| (c>='\u00EC' && c<='\u00ED')         	    					
    			|| (c>='\u00F2' && c<='\u00F3')         	    					
    			|| (c>='\u00F9' && c<='\u00FA') 
    			|| c=='\u00FC';
    }
    
    /**
     * Create a new, empty DOM document to hold the output for one Text.
     * 
     * @return the document, or null if a document builder could not be 
     * configured (the problem is logged)
     */
    private Document getDocument() {

    	Document created = null;
    	try {
    		created = XmlUtils.getDocumentBuilderFactory().newDocumentBuilder().newDocument();
    	} catch (ParserConfigurationException e) {
    		log.error(e.getMessage(), e);
    	}			
    	return created;
    }
	
	/**
	 * Compose the CSS font-family property for the physical font to which 
	 * the specified font is mapped.
	 * 
	 * @param fontName the logical font name
	 * @return the CSS property, or Property.CSS_NULL if the font is not mapped
	 */
	private String getCssProperty(String fontName) {
		
		if (fontName==null 
				&& log.isDebugEnabled()) {
			// Help find out who is passing null
			log.debug("null fontName", new Throwable("call stack"));
		}
		
		String font = getPhysicalFont(fontName);
		
		if (font==null) {
			// Report the name we were asked for (font itself is null here)
			log.warn("No mapping from " + fontName);
			return Property.CSS_NULL;
		}
		return Property.composeCss(CSS_NAME, font );
	}

	
	/**
	 * Find the name of the physical font to which the specified font is mapped 
	 * in the package's font mapper.  If there is no mapping under that name, 
	 * the English equivalent of a CJK font name (if CJKToEnglish knows one) 
	 * is tried.
	 * 
	 * @param fontName the logical font name; may be null
	 * @return the physical font's name, or null if fontName is null or unmapped
	 */
	private String getPhysicalFont(String fontName) {
		
		log.debug("looking for: " + fontName);
		
		if (fontName==null) {
			// Nothing to look up; callers treat null as "not mapped"
			return null;
		}

		PhysicalFont pf = wordMLPackage.getFontMapper().getFontMappings().get(fontName);
		
		if (pf==null) {
			
			// Not being mapped is ok if it happens 
			// at org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart.fontsInUse(MainDocumentPart.java:238)
			// at org.docx4j.openpackaging.packages.WordprocessingMLPackage.setFontMapper(WordprocessingMLPackage.java:311)
			
			// Special cases; there are more; see http://en.wikipedia.org/wiki/List_of_CJK_fonts
			String englishFromCJK = CJKToEnglish.toEnglish( fontName);
			if (englishFromCJK==null) {
				log.warn("Font '" + fontName + "' is not mapped to a physical font. " );
				return null;
			}
			
			pf = wordMLPackage.getFontMapper().getFontMappings().get(englishFromCJK);
			if (pf==null) {
				log.warn("Font '" + englishFromCJK + "' is not mapped to a physical font. " );
				return null;
			}
		}
		
		log.debug("Font '" + fontName + "' maps to " + pf.getName() );
		return pf.getName();
	}	
	
	/**
	 * Callback through which RunFontSelector reports the font it has 
	 * selected for each stretch of characters in a run.  
	 * 
	 * For text which needs per-character treatment, the selector calls, for 
	 * each character, either addCharacterToCurrent alone (when the current 
	 * span can be reused), or finishPrevious, createNew, 
	 * setMustCreateNewFlag(false), fontAction and addCharacterToCurrent.  
	 * It ends with finishPrevious and getResult.
	 */
	public interface RunFontCharacterVisitor {
		
		/** Called from the RunFontSelector constructor, passing that selector. */
		void setRunFontSelector(RunFontSelector runFontSelector);
		
		/** Called with the DOM document for the Text, before its characters are visited. */
		void setDocument(Document document);
		
		/** Add the character to the current span. */
		void addCharacterToCurrent(char c);

		/** Called before a new span is created, and once more after the last character. */
		void finishPrevious();

		/** Start a new span. */
		void createNew();
		
		/** 
		 * Set to false each time a new span is created, and then to true where 
		 * the font just chosen is specific to certain characters.  
		 * NOTE(review): presumably true is meant to make isReusable() return 
		 * false - confirm against the implementations.
		 */
		void setMustCreateNewFlag(boolean val);
		
		/** Whether the next character may be added to the current span, rather than starting a new one. */
		boolean isReusable();
		
		/** Told the name of the font selected (for the span just created, or in DISCOVERY mode for the whole run). */
		void fontAction(String fontname);
		
		/** Called from the RunFontSelector constructor with the font to fall back to. */
		void setFallbackFont(String fontname);
		
		Object getResult();  // when used in output a DocumentFragment; when used to find fonts, a Set.

	}

}
