#!/usr/bin/ksh
#
# COPYRIGHT (c) 2003 - SIMERIAN
#
# e: info@simerian.com
# w: www.simerian.com
#
# DISCLAIMER
# The author of this product does not accept any responsibility for
# loss or damages resulting from the use of said product and makes no
# warranty or representation, either express or implied, including but
# not limited to, any implied warranty of merchantability or fitness for a
# particular purpose. This product is provided "AS IS", and you, its user,
# assume all risks when using it.
#
# DISTRIBUTION
# You may freely redistribute this product subject to the following conditions:
# 1) that the whole product is redistributed, AND,
# 2) that the product or any of its components are NOT altered, AND,
# 3) that no charge be made for any redistribution (ex. consumables & handling).
#
# Feedback is appreciated in order that products can be supported & improved.
#---v----1----v----2----v----3----v----4----v----5----v----6----v----7----v
# Positional parameters: held read-only (-r) and folded to upper case (-u).
typeset -r -u pGenealogy="${1}"
typeset -r -u pExtract="${2}"
typeset -r -u pDelimiter="${3}"
# Genealogy - Supplied as a delimited path name, each element optionally
# accompanied by selection criteria. The delimiter character may
# be defined by the last option; the default is the forward
# slash character "/":
#
# e.g. ELEMENT1/ELEMENT2.ATTRIBUTE=1/ELEMENT3
#
# The genealogy is not case-sensitive and double-quotes do not need to be
# placed around the value setting.
# Extract - Defines the type of data extract required of the routine:
#
# "-" Inclusive Pre-Read to the opening target element.
# "^" Non-inclusive Pre-Read to the opening target element.
# "$" Non-inclusive Post-Read from the opening target element.
# "+" Inclusive Post-Read from the opening target element.
# "@" Extract opening target element.
# "#" Extract opening & closing target element and any child elements.
# "<" Non-inclusive Pre-Read to the closing target element.
# ">" Inclusive Post-Read from the closing target element.
#
# Example: Using ELEMENT3 as the target element:
#
# - ^ <
# | | |
# | +-{ <OUTER ...> |
# +----> +---{ <INSIDE ...> }---+ <--- @ |
# # | <XXX ...> }-+ | |
# | </XXX> | | }-------+
# +----> </INSIDE> | | }-------+
# </OUTER> | | |
# | | |
# $ + >
# Extract XML component.
# Hand each parameter to awk as its own quoted -v word. Building a single
# string and expanding it unquoted would split values on white space (for
# example ELEMENT.NAME=JOHN SMITH) and expose them to file-name generation.
# The delimiter defaults to "/" when the third argument is empty or absent.
awk -v "pG=${pGenealogy}" \
    -v "pX=${pExtract}" \
    -v "pD=${pDelimiter:-/}" '
# Initialisation: decode the parameters, classify the extract selector and
# build the genealogy lookup tables used by the per-line rules.
BEGIN {
    # Parameters arrive through -v as pG / pX / pD and are folded to upper case.
    pGenealogy=toupper(pG)
    pExtract=toupper(pX)
    pDelimiter=toupper(pD)

    cDEBUG=0                # non-zero switches on the trace printf calls
    cSPC=3                  # multiplier applied to (depth-1) when building vINDENT

    # Extract selectors.
    cPRE_NON="^"
    cPRE_INC="-"
    cPOST_NON="$"
    cPOST_INC="+"
    cGROUP="#"
    cPRE_CLOSE="<"
    cPOST_CLOSE=">"
    cTAG="@"
    cATTRIBUTE="."

    # Every single-character selector, concatenated for lookup.
    vINDEX=cPRE_NON cPRE_INC cPOST_NON cPOST_INC cTAG cGROUP cPRE_CLOSE cPOST_CLOSE

    # No selector supplied: extract the opening target element.
    if (pExtract == "") pExtract=cTAG
    if (cDEBUG) { printf "# vINDEX <%s>\n",vINDEX }

    # Literal lookup. The selectors are regular-expression metacharacters
    # (^ $ + and, in some awks, \< \>), so they must not be used as a
    # match() pattern. vSelectorLen mirrors RLENGTH: -1 when nothing was found.
    vSelectorPos=index(vINDEX,pExtract)
    vSelectorLen=(vSelectorPos > 0) ? length(pExtract) : -1
    if (cDEBUG) {
        printf "~ RSTART <%s>\n",vSelectorPos
        printf "~ RLENGTH <%s>\n",vSelectorLen
    }

    # Anything that is not exactly one selector character is taken to be an
    # attribute name whose value is to be extracted.
    if (vSelectorPos == 0 || vSelectorLen != 1) {
        vAttribute=pExtract
        pExtract=cATTRIBUTE
    }
    if (cDEBUG) {
        printf "# vAttribute <%s>\n",vAttribute
        printf "# pExtract <%s>\n",pExtract
    }

    # Second subscripts of vIDX_TAG.
    cDEPTH=0
    cCRITERIA=1
    if (cDEBUG) {
        printf "# Command Line Parameters:\n"
        printf "~ Genealogy <%s>\n",pGenealogy
        printf "~ Extract <%s>\n",pExtract
        printf "~ Delimiter <%s>\n",pDelimiter
    }

    # Break the genealogy into elements; element d is expected at depth d.
    vGenealogyDepth=split(pGenealogy,vElement,pDelimiter)
    if (cDEBUG) {
        printf "# Mapping:\n"
    }
    vTAGPrev=""
    for (d=1; d <= vGenealogyDepth; d++) {
        # Text after the first "." is the selection criteria of the element.
        x=index(vElement[d],".")
        if (x > 0) {
            vTAG=substr(vElement[d],1,x-1)
            vCriteria=fnFormatCriteria(substr(vElement[d],x+1))
        } else {
            vTAG=vElement[d]
            vCriteria=""
        }
        # NOTE: the tables are keyed by tag name, so a genealogy that
        # repeats a tag name overwrites the earlier entry.
        vIDX_Key[d]=vTAG
        vIDX_TAG[vTAG,cDEPTH]=d
        vIDX_TAG[vTAG,cCRITERIA]=vCriteria
        if (cDEBUG) printf "~ %s. [%s](%s)",d,vTAG,vCriteria
        if (d > 1) {
            vIDX_Parent[vTAG]=vTAGPrev
            vIDX_Child[vTAGPrev]=vTAG
            if (cDEBUG) {
                printf ", Parent[%s]",vIDX_Parent[vTAG]
                printf ", Child[%s]=%s",vTAGPrev,vIDX_Child[vTAGPrev]
            }
        }
        vTAGPrev=vTAG
        if (cDEBUG) printf "\n"
    }

    # The search starts with the first genealogy element.
    vTAG=vIDX_Key[1]
    vTAGMatch=sprintf("<%s",vTAG)
    if (cDEBUG) {
        printf "# Target Point:\n"
        printf "~ Tag <%s>\n",vIDX_Key[vGenealogyDepth]
        printf "~ Depth <%s>\n",vGenealogyDepth
        printf "# Starting Point:\n"
        printf "~ Tag <%s>\n",vIDX_Key[1]
        printf "# Searching:\n"
    }

    # The pre-read selectors print from the first line; all others start
    # with printing switched off.
    if (cDEBUG) { printf "# Printing: " }
    if (pExtract == cPRE_NON || pExtract == cPRE_INC || pExtract == cPRE_CLOSE) {
        fPrintXML=1
        if (cDEBUG) { printf "ON\n" }
    } else {
        fPrintXML=0
        if (cDEBUG) { printf "OFF\n" }
    }
}
# fnFormatCriteria - normalise the selection criteria of one genealogy element.
#   pCriteria : "."-separated list of NAME=VALUE conditions; each value may
#               or may not be wrapped in double quotes.
#   returns   : the conditions rewritten as NAME="VALUE" and joined with "|",
#               or "" when pCriteria is empty or no piece contains "=".
# Pieces without "=" are dropped. vParts, vCount, vOut and i are locals
# (declared as extra parameters), so no global variable is modified.
function fnFormatCriteria (pCriteria,    vParts, vCount, vOut, i) {
    if (pCriteria == "") return ""
    vCount=split(pCriteria,vParts,".")
    vOut=""
    for (i=1; i <= vCount; i++) {
        if (index(vParts[i],"=") != 0) {
            sub("=\"","=",vParts[i])      # drop an opening quote if present
            sub("=","=\"",vParts[i])      # then always add one
            sub("\"$","",vParts[i])       # drop a closing quote if present
            sub("$","\"",vParts[i])       # then always add one
            vOut=sprintf("%s|%s",vOut,vParts[i])
        }
    }
    sub("^[|]","",vOut)                   # remove the leading separator
    return vOut
}
# Lines whose first non-blank text is "<?" take no part in the scan.
/^[[:space:]]*<\?/ { next }
# Every remaining line: keep a trimmed copy for output (vXMLCopy) and an
# upper-cased twin of it for matching (vXMLMask).
{
    vXMLCopy=$0
    sub("[[:space:]]*$","",vXMLCopy)
    sub("^[[:space:]]*","",vXMLCopy)
    vXMLMask=toupper(vXMLCopy)
}
# Opening tag (first non-blank text is "<" followed by letters): remember the
# line number, step one level deeper and rebuild the output indent.
/^[[:space:]]*<[[:alpha:]]+/ {
    vOpen=NR
    vDepthCur=vDepthCur+1
    vPad=(vDepthCur-1)*cSPC
    vINDENT=sprintf("%*.*s",vPad,vPad," ")
    if (cDEBUG) {
        printf "~ +[%02d] [%-60.60s]\n",vDepthCur,vINDENT vXMLCopy
    }
}
# Target / waypoint detection for the tag currently being sought (vTAG).
# The rule fires when the line starts with "<" vTAG as a whole tag name (the
# character after the name must be white space, "/", ">" or end of line, so
# ITEM does not match <ITEMS>) and the current depth equals the depth
# recorded for vTAG. An empty vTAG matches any line that starts with "<".
# At the last genealogy depth the element is the target and the extract
# selector is acted on; at any other depth it is a waypoint and the search
# moves on to its child tag.
(index(vXMLMask,vTAGMatch) == 1) && (vDepthCur == vIDX_TAG[vTAG,cDEPTH]) && ((vTAG == "") || (substr(vXMLMask,length(vTAGMatch)+1,1) ~ "^[[:space:]/>]?$")) {
    fCriteriaMatch=0
    vCriteria=vIDX_TAG[vTAG,cCRITERIA]
    if (cDEBUG) {
        printf "# !Matching! TAG [%s]",vTAG
        printf ", Depth [%s]",vIDX_TAG[vTAG,cDEPTH]
        printf ", Criteria [%s]\n",vIDX_TAG[vTAG,cCRITERIA]
    }
    if (vCriteria != "") {
        # Every condition must occur in the line. The loop is bounded so it
        # never probes past the last condition with an empty search string.
        vConditionMax=split(vCriteria,vCondition,"|")
        c=0
        while (c < vConditionMax && index(vXMLMask,vCondition[c+1])) { c++ }
        if (c == vConditionMax) { fCriteriaMatch=1 }
    }
    if (vDepthCur == vGenealogyDepth) {
        if (vCriteria == "" || fCriteriaMatch) {
            if (cDEBUG) printf "# !ACQUIRED! [%s](%s)\n",vTAG,vCriteria
            if (pExtract == cPRE_NON) exit 0
            if (pExtract == cPRE_INC) fDitherAM=1
            if (pExtract == cPOST_NON) fDitherPM=1
            if (pExtract == cPOST_INC) fPrintXML=1
            if (pExtract == cTAG) {
                # Locate the closing tag in the upper-cased mask and cut the
                # same span out of the original-case copy, so the tag is
                # removed whatever case the document uses.
                vCloseTag=sprintf("</%s>",vTAG)
                vClosePos=index(vXMLMask,vCloseTag)
                if (vClosePos > 0) {
                    vXMLCopy=substr(vXMLCopy,1,vClosePos-1) substr(vXMLCopy,vClosePos+length(vCloseTag))
                }
                printf "%s\n",vXMLCopy
                exit 0
            }
            if (pExtract == cGROUP) { vDepthOpen=vDepthCur; fPrintXML=1; fGrouping=1 }
            if (pExtract == cPRE_CLOSE) { vDepthOpen=vDepthCur; fClosedAM=1 }
            if (pExtract == cPOST_CLOSE) { vDepthOpen=vDepthCur; fClosedPM=1 }
            if (pExtract == cATTRIBUTE) {
                # White space is required before the name so that ID does not
                # match inside GRID="..." or inside another attribute value.
                # Positions found in the mask are applied to vXMLCopy so the
                # value keeps its original case.
                vRegExp=sprintf("[[:space:]]%s=\"[^\"]*\"",vAttribute)
                match(vXMLMask,vRegExp)
                if (vAttribute != "" && RSTART > 0) {
                    vTypeset=substr(vXMLCopy,RSTART,RLENGTH)
                    match(vTypeset,"\"[^\"]*\"$")
                    vAttributeValue=substr(vTypeset,RSTART+1,RLENGTH-2)
                    # The value is written without a trailing newline.
                    printf "%s",vAttributeValue
                    exit 0
                }
            }
        }
    } else {
        if (vCriteria == "" || fCriteriaMatch) {
            if (cDEBUG) printf "# !Waypoint! [%s](%s)\n",vTAG,vCriteria
            vTAG=vIDX_Child[vTAG]
            vTAGMatch=sprintf("<%s",vTAG)
        }
    }
}
# Closing tag or self-closing tag on this line: act on the close-related
# flags when the element closing is the one recorded in vDepthOpen, then ask
# for the depth to be reduced once the line has been printed.
/<\// || /\/>/ {
    vClose=NR
    if (cDEBUG) {
        # An element opened and closed on the same line is traced as ".".
        printf "~ -[%02d] [%-60.60s]\n",vDepthCur,vINDENT ((vOpen == vClose) ? "." : vXMLCopy)
    }
    if (vDepthCur == vDepthOpen) {
        if (fGrouping) fDitherAM=1
        if (fClosedAM) exit 0
        if (fClosedPM) fPrintXML=1
    }
    fReduceDepth=1
}
# While printing is switched on, write the trimmed line behind its indent.
{
    if (fPrintXML) printf "%s%s\n",vINDENT,vXMLCopy
}
# Deferred switches, applied after the print rule has seen the line:
# fDitherAM stops the run, fDitherPM turns printing on for later lines.
fDitherAM { exit 0 }
fDitherPM && fPrintXML == 0 { fPrintXML=1 }
# A close was seen on this line: step back up one level and rebuild the
# indent for the lines that follow.
{
    if (fReduceDepth) {
        fReduceDepth=0
        vDepthCur=vDepthCur-1
        vPad=(vDepthCur-1)*cSPC
        vINDENT=sprintf("%*.*s",vPad,vPad," ")
    }
}
' <&0 >&1
# The redirections on the awk command duplicate descriptors 0 and 1 onto
# themselves: awk reads the standard input of this script and writes to its
# standard output.
#---v----1----v----2----v----3----v----4----v----5----v----6----v----7----v
# The script exits with status 0 unconditionally; the exit status of awk is
# not passed on.
exit 0