Lecture 23rd May - AG Wissensbasierte Systeme
Transcrição
Lecture 23rd May - AG Wissensbasierte Systeme
Collaborative Intelligence - Lecture SS 2016 - Prof. Dr. Andreas Dengel WM/04.02 S. 376 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?@&"""1"""77"#$%&" WM/04-05 S. 376 Collaborative Intelligence focuses on the support of knowledge workers within socio-technical networks Web Documents (Paper, Fax, Email, eDocuments) Social Network <?xml .. @ Interaction Visualization User Model Document Analysis Recommender Ontologies Index Information Extraction Indexing Classification Search WM/04.02 S. 377 other socio-technical participants !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?@@"""1"""77"#$%&" WM/04-05 S. 377 Collaborative Intelligence focuses on the support of knowledge workers within socio-technical networks Chapter 1: Search & Classification Chapter 2: Attention-based Collaborative Intelligence Chapter 3: Recommender Systems Chapter 4: Proactive Multi-Channel Information Extraction Chapter 5: Usability in Collaborative Systems Chapter 6: Social Media Monitoring, Discovery & Forecast WM/04.02 S. 378 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?@A"""1"""77"#$%&" WM/04-05 S. 378 Chapter 4 Pro-Active Multi-Channel Information Extraction WM/04.02 S. 379 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?@B"""1"""77"#$%&" WM/04-05 S. 379 C44D<(/",9"E,*<4F-"-9,(),*)"-4F*;+-"0+,)-"94"-4:+"<)+,-"4G" 5H,9",")4;F:+(9"<-" $+./012345+5/.67+267+839+:" ,"=<+;+"4G"=,=+*I"J44D0+9I"+9;>I"=*4E<)<(/"<(G4*:,9<4(I" +-=+;<,008"4G",("4K;<,0"4*"0+/,0"(,9F*+" " ,"=<+;+"4G"9+L9"4*"9+L9",()"/*,=H<;-"-94*+)"4(",";4:=F9+*",-"," M0+"G4*":,(<=F0,9<4("J8")4;F:+(9"=*4;+--<(/"-4G95,*+" " +E<)+(;+"4*","=*44G" ;6;3<+ .=>=56? ?3>6? 
$+./012345+5/.67+267+8399+:" ,"5*<99+("4*")*,5("*+=*+-+(9,9<4("4G"9H4F/H9-" " ,"9+L9F,0"M0+",04(/"5<9H"<9-"-9*F;9F*+",()")+-</("PG4(9-I";404*-I" ,()",))<9<4(,0"<:,/+-Q" " WM/04.02 S. 380 ,"5*<99+("=*44G"F-+)",-"+E<)+(;+" ;6;3< .=>=56? ?3>6? N! 74F*;+- "O""""9H+G*++)<;9<4(,*8>;4:" "OO"5<D<=+)<,>4*/" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?A$"""1"""77"#$%&" " WM/04-05 S. 380 R(+"M*-9"=+*-=+;9<E+"G*4:"5H<;H",")4;F:+(9";,("J+"E<+5+)" <-"9H+"4(+"4G"J+<(/",(",*9<G,;9" '")4;F:+(9"<-",(89H<(/"9H,9"5+";,("*+,)",()"5H<;H"*+0,9+-"94"-4:+" ,-=+;9"4G"9H+"-4;<,0"54*0)" 1 '*9<G,;9" =H8-<;,008",()"-4;<,008" '0045-"ML<(/":+,(<(/"<("," -9,J0+":+)<F:"-+*E<(/"HF:,(" (++)-"G4*","=+*<4)"4G"9<:+" '0045-"(+;+--,*8";H,(/+-"94" *+:,<("<("-8(;"5<9H",";H,(/<(/" 54*0)" @++/++0++1++2++3++4++5+ S45+E+*I"<(","9*,)<9<4(,0";4(-<)+*,9<4(I")4;F:+(9-",*+",--4;<,9+)"5<9H" WM/04.02 S. 381 -F*G,;+-"9H,9";,=9F*+"9H+"<(G4*:,9<4(I"T" O"74F*;+U"V,9+->"!"#$%"&'()%"*+)'!",,*#-./$-"#I"WH+"X4H("S4=D<(-"Y(<E+*-<98"Z*+--I"6,09<:4*+"P%BABQ>" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?A%"""1"""77"#$%&" WM/04-05 S. 381 T"-F*G,;+-",-"9H+8",*+"+:=048+)"G4*":<00+((<F:-"94")<-9*<JF9+" ,()"=*+-+*E+";4::F(<;,9<4("<(9+()-"4E+*"9<:+",()"-=,;+"" WH+",(;+-94*-"4G"94),8[-":,(8")<\+*+(9" -;*<=9-"5+*+"=*4J,J08";,E+"=,<(9<(/-" " ],*08";,E+:,("=,<(9<(/-"5H<;H"5+*+"(49"^F-9" )*,5<(/-"<("9H+"+8+-"4G"9H+<*";*+,94*-"JF9" ,0-4";,**<+)",":+--,/+"H,E+"J++("),9+)"J,;D" -4:+"_$I$$$"8+,*-" " WH+"40)+-9"D(45(",*9<G,;9-"4G"-;*<=9"*+,;H" J,;D"94"9H+"MG9H":<00+((<F:"6`U" `0,8"9,J0+9-")+=<;9"*4F()"<()+(9,9<4(-" 94/+9H+*"5<9H"9H+"<:,/+"4G",(",(<:,0>" '*;H,+404/<-9-"J+0<+E+"9H+-+"<()+(9,9<4(-"94" *+=*+-+(9"9H+"(F:J+*"9+(" WM/04.02 S. 382 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?A#"""1"""77"#$%&" WM/04-05 S. 
382 WH+"H<+*4/08=H-"4G"9H+"]/8=9<,(-"-9<00";4(9,<("+0+:+(9-"4G" =<;9F*+"5*<9<(/" S<+*4/08=H-"5+*+"+-9,J0<-H+)"<("]/8=9" " S<+*4/08=H-")<)"(49"*+=*+-+(9+)"=F*+" =<;94/*,:-"(4*"=F*+"=H4(+9<;"-=+00<(/",()" )<)"=*4J,J08"+E40E+"G*4:"9H+"4*(,:+(9,9<4(" 4G"E,-+-" " 6+-<)+-"=<;94/*,:-",()"-980<a+)" P<:,/+1Q-8:J40-"4G";4::4("<9+:-"9H+*+" +L<-9"=H4(4/*,:-"*+=*+-+(9<(/"=H4(+:+-I" ^F-9"0<D+"94),8[-",0=H,J+9-" " 7<(;+"9H+(I"9H+"(F:J+*"4G"-;*<=9";H,*,;9+*-" /*+5"G*4:"*4F/H08"@$$"94",==*4L<:,9+08" _I$$$"94),8" WM/04.02 S. 383 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?A?"""1"""77"#$%&" WM/04-05 S. 383 WH+"`F(+<G4*:"7;*<=9"<-"9H+"M*-9"-9+="945,*)-",J-9*,;9" 5*<9<(/" .*,5<(/"-9*,</H9"0<(+-"4(";0,8"9,J0+9-"<-" :F;H"+,-<+*"9H,(")*,5<(/"<:,/+-" " WH+";F(+<G4*:"-;*<=9"5,-"J4*("5H+("9H+" 6,J804(<,(-",()"7F:+*<,(-"F-+)"5*<9<(/" 9440-"9H,9":,)+"5+)/+"-H,=+)" <()+(9,9<4(-" " `F(+<G4*:"5,-"-4";4:=0+LI"9H,9",0*+,)8"<(" #$$$"6`I"54*0)"0<9+*,9F*+"0<D+"9H+" :4(F:+(9,0"]=<;"4G"2<0/,:+-H"5,-" 5*<99+("<("40)"6,J804(<,("4("95+0E+";0,8" 9,J0+9-" WM/04.02 S. 384 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?Ab"""1"""77"#$%&" WM/04-05 S. 384 Z,=+*",()"Z*<(9"W+;H(404/<+-"4*</<(,9+)"G*4:"9H+" ,(;<+(9"`H<(," c4*","04(/"9<:+I"9H+"`H<(+-+";H,*,;9+*-" 5+*+"5*<99+("4(94"=,=+*"5<9H"d()<,("d(D" " '*4F()"9H+"8+,*"%$_$"'.I"6<"7H+(/" <(E+(9+)"9H+"M*-9":4E,J0+"98=+"=*<(9<(/" =*+--"9+;H(404/8"PS<-"G*,/<0+"98=+-"5+*+" :,)+"4G"J,D+)";0,8Q" " 7H+(/";4F0)"9HF-"=*<(9"4("=,=+*"04(/" J+G4*+"9H+"2+*:,("X4H,((+-"2F9+(J+*/" :,)+"H<-"G,:4F-"<(E+(9<4("P%b?BQ" " S45+E+*I"9H+"`H<(+-+"G,;+)",(49H+*"J</" =*4J0+:U"9H+<*",0=H,J+9";4(9,<(-",J4F9" _$I$$$";H,*,;9+*-" WM/04.02 S. 385 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?A_"""1"""77"#$%&" WM/04-05 S. 
385 e+,)<(/"9H+"6<J0+"<("9H+":+)<,+E,0"9<:+-"*+fF<*+)"9H+" =*<E<0+/+"4G"D(45<(/"9H+"C,9<("0,(/F,/+" W4"=*4)F;+",":+)<,+E,0"6<J0+"4(" =,*;H:+(9I"-D<(-"4G"_$$";,0E+-"5+*+" (++)+)"" " T",()","049"4G"=,9<+(;+I",-"+E+*8"0+99+*",()" <:,/+"5+*+")*,5("J8"H,()>" " d("9H4-+"),8-I"0<J*,*<+-"5+*+";H,*,;9+*<a+)" J8","J,JJ0+"4G"E4<;+-" " WH+"*+,)+*-")<)"(49";4:=0,<(I",-"F(9<0"9H+" C,9+"g<))0+"'/+-I"-<0+(9"*+,)+*-"5+*+" -F-=+;9+)"94"J+"<("0+,/F+"5<9H"9H+")+E<0" WM/04.02 S. 386 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?A&"""1"""77"#$%&" WM/04-05 S. 386 h<9H"9H+"<(E+(9<4("4G"0+99+*=*+--"=*<(9<(/","(+5":,*D+9"H,-" 4=+(+)"F="<("]F*4=+" 6+G4*+"9H+"<(E+(9<4("4G"2F9+(J+*/i-" :4E,J0+"98=+"<("%b_&I"J44D-"5+*+"E+*8" *,*+" " g4-9"4G"9H+-+"<(E,0F,J0+":,(F-;*<=9-"5+*+" 04;D+)"F="<("0<J*,*<+-" " 2F9+(J+*/i-"<(E+(9<4("+(,J0+)"9H+" )F=0<;,9<4("4G"J44D-",9",\4*),J0+";4-9-" 9H*4F/H4F9"]F*4=+",()"/,E+"*<-+"94"," ;4:=0+9+08"(+5"+;4(4:<;"-+;94*" " '*4F()"9H+"8+,*"%_$$"9H+*+"5+*+",*4F()" #$$"=*<(9<(/"-H4=-"0"%1'2&/#$34' #$$"=*<(9<(/"-H4=-" 53$/6&-3),5#$37"<("j+(<;+"4(08" WM/04.02 S. 387 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?A@"""1"""77"#$%&" WM/04-05 S. 387 g,--1=*4)F;9<4(":,D+-"J44D-",\4*),J0+"G4*"+E+*84(+" '*4F()"%&@_"'.":4*+"J44D-"5+*+"=*<(9+)" <("2+*:,("9H,("<("C,9<(" " '"*+E40F9<4("5,-"-9,*9<(/"9H,9"54F0)"=+,D" <("9H+"%A9H"`+(9F*8U"WH+"G4*:,9-"J+;,:+" -:,00+*",()"=*<(9<(/"E40F:+-"0"%1'58-$-"#37" 0,*/+*" " WH+"M*-9"0+()<(/"0<J*,*<+-",==+,*+)" " WH+";4-9-"4G"J44D-"-H*F(D",()"9H+"(F:J+*" 4G"*+,)+*-"<(;*+,-+)" " 6F9":4-9"<:=4*9,(908I"*+,)<(/"H,J<9-"H,E+" ;H,(/+)U"d(-9+,)"4G"9H+"*+=+,9+)"0+;9F*+" 4G"9H+"-,:+"J44DI":4-9"J44D-"5+*+"*+,)" ^F-9"4(;+" WM/04.02 S. 388 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?AA"""1"""77"#$%&" WM/04-05 S. 
388 W4),8")4;F:+(9-",*+",("<:=4*9,(9"=,*9"4G"4F*";F09F*+I"JF9" J+H<()"9H+"-;+(+-"9H+";4:=+9<9<4("<-"/+99<(/"M+*;+*" .4;F:+(9-",*+"J49H"-4F*;+-"4G" <(G4*:,9<4(",-"5+00",-",":+,(-"G4*" ;4::F(<;,9<4(" " S45+E+*I"9H+"G4*:"<("5H<;H")4;F:+(9-" ,99*,;9I";4::,()I"*+fF+-9I";4(E<(;+I"4*" ,:F-+"<-"-9+,)<08"+E40E<(/",()")<E+*-<G8<(/" )F+"94"9H+":,(8";4::F(<;,9<4(";H,((+0-" 9H,9",*+",E,<0,J0+"9H+-+-"),8-"" WM/04.02 S. 389 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?AB"""1"""77"#$%&" WM/04-05 S. 389 c*4:","H<-94*<;,0"=4<(9"4G"E<+5I"5+";4(-<)+*+)",")4;F:+(9" <:,/+",-","-FJ^+;9"4G"-9F)8",()"<(9+*=*+9,9<4(" WH+"(++)"G4*",(,08a<(/",()"*+;4/(<a<(/")4;F:+(9-"<-"=F-H+)"J8"+;4(4:<;"/4,0-+ A(4B+/4+;6;3<C @/012345+646?7D=D+,()"<30/>4=5=/4O"<(;0F)+-";4(9*<JF9<4(-")+,0<(/"5<9H";4:=F9+*" *+;4/(<9<4("4G";H,*,;9+*-I"-8:J40-I"9+L9I"0<(+-I"/*,=H<;-I"<:,/+-I"H,()5*<9<(/I" -</(,9F*+-I",-"5+00",-",F94:,9<;",(,08-+-"4G"9H+"4E+*,00"=H8-<;,0",()"04/<;,0" -9*F;9F*+-"4G")4;F:+(9-I"5<9H"9H+"F09<:,9+"4J^+;9<E+"4G","H</H10+E+0"F()+*-9,()<(/" WM/04.02 S. 390 4G"9H+<*"-+:,(9<;";4(9+(9>" O"74F*;+U"d(9+*(,9<4(,0"X4F*(,0"4(".4;F:+(9"'(,08-<-",()"e+;4/(<9<4("PdX.'eQ" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?B$"""1"""77"#$%&" WM/04-05 S. 390 WH+"4*</<(,0"<)+,"4G")4;F:+(9",(,08-<-",()"*+;4/(<9<4("5,-" 9H+"9*,(-G4*:,9<4("=*4;+--" Z*4E<)+-"4==4*9F(<98"G4*",(" ,F)<+(;+"94",;;+--"9H4-+" )4;F:+(9-",()"=+*G4*:"*+/F0,*" 4=+*,9<4(-"0<D+"-+,*;HI"+)<9<(/I" *+F-+I";4(E+*-<4(I"=FJ0<-H<(/I"+9;" @/012345+(26>3D+ WM/04.02 S. 391 E5<1051<3.+53F5+ G356.656+ !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?B%"""1"""77"#$%&" WM/04-05 S. 391 c*4:",(",J-9*,;9"=4<(9"4G"E<+5"9H<-"<(;0F)+-"-+E+*,0" 9*,(-G4*:,9<4("-9+=-" (4H/<265=/4+ @656+ E728/?+ !/2;/4345D+ L=F3?D+ I+G364=4>+ I+"<.3<+ I+EJ6;3+ I+!/44305=K=57+ •! 
`4:=4(+(9-",*+";H,*,;9+*<a+)"J8",";+*9,<(",:4F(9"4G";4((+;9+)"P(+</HJ4*+)Q"=<L+0-" •! 78:J40-",*+")+M(+)"J8",";+*9,<(",**,(/+:+(9-"4G";H,*,;9+*<-9<;,008"-H,=+)" ;4:=4(+(9-" •! .,9,"<-"9H+"*+-F09"4G"+(;4)<(/"-8:J40-"<(","-=+;<M;"4*)+*" WM/04.02 S. 392 •! d(G4*:,9<4(",0045-"94",--4;<,9+"),9,"5<9H",";+*9,<(":+,(<(/" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?B#"""1"""77"#$%&" WM/04-05 S. 392 '"-+;4()"=+*-=+;9<E+"94"044D",9",")4;F:+(9"<-"," 9+;H(404/<;,0"4(+" k45,),8-I")4;F:+(9-+,*+"=*4)F;+)"E<,"=+("P4("=,=+*"4*"4("9,J0+9QI"9+L9",()"/*,=H<;" +)<94*-"P;4((+;9+)"94"=*<(9+*-QI"4*"49H+*"9+;H(<;,0":+,(-I"-F;H",-";,:+*,-I",()I"(49" 94"G4*/+9I":+--,/<(/";H,((+0-"P<(;0>";4::F(<98"=0,9G4*:-Q" 1 2 @++/++0++1++2++3++4++5+ hH+("5+";*+,9+",")4;F:+(9I"5+";,("-=+;<G8"9H+"),9,"94",==+,*I";4(9*40"9H+"0,84F9I" WM/04.02 S. 393 G4*:,99<(/I"/*4F=<(/I",()"-FJ949,0<(/"4G"),9,I",()"-=+;<G8"9H+"=4-<9<4("4G"=,/+"J*+,D-" P<(",))<9<4(I"5+";,("<(;0F)+"=<;9F*+-",()")*,5"J4*)+*-Q" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?B?"""1"""77"#$%&" O"74F*;+U"V,9+->"!"#$%"&'()%"*+)'!",,*#-./$-"#I"WH+"X4H("S4=D<(-"Y(<E+*-<98"Z*+--I"6,09<:4*+"P%BABQ>" WM/04-05 S. 393 '' '*+"5+":4E<(/"G*4:" ,"ML+)"54*0)"4G"=,=+*")4;F:+(9-"" 94","lF<)"54*0)"4G")</<9,0",*9<G,;9-m" WM/04.02 S. 394 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?Bb"""1"""77"#$%&" WM/04-05 S. 394 WH+*+",*+","(F:J+*"4G";4(9*,-9<E+"=,<*-"J8"5H<;H"=,=+*",()" )</<9,0",*9<G,;9-",*+"4G9+(";H,*,;9+*<a+)" L6;3<+@/012345+ M#O(@+ -9,J0+" =+*:,(+(9" -9,9<;" <(,;9<E+" *</<)" A$??+./012345DP+QJ35J3<+5J37+6<3+;<=453.+/<+.=>=56?P+ 3F=D5+=4+6+;3<;3516?+534D=/4+835Q334+RF=57+64.+S1=.=57TC+ @=>=56?+@/012345+ M(N+ WM/04.02 S. 
395 O"74F*;+U"C+E8I".>"g>"c<L+)"4*"lF<)m".4;F:+(9"-9,J<0<98",()"(+5":+)<,>"Z*4;++)<(/-"]F*4=+,("`4(G>"4(" S8=+*9+L9"W+;H(404/8I"P==>"#bn?%Q">"k+5"V4*DU"'--4;<,9<4("G4*"`4:=F9<(/"g,;H<(+*8"P%BBbQ"" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?B_"""1"""77"#$%&" WM/04-05 S. 395 WH+*+"<-","JF(;H"4G"+L;<9<(/"*+-+,*;H"fF+-9<4(-",*<-<(/"G*4:" 9H+",09+*(,9<(/"9*,(-<9<4(-" •! •! •! •! •! d-"<9","(+5"E+*-<4("4*","(+5")4;F:+(9m" .4+-"9H+"-</(,9F*+"044D"0<D+"<9"5,-"�H+0=+)�m" d-"9H+")4;F:+(9";*+,9+)",00",9"4(+"9<:+"4*"=*+=,*+)"-+fF+(9<,008m" d-"<9","0+/,0":,(<=F0,9<4("4*","G*,F)",99+:=9m"" WM/04.02 T" S. 396 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?B&"""1"""77"#$%&" WM/04-05 S. 396 eH89H:"J+95++("ML<98",()"lF<)<98"<-"<(lF+(;+)"J8"9H+"/+(*+I" 5H<;H"<9-+0G")+M(+-")4;F:+(9-"J8"=,*9<;F0,*"G4*:-",()"GF(;9<4(2 1 U M/<2+ @++/++0++1++2++3++4++5+ U M1405=/4+ I ](/<(++*<(/".*,5<(/" T" X4J"'==0<;,9<4(" T" `4:=0,<(9" k+5-=,=+* " WM/04.027;<+(9<M;"e+=4*9" S. 397 k4E+0" g+--,/+" T" 7D+9;H" -34<3+ d(E4<;+" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?B@"""1"""77"#$%&" O"74F*;+U"V,9+->"!"#$%"&'()%"*+)'!",,*#-./$-"#I"WH+"X4H("S4=D<(-"Y(<E+*-<98"Z*+--I"6,09<:4*+"P%BABQ>" WM/04-05 S. 397 .4;F:+(9-",*+"GF*9H+*"+:J+))+)"<(94"<(-9<9F9<4(,0"=*4;+--+-" ,()",*+"=,*9"4G"4F*"54*D"=*,;9<;+-"4=+(<(/1F=","9H<*)"E<+5" 2 1 h4*D" 3 HF:,("=*,;9<;+-" <(-9<9F9<4(,0"+:J+):+(9" U M/<2+ @++/++0++1++2++3++4++5+ U M1405=/4+ I ](/<(++*<(/".*,5<(/" T" X4J"'==0<;,9<4(" T" `4:=0,<(9" k+5-=,=+* " WM/04.027;<+(9<M;"e+=4*9" S. 398 k4E+0" g+--,/+" T" 7D+9;H" -34<3+ d(E4<;+" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?BA"""1"""77"#$%&" O"74F*;+U"V,9+->"!"#$%"&'()%"*+)'!",,*#-./$-"#I"WH+"X4H("S4=D<(-"Y(<E+*-<98"Z*+--I"6,09<:4*+"P%BABQ>" WM/04-05 S. 398 "" WM/04.02 S. 
399 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?BB"""1"""77"#$%&" WM/04-05 S. 399 "" WH+"%%9H"d'Ze"h4*D-H4="4(".4;F:+(9"'(,08-<-"78-9+:-">>>" WM/04.02 S. 400 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b$$"""1"""77"#$%&" WM/04-05 S. 400 "&-$,(a$'(",+ !"O,'&V+ ["&\E]"L+ is-a is-a L*&("@+ ($L&+ is-a has-sponsor M<6403+ @$E+WXYZ+ is-a has-date $;<=?+`_YXP+WXYZ+ has-participant is-a !('V+ \=D3P+\/=0J=+ \=D3P+\/=0J= is-a E2=5JP+&67+ !]$(&+ '/1<D+ "<>=3<P+^364_G6<0 "<>=3< P+^364_G6<0+ #=Q=0B=P+G6<01D #=Q=0B= P+G6<01D+ is-a is-part has-participant &623?P+^364_VK3D P+^364_VK3D+ is-a has-location is-a is-a is-a L!+!]$(&+ is-a WM/04.02 S. 401 L*&E",+ !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b$%"""1"""77"#$%&" WM/04-05 S. 401 .4;F:+(9",(,08-<-",()"*+;4/(<9<4("<-",0-4",(",99+:=9"94" <(;*+,-+"9H+"E,0F+"4G",")4;F:+(9" \4/Q?3.>3+ Value (4H/<265=/4+ @656+ E728/?+ !/2;/4345D+ L=F3?D+ I+!/453F5+ I+G364=4>+ I+"<.3<+ I+EJ6;3+ I+!/44305=K=57+ •! `4:=4(+(9-",*+";H,*,;9+*<a+)"J8",";+*9,<(",:4F(9"4G";4((+;9+)"P(+</HJ4*+)Q"=<L+0-" •! 78:J40-",*+")+M(+)"J8",";+*9,<(",**,(/+:+(9-"4G";H,*,;9+*<-9<;,008"-H,=+)" ;4:=4(+(9-" •! .,9,"<-"9H+"*+-F09"4G"+(;4)<(/"-8:J40-"<(","-=+;<M;"4*)+*" •! d(G4*:,9<4(",0045-"94",--4;<,9+"),9,"5<9H",";+*9,<(":+,(<(/+ WM/04.02 S. 402 •! \4/Q?3.>3+6..<3DD3D+5J3+68=?=57+5/+14.3<D564.+=4H/<265=/4+=4+6+>=K34+0/453F5+ !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b$#"""1"""77"#$%&" WM/04-05 S. 
402 B2B and B2C Communication is mostly done via documents Inter-organizational communication via documents means exchanging business messages with an economic or legal background Documents are the obligatory means for bridging time and space Time Documents are required for attesting compliance Space Involved parties link up their communication behavior Documents express mutual expectations WM/04.02 S. 403 Sender incorporates data into the message (context) and thus transforms them into information © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 403 - SS 2016 WM/04-05 S. 403 Today, the various stakeholders of business processes communicate via any channel <?xml .. @ WM/04.02 S. 404 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 404 - SS 2016 WM/04-05 S. 404 Different channels provide different options to communicate messages Depending on the place, the temporal constraints, and the technical infrastructure, we select another way of communication We submit paper forms where we filled in our power consumption data We send faxes to insurances that capture the data of our new car We order electronic devices via email where we attach a scanned order form @ We raise our displeasure about a malfunction by using the Facebook channel or by calling the help desk We use an eDocument service with which we submit a pdf-file WM/04.02 S. 405 together with document metadata <?xml .. © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 405 - SS 2016 WM/04-05 S. 405 The Internet is a cultural space for communication, which is continuously evolving quantitatively and qualitatively Amount E-Mail @ eDocuments <?xml .. Paper Fax Time 1960 1980 2000 2020 WM/04.02 S.
406 Email has already become the communication medium No. 1* * Source: http://www.ey.com/press/releases/ © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 406 - SS 2016 WM/04-05 S. 406 In the world of business, email is about to be the dominant communication tool for communicating business-relevant information Since 2005 the annual amount of emails has increased tenfold to 35 billion today Some facts*: • 93% of all enterprises use email as a preferred tool to communicate with customers (B2C) • 84% use email to communicate B2B • 71% use email as major communication tool while negotiating contracts • 69% use email to exchange electronic bills and bank connections • 91% send classified information via email WM/04.02 S. 407 9+E/1<03b+J55;bccQQQT./012646>3<T.3c26>6d=4c6<5=B3?efXg_;<=45e<30J5?=0J3e14.e530J4=D0J3eH<6>34TJ52?+ © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 407 - SS 2016 WM/04-05 S. 407 Asking for the main demands on email management leads to many different answers … Classification Forwarding Support during processing Structured repository Linking with other sources of information Retrieval Reducing server load Documentation WM/04.02 S. 408 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 408 - SS 2016 WM/04-05 S.
408 While enterprises aim to maintain a centralized storage system, processing and utilizing stored items is usually a decentralized task Response Management Systems support the classification and forwarding of emails on the basis of known competences, and provide assistance in formulating replies Information Management Systems help to organize a structured repository for the content emails entail, to establish connections with other sources of information and to facilitate retrieval Archiving systems store the content in accordance with legal requirements while at the same time reducing the server load WM/04.02 S. 409 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 409 - SS 2016 WM/04-05 S. 409 Let’s consider an example! WM/04.02 S. 410 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 410 - SS 2016 WM/04-05 S. 410 Let me introduce Dr. Gesine Kustermann, who is under time pressure and has an increasing workload Dr. Gesine Kustermann is CFO of DFKI, a well-known research center in Kaiserslautern Besides many other issues she is responsible for the management of the carpool at DFKI, whose vehicles have for many years been ordered via CarFS, the financial service branch of a leading German car manufacturer Early in January there is an exposition and DFKI’s old van had an accident. So Gesine has to make sure that DFKI can use the new van by January 1st WM/04.02 S. 411 Gesine asked her contact partner at CarFS, a large financial service agency, to get the corresponding application form for the car insurance © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 411 - SS 2016 WM/04-05 S.
411 Let me further introduce Michael to you, a typical but fictitious knowledge worker Michael Lenz works as an insurance specialist at CarFS in Hamburg and is the responsible contact person for all DFKI matters Although he would like to leave for the Christmas holidays soon, he knows that it is better to react to Gesine’s request since she could be very annoying WM/04.02 S. 412 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 412 - SS 2016 WM/04-05 S. 412 The fictitious enterprise CarFS intends to improve its processes with respect to duration and cost-effectiveness One aim at CarFS is to classify incoming documents from multiple channels and to route them to the respective clerks for further processing Situation: Manual indexing leads to bottlenecks Routing is based on individual knowledge of the staff members Search in various repositories and archives is based on full text search or pre-defined index terms Documents are filed as TIFF within a given document taxonomy Data enrichment is done manually WM/04.02 S. 413 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 413 - SS 2016 WM/04-05 S. 413 In this respect there are various important work packages to be considered WM/04.02 S. 414 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 414 - SS 2016 WM/04-05 S. 414 CarFS has introduced the system CoMem, a corporate memory with an integrated multi-channel document recognition system E-Mail Fax-Server eInvoicing Call-Center Scanner WM/04.02 S. 415 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 415 - SS 2016 WM/04-05 S. 415 In Michael’s daily practice there is a whole bunch of communication alternatives to consider Fax Paper mail Call-Center E-Mail @ eDocuments <?xml .. (* ) WM/04.02 S.
416 Document Images (*): as Attachment Text Meta Data © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 416 - SS 2016 WM/04-05 S. 416 Before leaving for the Christmas holidays, I have to finish the car insurance for Gesine ... and how to work with a corporate memory, how would it understand what I mean and how I think? WM/04.02 S. 417 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 417 - SS 2016 WM/04-05 S. 417 Imaginations without terms are blind and terms without imaginations are empty* Our environment consists of items, facts and events that are „real“ and determine our lives („what is going on“) Imagination evokes „vacations” is related to represents Symbol Reality Semiotic Triangle WM/04.02 S. 418 In order to express their thoughts, people use signs, symbols, or characters that may be understood by others („what I couch or explicate“) People reading texts put contents together and create their very individual imagination („what I mean“) * I. Kant (1724 – 1804) © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 418 - SS 2016 WM/04-05 S. 418 One approach is to look at the ideas of the Semantic Web that builds on predication and ontology to formally represent semantics A Theory of Ontology attempts to give answers to the question: What is there? (the Greek terms „ontos“ and „logos“ mean „to be“ and „word“) Aristotle defined a system of ten categories, such as substance, quality, quantity, where, when, … A Theory of Predication tries to answer the question: What is it to say something about something? A subject is what a statement is about A predicate is what a statement says about its subject WM/04.02 S.
419 A common definition of an Ontology for Semantic Web researchers is an explicit, formal specification of a conceptualization < Tom Gruber, 1993 > © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 419 - SS 2016 WM/04-05 S. 419 An ontology provides a shared vocabulary to all participants to express facts about the world Subject Predicate Amellie de Hemp Object 5082403 has-phone-no <rdf:RDF> <rdf:Description rdf:about="http://www.AAA-verlag.com/deHemp"> <has-phone-no> 5082403</has-phone-no> </rdf:Description> </rdf:RDF> A fact is expressed as a Subject-Predicate-Object triple Subjects, predicates, and objects are given as names for entities, also called resources or nodes Entities represent something, a person, an appointment, a website, … Names are URIs, which are global in scope, always referring to the same entity in any RDF document in which they appear The underlying structure of any knowledge can be viewed as a graph (of triples) consisting of nodes (subjects, objects) and labeled directed arcs (predicates) that link pairs of nodes WM/04.02 S. 420 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 420 - SS 2016 WM/04-05 S. 420 Ontologies describe a particular vocabulary that can be used to describe aspects of real domains Document Classes Organizations Groups Persons Events Locations/Addresses The vocabulary may follow different “W-Dimensions” of knowledge (what, who, when, where, …) All workflow-relevant aspects of information can be described using a set of explicit categories The categories can be taken from other applications and formally represented using RDFS Appointments Topics Exemplary categories for describing the work context (in RDFS they are called schemata) WM/04.02 S. 421 Tasks © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 421 - SS 2016 WM/04-05 S.
421 Every document extends the existing ontology with new facts works-for AAA Agenda #18 has-attachment DFKI sent-to has-address Amellie de Hemp Invitation #432 works-for sent-from Postfach 133500 69023 Heidelberg Germany Thomas Mustermann has-address has-task From: [email protected] Date: January 21, 2010 09:28:11 To: Amellie de Hemp <[email protected]> Subject: Invitation Semantic Desktop Course Dear Amellie: Attached please find the agenda for our next training course on the Semantic Desktop. We would be very glad to welcome you. has-date If you have any questions, don’t hesitate to contact me. has-location Trippstadter Straße 122 67663 Kaiserslautern 09-02-10 Best regards, Thomas --------------------------------- Agenda.pdf Dr. Thomas Mustermann Head of CRM DFKI GmbH Trippstadter Straße 122, 67663 Kaiserslautern, Germany Phone: +49-631-20575-100 WM/04.02 S. 422 Email: [email protected] Please note that the text in the attachment may itself contain relationships to already available knowledge © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 422 - SS 2016 WM/04-05 S. 422 For this purpose, he uses the rich RDFS toolbox allowing him to formally represent all aspects of information he needs Schemata describe classes of objects in the work context by a fixed pattern WM/04.02 S. 423 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 423 - SS 2016 WM/04-05 S. 423 For this purpose, he uses the rich RDFS toolbox allowing him to formally represent all aspects of information he needs Instances are exemplars or elements of a category having individual pattern values WM/04.02 S. 424 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 424 - SS 2016 WM/04-05 S.
424 For this purpose, he uses the rich RDFS toolbox allowing him to formally represent all aspects of information he needs Between the concepts of the ontology there are qualified relations called properties Each instance has an is-a relationship to its class, i.e. it complies with the defined pattern WM/04.02 S. 425 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 425 - SS 2016 WM/04-05 S. 425 For this purpose, he uses the rich RDFS toolbox allowing him to formally represent all aspects of information he needs [email protected] Thomas Mustermann Thomas Email-Address Label First_Name Dr. Mustermann Mustermann Alt–Label Last–Name Alt–Label Alt–Label Dr. Thomas Mustermann T. Mustermann Attributes describe the possible labels of an instance WM/04.02 S. 426 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 426 - SS 2016 WM/04-05 S. 426 For this purpose, he uses the rich RDFS toolbox allowing him to formally represent all aspects of information he needs [email protected] Thomas Mustermann Thomas Email-Address Label First_Name Dr. Mustermann Mustermann Alt–Label Last–Name Alt–Label Alt–Label Musi T. Mustermann URI http://dfki.de/outlook/contacts/052361784 WM/04.02 S. 427 The URI ensures the uniqueness of a resource © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 427 - SS 2016 WM/04-05 S. 427 Employing URIs, an application- and platform-independent unique representation for all resources is created Each information item is a semantic web resource whether it is a file (folder or document), an email constituent (i.e. message, sender, recipient, attachment), an address (...), or a calendar entry, ...
All resources are identified by a URI (Uniform Resource Identifier) http://www.AAA-verlag.com for a Website file://Documents/Courses/Agenda#18 for a file file://Documents/Courses/Invitations for a category http://dfki.de/outlook/contact/0019E177 for a contact WM/04.02 S. 428 imap://[email protected]/INBOX/;UID=3 for an Email outlook://appointment/00000000ECD4B99358B9814B9DA for a calendar entry © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 428 - SS 2016 WM/04-05 S. 428 Michael builds on an ontology provided by CoMem offering him a comprehensive domain vocabulary WM/04.02 S. 429 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 429 - SS 2016 WM/04-05 S. 429 Recognition and linking of document contents in CoMem is done in three steps 1 Information Classification & Routing Mapping the document contents into the organizational structure 2 Information Extraction Mapping the document content into predefined message patterns 3 Information Integration Mapping the document content into the context of processes WM/04.02 S. 430 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 430 - SS 2016 WM/04-05 S. 430 Business processes determine knowledge demand and at the same time produce knowledge! WM/04.02 S. 431 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 431 - SS 2016 WM/04-05 S.
431 Business processes bundle information in a chronological context A business process describes a set of tasks, which depending on data should be processed in a (pre-)specified order and which finally provide a defined result. For activating processes, there is usually an occasion, e.g. a call for bids, a request or an application Each process task describes a demand for (meta) data which is needed for decision making or problem solving Documents are sent back and forth to the requestor to deliver (meta) data, i.e. they are based on each other WM/04.02 S. 432 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 432 - SS 2016 WM/04-05 S. 432 Workflow processes are expecting information that is served by multi-channel documents 87% Study Investigation of 48 different processes in an internationally active financial service company 100% (48) 29% 54% Findings In general, we can differentiate between two types of information in documents 83% initiate new processes 58% relate to waiting processes 38% 4% 13% 2% 87% of all processes are triggered by incoming multi-channel documents WM/04.02 S. 433 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 433 - SS 2016 WM/04-05 S. 433 CoMem provides strong support for Michael, enabling him to work more effectively and to get active support Michael enters each new process in his workflow management system When activating the process a corresponding task sequence is proposed to Michael *+,-./-% 1234.567/%8+,97,7:;7/% !"#$!#$$% 0$#0$#$!% 0% & % ' % & % ' % ( % ) % ) % & % ' % & % ' % ( % ) % ) % & % ' % & % Von: [email protected] Datum: 20. Dezember 2011 09:28:11 MEZ An: [email protected] Betreff: Kfz-Versicherung Sehr geehrte Frau Dr. Kustermann, als Anlage übersende ich Ihnen den Antrag für eine Kfz-Versicherung. Ich bitte Sie, den Antrag auszufüllen und bis 23.12.11 an uns zurück zu schicken.
Für Fragen stehe ich Ihnen gerne zur Verfügung. Beste Grüße, M. Lenz ---------------------------------Michael Lenz Car Financial Services AG Kfz Versicherung Schmalbachstr. 1, 38112 Braunschweig Tel.: +49 (531) 212 – 83 212 Fax: +49 (531) 212 – 83 215 1<.,;.6=97,:>5;%87,?#% @/;,.-%'(HI% @/;,.-3A,+B7==:/=;./B% (C66:-D% A,:+% @/;,.-=?+,E#%87,=7/F7/% ':%!0#0$#$!% @/;,.-%7,?.==7/% $% @/;,.-%97.,97:;7/% (,%!2#0$#$!% $% WM/04.02 S. 434 Michael may set deadlines and priorities G% (,%!2#0$#$!% Kfz-A2K.pdf Clicking into the respective check box results in an email that is composed of pre-defined text patterns complemented by respective meta data !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?b"""1"""77"#$%&" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+" WM/04-05 S. 434 Sending out Michael’s email leads to the initialization of a taskoriented workflow Von: [email protected] Datum: 20. Dezember 2011 09:28:11 MEZ An: [email protected] Betreff: Kfz-Versicherungsantrag @/;,.-=?+,E#%87,=7/F7/% Sehr geehrter Frau Dr. Kustermann, als Anlage übersende ich Ihnen den Antrag für eine KfzVersicherung. Ich bitte Sie, den Antrag auszufüllen und bis 23.12.11 an uns zurück zu schicken. &:>5.76%K7/B% Für Fragen stehe ich Ihnen gerne zur Verfügung ',#%L7=:/7%H<=;7,E.//% Beste Grüße, !0#$!#!0$$%0MD!ND$$%&J4% M. Lenz ---------------------------------Michael Lenz Car Financial Services AG Kfz Versicherung Schmalbachstr. 1, 38112 Braunschweig Tel.: +49 (531) 212 – 83 212 Fax: +49 (531) 212 – 83 215 Kfz-A2K.pdf Icons allow to !2#$!#!0$$%00D00D00%&J4% access the H?B%*7,=:>57,</-% original ###% resources WM/04.02 S. 435 The task to be processed is instantiated by bit of information available from CoMem !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?_"""1"""77"#$%&" WM/04-05 S. 
435 When CoMem was introduced, CarFS defined rule bases supporting the automatic routing Sorting rules allow to identify the corresponding inbox of the document Routing rules define the respective clerk based on the sender, the content, the skill or pre-defined priorities Sorting Rules + Routing Rules + An integrated workload control directs routing and avoids long processing times Depending on the class of an incoming document, tasks are generated via the running process and delivered to the task context at each workspace together with extracted as well as enriched WM/04.02 S. 436 information ... but how does this work? © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 436 - SS 2016 WM/04-05 S. 436 The new corporate memory of CarFS provides a socio-technical work environment Subsequent tasks within the workflow are enriched with available information Some tasks labeled by a are taken over and solved by CoMem For that purpose, labeled open tasks are sent to a task server WM/04.02 S. 437 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 437 - SS 2016 WM/04-05 S. 437 The task server manages all open tasks and provides expectation patterns to be “verified” by the document analysis Multi-Channel Document Analysis Workload Control Sorting Rules Open Tasks Bestandssysteme ERP System Routing Rules CarFS WM/04.02 S. 438 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 438 - SS 2016 WM/04-05 S. 438 When Gesine’s email arrives at CarFS, it is first archived using a URI and consequently split for further processing CarFS Archive Antrag #18 Von: Gesine Kustermann <[email protected]> [email protected]> Datum: 23. Dezember 2011 11:33:17 MEZ An: [email protected] Betreff: Re: Kfz-Versicherung Hallo Herr Lenz, hat-Anlage Vielen Dank für die Übersendung der Unterlagen. 
Im Attachment finden Sie den ausgefüllten und eingescannten Antrag mit der Bitte um zügige Bearbeitung. Using information extraction Vielen Dank und frohe Weihnachten, G.Kustermann ------Dr. Gesine Kustermann Kaufm. Geschäftsführung DFKI GmbH Trippstadter Straße 122 D-67663 Kaiserslautern Germany Phone +49-631-200-75 -801 Fax +49-631-200-75-800 Email [email protected] Using document image analysis and understanding Von: Gesine Kustermann <[email protected]> <[email protected] [email protected]> [email protected] Datum: 22. Dezember 2011 11:33:17 MEZ An: [email protected] Betreff: Re: Kfz-Versicherung Hallo Herr Lenz, Vielen Dank für die Übersendung der Unterlagen. Im Attachment finden Sie den ausgefüllten und eingescannten Antrag mit der Bitte um zügige Bearbeitung. Vielen Dank und frohe Weihnachten, G.Kustermann ------Dr. Gesine Kustermann Kaufm. Geschäftsführung DFKI GmbH Trippstadter Straße 122 D-67663 Kaiserslautern Germany Phone +49-631-200-75 -801 Fax +49-631-200-75-800 Email [email protected] Kfz-A2K.pdf WM/04.02 S. 439 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?B"""1"""77"#$%&" WM/04-05 S. 439 Let us first deal with emails! WM/04.02 S. 440 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb$"""1"""77"#$%&" WM/04-05 S. 440 Emails are analyzed by a multi-step approach in order to get the relevant data Metadata Extraction Classification Information Extraction Verification Workload Control Sorting Rules Open Tasks Bestandssysteme ERP System Routing Rules CarFS WM/04.02 S. 441 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb%"""1"""77"#$%&" WM/04-05 S. 441 Michael makes use of an ontology-based document understanding system helping him to extract relevant facts Von: Gesine Kustermann <[email protected]> Datum: 22. 
Dezember 2011 11:33:17 MEZ An: [email protected] Betreff: Re: Kfz-Versicherung Hallo Herr Lenz, Vielen Dank für die Übersendung der Unterlagen. Im Attachment finden Sie den ausgefüllten und eingescannten Antrag mit der Bitte um zügige Bearbeitung. Incoming Email Vielen Dank und frohe Weihnachten, G.Kustermann Ontological Knowledge ------Dr. Gesine Kustermann Kaufm. Geschäftsführung DFKI GmbH Trippstadter Straße 122 D-67663 Kaiserslautern Germany Phone +49-631-200-75 -801 Fax +49-631-200-75-800 Email [email protected] Ontology-Based Document Understanding New Facts WM/04.02 S. 442 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb#"""1"""77"#$%&" WM/04-05 S. 442 Information extraction stepwize transforms the contents of documents into knowledge relating it to the existing ontology Segmentation of the text into paragraphs, sentences, and words Identification of potential attributes Matching of attributes with known classes and instances Description of the document using intrinsic text features Document type determination WM/04.02 S. 443 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb?"""1"""77"#$%&" WM/04-05 S. 443 In the Segmentation phase plain text should be segmented into hierarchical lexical units Implementing this task is rather trivial for European languages, by separating white spaces from non white spaces. Despite this, in Chinese or Japanese it is not evident from the typography where word boundaries are An exception is given when analyzing paper documents Input: plaintext Output: segment hierarchy document paragraph paragraph sentence token sentence token token sentence token token token token token WM/04.02 S. 444 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bbb"""1"""77"#$%&" WM/04-05 S. 444 Segmentation - Example: Example of GATE* b.) White space segmentation WM/04.02 S. 445 a.) 
Paragraph extraction !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb_"""1"""77"#$%&" WM/04-05 S. 445 * Source: * Source: http://gate.ac.uk North, 2000 Symbolization extracts relevant entities concerning structure and content Relevant tokens are named or structured entities given in text sequences In order to resolve ambiguities, Part-Of-Speech (POS) Tagger and Parser for identifying grammatical items, such as nouns, verbs, adjectives, or adverbs Input: token sequences Output: entities Matching token sequences against glossaries listing names of typed entities such as cities or persons, or using Hidden Markov Models that have been trained with annotated corpora Regular expressions are often used to recognize structured entities (such as addresses). Part-Of-Speech Tagger (POS) annotates token sequences of sentence as corresponding to a particular part of speech Based on token sequences, POS annotations, and grammar rules, WM/04.02 S. 446 a parser is able to extract coherent phrases of tokens (e.g., German Research Center for Artificial Intelligence) !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb&"""1"""77"#$%&" WM/04-05 S. 446 Symbolization - Example (POS-Tagging)*: In the few short years of its existence, Google has come a long way. In/IN the/DT few/JJ short/JJ years/ NNS of/IN its/PRP$ existence/NN ,/, Google/NNP has/VBZ come/VBN a/DT long/JJ way/NN./. WM/04.02 S. 447 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb@"""1"""77"#$%&" WM/04-05 S. 
447 * tagged with JTextPro*: A Java-based Text Processing Toolkit http://jtextpro.sourceforge.net Symbolization - Patterns for Structured Entities Expression for matching dates (e.g., 2008/02/02) Rule: Date ( {Token.kind == number} {Token.string == "/"} {Token.kind == number} {Token.string == "/"} {Token.kind == number} ):date --> :date.TempDate = {kind = <DATE>} WM/04.02 S. 448 Written in JAPE - Java Annotation Patterns Engine. (http://gate.ac.uk/sale/tao/#chap:jape) !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bbA"""1"""77"#$%&" WM/04-05 S. 448 Symbolization - Example (Entity Recognition)*: WM/04.02 S. 449 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bbB"""1"""77"#$%&" WM/04-05 S. 449 * Source: http://gate.ac.uk In the Instantiation phase, entities as references for possible real world instances, roles, or actions have to be resolved If several entities refer to the same instance in terms of multiple occurrences, pronouns, acronyms, or other abbreviations, a co-referencing analysis performs a unification. Input: entities Output: instances, relations(roles and actions) During Instance Resolution, entities are resolved as one or many instances (e.g., “George Bush” -> President: George Bush Senior | President: George Bush Junior) Relation Resolution resolves relevant roles and actions (e.g., ! “is member of” ! -> rel:employedIn | rel:projectMember ) The co-reference analysis unifies references of single instances in multiple sentences by creating reference chains (e.g., Peter Parker came in. His suite was disrupted. Peter looked angry and WM/04.02 S. 450 studied the latest news about Spiderman.) !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_$"""1"""77"#$%&" WM/04-05 S. 450 Instantiation - Example*: WM/04.02 S. 
451 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_%"""1"""77"#$%&" WM/04-05 S. 451 * Source: http://gate.ac.uk ! back to the example! WM/04.02 S. 452 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_#"""1"""77"#$%&" WM/04-05 S. 452 Metadata in the header of the email are extracted and the subject is used as a first orientation point for classification arbeitet-für DFKI Antrag #18 gesendet-an hat-Adressse hat-Anlage Dr. Gesine Kustermann Trippstadter Straße 122 67663 Kaiserslautern CarFS arbeitet-für gesendet-von gesendet-von Michael Lenz hat-Anlage gesendet-an hat-Adressse bearbeitet-Vorgang Von: Gesine Kustermann <[email protected]> [email protected]> Datum: 22. Dezember 2011 11:33:17 MEZ An: [email protected] Betreff: Re: Kfz-Versicherung Antrag DFKI #241 Hallo Herr Lenz, Vielen Dank für die Übersendung der Unterlagen. Im Attachment finden Sie den ausgefüllten und eingescannten Antrag mit der Bitte um zügige Bearbeitung. Von: Vielen Dank und frohe Weihnachten, G.Kustermann ------Dr. Gesine Kustermann Kaufm. Geschäftsführung DFKI GmbH Trippstadter Straße 122 D-67663 Kaiserslautern Germany Phone +49-631-200-75 -801 Fax +49-631-200-75-800 WM/04.02 S. 453 Email [email protected] An: hatTermin Dr Gesine Kustermann Schmalbachstr. 1, 38112 Braunschweig 23.12.11 Michael Lenz Zeit: 22.12.2011 11:33 Uhr Kfz-Versicherung !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_?"""1"""77"#$%&" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+" WM/04-05 S. 453 Subsequently, potential symbols are recognized and it is checked whether they fit into the existing knowledge arbeitet-für DFKI Antrag #18 gesendet-an hat-Adressse hat-Anlage Dr. 
Gesine Kustermann Trippstadter Straße 122 67663 Kaiserslautern arbeitet-für gesendet-von gesendet-von Michael Lenz hat-Anlage gesendet-an hat-Adressse bearbeitet-Vorgang Von: Gesine Kustermann <[email protected]> [email protected]> Datum: 22. Dezember 2011 11:33:17 MEZ An: [email protected] Betreff: Re: Kfz-Versicherung Antrag DFKI #241 Hallo Herr Lenz, e.g. Vielen Dank für die Übersendung der Unterlagen. Im Attachment finden Sie den ausgefüllten und eingescannten Antrag mit der Bitte um zügige Bearbeitung. Von: Vielen Dank und frohe Weihnachten, G.Kustermann ------Dr. Gesine Kustermann Kaufm. Geschäftsführung DFKI GmbH Trippstadter Straße 122 D-67663 Kaiserslautern Germany Phone +49-631-200-75 -801 Fax +49-631-200-75-800 WM/04.02 S. Email gesine.Kustermann@dfki CarFS An: Dr Gesine Kustermann hatTermin Schmalbachstr. 1, 38112 Braunschweig 23.12.11 Michael Lenz Zeit: 22.12.2011 11:33 Uhr 454 Kfz-Versicherung !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_b"""1"""77"#$%&" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+" WM/04-05 S. 454 Based on these extensions, new relations (properties) may be extracted and incorporated into the ontological context of the message arbeitet-für DFKI Antrag #18 gesendet-an hat-Adressse hat-Anlage Dr. Gesine Kustermann Trippstadter Straße 122 67663 Kaiserslautern CarFS arbeitet-für gesendet-von gesendet-von Michael Lenz hat-Anlage Von: Gesine Kustermann <[email protected]> Datum: 22. Dezember 2011 11:33:17 MEZ An: [email protected] [email protected] Betreff: Re: Kfz-Versicherung gesendet-an hat-Adressse Antrag??? bearbeitet-Vorgang Antrag DFKI #241 Hallo Herr Lenz, Vielen Dank für die Übersendung der Unterlagen. Im Attachment finden Sie den ausgefüllten und eingescannten Antrag mit der Bitte um zügige Bearbeitung. Von: Vielen Dank und frohe Weihnachten, G.Kustermann ------Dr. Gesine Kustermann Kaufm. 
Geschäftsführung DFKI GmbH Trippstadter Straße 122 D-67663 Kaiserslautern Germany Phone +49-631-200-75 -801 Fax +49-631-200-75-800 WM/04.02 S. Email gesine.Kustermann@dfki An: hatTermin Dr Gesine Kustermann Schmalbachstr. 1, 38112 Braunschweig 23.12.11 Michael Lenz Zeit: 22.12.2011 11:33 Uhr 455 Kfz-Versicherung The email content is related to the existing knowledge via semantic hyperlinks !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b__"""1"""77"#$%&" !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+" WM/04-05 S. 455 How to deal with the attachments? WM/04.02 S. 456 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_&"""1"""77"#$%&" WM/04-05 S. 456 For the analysis of attachments (image documents) a different processing is necessary Form Definitionen Image Processing Classification Information Extraction Verification Workload Control Sorting Rules Open Tasks Bestandssysteme ERP System Routing Rules CarFS WM/04.02 S. 457 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_@"""1"""77"#$%&" WM/04-05 S. 457 A state-of-the-art solution should provide the following Image Filter (Re-Segmentation) Scanner Skew Correction Upside-Down Correction Line Filter Blind Color Recognition WM/04.02 S. 458 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_A"""1"""77"#$%&" WM/04-05 S. 458 For document understanding, we may apply several analysis strategies at the same time Antrag auf Kfz-Versicherung 1. Layout-based Rerecognition (without OCR) ! very fast 2. Search patterns Extraction of known terms/titles/phrases 3. Check boxes labels allowing to mark the existence of different features 4. Form identifiers regular expressions at fix locations on form with same/similar layout WM/04.02 S. 
459 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_B"""1"""77"#$%&" WM/04-05 S. 459 The scanned pages need to be reorganized as part of a document Scan Flow Scanner Page Classification Kfz-A2K Page 1 Document Formation Kfz-A3F Page 1 Kfz-A3F Page 2 ... Document Classification Kfz-A2K Page 1 WM/04.02 S. 460 Kfz-A3F Page3 ... Kfz-A3F Page 1+2+3 Note: In practice documents are often physically separated by an empty paper (the content of envelopes Document Understanding !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&$"""1"""77"#$%&" WM/04-05 S. 460 Document Understanding aims to find logical objects (semantic entities) within the document image Name of Medical Doctor Address of Medical Doctor Name of Patient Name of Assured Insurance Number Company Database Invoice-No. Tabular Information Service Dates GOÄ-Numbers WM/04.02 S. 461 Date of Invoice Factors Single Amounts Total Amount Diagnosis Example !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&%"""1"""77"#$%&" WM/04-05 S. 461 Transforming Data into Knowledge Image Objects Layout Structure Image Characters DATA „d“ „S“ „2“ WM/04.02 S. 462 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&#"""1"""77"#$%&" WM/04-05 S. 462 WM/04.02 S. 463 © [email protected] - 2009 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()* +(,'*##(-*,!*++1""=>"b&?"""1"""77"#$%&" WM/04-05 S. 463 Transforming Data into Knowledge Image Objects Layout Structure Image Characters DATA Document Understanding „d“ „S“ „2“ INFORMATION KNOWLEDGE Words Information WM/04.02 S. 464 Presentation ! Sender ! Recipient ! Date ! Reference ! Signature ! ,,, Processes ! Offer ! Order ! Invoice Company Data ! 
,,, Logical Objects Message Types !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&b"""1"""77"#$%&" WM/04-05 S. 464 Are there different ways for categorizing printed documents? WM/04.02 S. 465 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&_"""1"""77"#$%&" WM/04-05 S. 465 Categorization – An Example Love Letter Tax Form Delivery Note ? Invoice Cheque Order Offer WM/04.02 S. 466 Report !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&&"""1"""77"#$%&" WM/04-05 S. 466 Remember the model for automatic classification Unknown Document Sample Documents Das Bild kann nicht angezeigt werden. Dieser Computer verfügt möglicherweise über zu wenig Arbeitsspeicher, um das Bild zu öffnen, oder das Bild ist beschädigt. Starten Sie den Computer neu, und öffnen Sie dann erneut die Datei. Wenn weiterhin das rote x angezeigt wird, müssen Sie das Bild möglicherweise löschen und dann erneut einfügen. Attribute Extraction (1) Transformation (2) Das Bild kann nicht angezeigt werden. Dieser Computer verfügt möglicherweise über zu wenig Arbeitsspeicher, um das Bild zu öffnen, oder das Bild ist beschädigt. Starten Sie den Computer neu, und öffnen Sie dann erneut die Datei. Wenn weiterhin das rote x angezeigt wird, müssen Sie das Bild möglicherweise löschen und dann erneut einfügen. Attributes Transformation (A) Attribute-Value Representation (Vectors) Attribute-Value Representation Attribute-Value Learning (3) Attribute-Value Representation (Classifier(s)) Classifier Application (B) Categories (1)! Extraction of relevant features from a set of representative sample documents (2)! Transformation of documents into attribute-value representation based on the identified features (3)! Construction of classifier using the attribute-value (attribute WM/04.02 representation S. 
467 value learning) (A) Transformation of unknown documents into respective attribute-value representation (B) Application of classifier in order to assign the document as belonging to one of the given tasks !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&@"""1"""77"#$%&" WM/04-05 S. 467 There are various modes for categorizing printed documents!? WM/04.02 S. 468 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&A"""1"""77"#$%&" WM/04-05 S. 468 Mixed document stacks require different methods Layout features Textual features Tabular features Search Pattern Format features Special indicators WM/04.02 S. 469 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&B"""1"""77"#$%&" WM/04-05 S. 469 How can we use the inherent characteristics of the document layout? WM/04.02 S. 470 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@$"""1"""77"#$%&" WM/04-05 S. 470 Layout guides a reader’s attention Layout is a valuable orientation with helps to drive our attention There are some characteristics which might be useful Each black pixel in an unfilled document “form” is also black in the filled one The filled form contains more black pixels A white pixel in the filled form is white in an empty one ? = An image comparison is not sufficient because scan shifts, translation and rotation do not allow to just subtract net and WM/04.02 S. 471 gross image !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@%"""1"""77"#$%&" WM/04-05 S. 
471 Generating layout reference pattern Weighting the individuality of the document layout structure leads to reference pattern for classification Use the net image (only preprinted information) Consider line and block segments in a document image instead of the image itself For each single document of the training set do ! ! measure the degree of relevancy of a text block based on its geometric features For all document of the training set do ! ! compute the degree of individuality of a block for a single document WM/04.02 S. 472 Take result as a class exemplar (reference pattern) !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@#"""1"""77"#$%&" WM/04-05 S. 472 Simple Attribute-Value Representation of Layout Attribute : (x1, y1); (x2, y2); 5(...); .. : (x1, y1); (x2, y2); 2(...); .. : (x1, y1); (x2, y2); 1(...); .. : Value : 0 0 : 0,98 0 : 0,74 : 0,09 0 WM/04.02 S. 473 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@?"""1"""77"#$%&" WM/04-05 S. 473 “Form” Classification using Layout Features Finding the right reference pattern for an unknown document is rather a search problem than a classification task Consider line and block segments in a image of the unknown document Measure similarity of unknown document to reference patterns by finding appropriate text block counterparts (net vs. gross image) Block similarity is computed by a fuzzy match of the geometric features translation & skew relative positions multiple candidates constraint satisfaction WM/04.02 S. 474 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@b"""1"""77"#$%&" WM/04-05 S. 474 Document classes can be learned just by the difference in their layout Take a set of „unfilled“ document forms to train the system System establishes layout reference patterns by its own ! ! independent from OCR (text) data ! 
independent of number of document classes System classifies unknown documents according to reference patterns Phase1 Sample Document Phase2 Unknown Document Learning Knowledge Base Classification Knowledge Base Purchaser Product-ID Number Prize WM/04.02 S. 475 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@_"""1"""77"#$%&" WM/04-05 S. 475 The initial training contains the definition of regions of interest Scanning of an sample document Specify ID of reference pattern > AOK-Application Generate net image (e.g. by editor) Itemize “regions of interest“ by drawing rectangle with the mouse and specify region > Adresse > Vers.-No. > Bank > Kontonummer > Bankleitzahl WM/04.02 S. 476 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@&"""1"""77"#$%&" WM/04-05 S. 476 Let’s complement the layout characteristics with text features? WM/04.02 S. 477 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@@"""1"""77"#$%&" WM/04-05 S. 477 How to use the text for categorization? WM/04.02 S. 478 AppleS urce Date:7/1-D/-D4-7/10/04 Inv-.ice. No. AS-] Ta: Perm FjeIInwn 1999 Waadside Or Medford. OP 97501 far: Website Creatian Services d &oed s Category Cast Deseri tion Time Amount ~&raphic Design 50/hour -Corporate Identity Creation 08.00.00 $100.00 3 logos based on color scheme, printable and digital formats, letterheads and business cards Category Cost Description Time Amount Service Call $50/hour Server Setup d Config 04.00.00 $200.00 Category Cost De seription Time Amount Saf#k~are Design X50/hour -Create In.#egrated Web Apps 09.00.00 $45000 Form Processor, Mortgage Calculator, Newsletter System, Installations Category Cost Description Time Amount Web Canstructian $501nour Create Website Templates 04.30.00 $225.00 Design navigation. header. footer. 
and content stylesheets Web –Canstructian $50} hour -Customized E-Commerce System 08.00.00 $400.00 Category Cast Description Count Amount Flat Fee Item $18.00 Website Single-Page 20 $360.00 Time Subtotal 33.30.00 $2035.D0 Lxpenses Category Description Arrount Expense 3 CDs Overnight FedEx $16.90 Expense 5DD Business -Cards, glossy coot finish $95.00 Expense Domain Registration X8.95 Subtotal $]2085 Invoice Total : $2155.05 Thank you far your business. Perry' Payable within 15 days. © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 478 - SS 2016 WM/04-05 S. 478 Simple Attribute-Value Representation of Text (see Chapter 1) Sehr geehrte Damen und Herren, Für unsere Bemühungen erlauben wir uns 150 Euro zu berechnen Mit freundlichen Grüßen Attribute : aber als : erlauben erfassen : freundlichen geehrte : zu zurück Value Value : 0 : 0 0 : 0 1 : 0 7 0 0 1 0 1 5 : 11 1 : 0 2 0 Words define an index for a vector of values which has the dimension given by the number of words WM/04.02 S. 479 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 479 - SS 2016 WM/04-05 S. 479 And what about OCR errors? WM/04.02 S. 480 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 480 - SS 2016 WM/04-05 S. 480 OCR errors may be considered by including an edit distance between two words Definition of an edit distance between words: If A is an alphabet of characters and A* is the set of words over A, the Levenshtein distance of two words $S = s_1 s_2 \dots s_n \in A^*,\ n \ge 0$, and $T = t_1 t_2 \dots t_m \in A^*,\ m \ge 0$, is the minimal sum of the costs of all elementary edit operations necessary to transform S into T An edit distance must address different types of OCR errors, i.e. substitutions, insertions, and deletions All corresponding costs CostSub, CostIns and CostDel have a uniform value of 1, allowing to stepwise increase the cost value while searching WM/04.02 S. 
481 for an optimal Levenshtein distance between two words © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 481 - SS 2016 WM/04-05 S. 481 The Levenshtein distance is defined by the minimal edit costs: Thus, every string S can be transformed into another string T Example: A = {A, C, G, T}, S = GATAAGAA, and T = GATTACA GAT(A → T)A(G → C)A(A → ε) = GATTACA Note: This is one possible transformation of S into T by three edit operations. In general, there is more than one possible transformation Considering the length of the strings to be transformed we use a normalized distance measure $\mathrm{Lev}_{\mathrm{norm}}(w_1, w_2) = \dfrac{\mathrm{Lev}(w_1, w_2)}{\max\{|w_1|, |w_2|\}}$ WM/04.02 S. 482 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 482 - SS 2016 WM/04-05 S. 482 But are statistical techniques sufficient for all types of printed documents? WM/04.02 S. 483 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 483 - SS 2016 WM/04-05 S. 483 WM/04.02 S. 484 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 484 - SS 2016 WM/04-05 S. 484 The employment of domain-specific proper names is a critical aspect Domain Ontology Example : OPSYS : : Apple iOs (...) Bada (...) Blackberry OS (...) Brew (...) RATE ... (...) : ... (...) : PRODUCER Apple (...) BlackBerry (...) HTC (...) Nokia (...) : Samsung Windows Phone (...) WM/04.02 S. 485 (...) 
List of lexical variations © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 485 - SS 2016 WM/04-05 S. 485 * Sentiment Detection and Analysis Using a Ground Truth set of exemplary documents for one type allows to automatically learn rules Rules should be employed for ... classification of document type which is essential for the post-ordered routing Extraction of relevant statements which simplify the understanding of the message For pre-processing the documents of the ground truth set, the domain model is considered Ground Truth Domain Text Preprocessing : : : Proper Names Normalized (neutralized) Text WM/04.02 S. 486 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 486 - SS 2016 WM/04-05 S. 486 * Sentiment Detection and Analysis Text Preprocessing The set of ground truth elements is prepared for learning „neutral“ indicators „neutralisiert“ Da mir Ihre Preise für Complete Mobile S ohnehin zu hoch sind und es für Mac iOS katastrophal funktioniert, kündige ich den Vrtrg. MOB/238-143 zum 30. Juni 2012. Ich bitte Sie, mir die bereits bezahlte Rate umgehend auf das Konto 100123456 zu überweisen da mir ihre preise für complete mobile s ohnehin zu hoch sind und es für mac ios katastrophal funktioniert kündige ich den vrtrg. mob/238-143 zum 30 juni 2012 ich bitte sie mir die bereits bezahlte rate umgehend auf das konto 100123456 zu überweisen da mir ihre preise für RATE ohnehin zu hoch sind und es für OPSYS katastrophal funktioniert kündige ich den vrtrg CONTRNO zum DATE ich bitte sie mir die bereits bezahlte rate umgehend auf das konto ACOUNTNO zu überweisen Elimination of ... ... punctuation marks ... capitals ... Hyphenations ... delimiters of short cuts Introduction of placeholders for ... ... proper names ... dates ... amounts ... other identities WM/04.02 S. 
487 Indicator-Learning © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 487 - SS 2016 WM/04-05 S. 487 Using the neutralized text, we may learn rules for classifying the document type Ground Truth Domäne Text Preprocessing : : : Eigennamenlisten Normalized (neutralized) Text Indicator-Learning Bag-of-Words Approach + Word Distance + Edit Distance Set of Indicators Rule-Learning Stemming + Thesaurus $\{\, kw_{ij} \text{ mit } kw_{ij} = [kw_{i1}, kw_{i2}, \dots, kw_{in}] \text{ für } 1 \le i \le m \,\}$ WM/04.02 S. 488 Set of Rules (incl. Measure of Belief (MoB)) Application of rules is intuitive © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 488 - SS 2016 WM/04-05 S. 488 * Sentiment Detection and Analysis Examples Refund Damage Message Cancelation - my [2] life insurance - please [2] to - quit :lev 1 - cancellation :lev 2 - next possible [2] date - transfer [2] the - me [3] to - confirm [3] the : WM/04.02 S. 489 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 489 - SS 2016 WM/04-05 S. 489 Categorizing real documents demands for many requirements! WM/04.02 S. 490 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 490 - SS 2016 WM/04-05 S. 490 From the very beginning a document understanding system can be trained to solve a certain problem Aspects to be considered: Categories and document classes Definition of classification features What to do with „not classified“ documents? Specification of value ranges (domains) for regions of interest Identification of search patterns and mathematical or logical constraints What to do if information is missing? Definition of special features for multipage documents Definition of records WM/04.02 S. 491 © 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 491 - SS 2016 WM/04-05 S. 
491 Records allow multi-source verification of results Note: In document understanding even incomplete information in documents is completed through the data base Enterprise data base WM/04.02 S. 492 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bB#"""1"""77"#$%&" WM/04-05 S. 492 Example of a document model editor WM/04.02 S. 493 !"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bB?"""1"""77"#$%&" WM/04-05 S. 493