Lecture 23rd May - AG Wissensbasierte Systeme

Transcrição

Lecture 23rd May - AG Wissensbasierte Systeme
Collaborative Intelligence
- Lecture SS 2016 -
Prof. Dr. Andreas Dengel
WM/04.02 S. 376
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?@&"""1"""77"#$%&"
WM/04-05 S. 376
Collaborative Intelligence focuses on the support of
knowledge workers within socio-technical networks
Web
Documents
(Paper, Fax, Email, eDocuments)
Social Network
<?xml ..
@
Interaction
Visualization
User Model
Document
Analysis
Recommender
Ontologies
Index
Information
Extraction
Indexing
Classification
Search
WM/04.02 S. 377
other socio-technical participants
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?@@"""1"""77"#$%&"
WM/04-05 S. 377
Collaborative Intelligence focuses on the support of
knowledge workers within socio-technical networks
Chapter 1:
Search & Classification
Chapter 2:
Attention-based Collaborative Intelligence
Chapter 3:
Recommender Systems
Chapter 4:
Proactive Multi-Channel Information Extraction
Chapter 5:
Usability in Collaborative Systems
Chapter 6:
Social Media Monitoring, Discovery & Forecast
WM/04.02 S. 378
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?@A"""1"""77"#$%&"
WM/04-05 S. 378
Chapter 4
Pro-Active Multi-Channel
Information Extraction
WM/04.02 S. 379
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?@B"""1"""77"#$%&"
WM/04-05 S. 379
Looking at various standard sources leads to some ideas of
what a document is
A document today may be* :
a piece of paper, booklet, etc., providing information,
especially of an official or legal nature

a piece of text or text and graphics stored on a computer as a
file for manipulation by document processing software

evidence or a proof
paper
digital
legal
A document today may be** :
a written or drawn representation of thoughts

a textual file along with its structure and design (fonts, colors,
and additional images)

WM/04.02
S. 380
a written proof used as evidence
paper
digital
legal
• Sources: * thefreedictionary.com
** wikipedia.org
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 380 - SS 2016
"
WM/04-05 S. 380
One first perspective from which a document can be viewed
is the one of being an artifact
A document is anything that we can read and which relates to some
aspect of the social world
1
Artifact
physically and socially
Allows fixing meaning in a
stable medium serving human
needs for a period of time
Allows necessary changes to
remain in sync with a changing
world
Document
However, in a traditional consideration, documents are associated with
WM/04.02 S. 381
surfaces that capture the information, …
* Source: Yates. Control through Communication, The Johns Hopkins University Press, Baltimore (1989).
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 381 - SS 2016
WM/04-05 S. 381
… surfaces as they have been employed for millennia to distribute
and preserve communication intents over time and space
The ancestors of today's many different
scripts were probably cave paintings

Early caveman paintings which were not just
drawings in the eyes of their creators but
also carried a message have been dated back
some 50,000 years

The oldest known artifacts of script reach
back to the fifth millennium BC:
Clay tablets depict round indentations
together with the image of an animal.
Archaeologists believe these indentations to
represent the number ten
WM/04.02 S. 382
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 382 - SS 2016
WM/04-05 S. 382
The hieroglyphs of the Egyptians still contain elements of
picture writing
Hieroglyphs were established in Egypt

Hieroglyphs did not represent pure
pictograms nor pure phonetic spelling and
did probably evolve from the ornamentation
of vases

Besides pictograms and stylized
(image-)symbols of common items there
exist phonograms representing phonemes,
just like today's alphabets

Since then, the number of script characters
grew from roughly 700 to approximately
5,000 today
WM/04.02 S. 383
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 383 - SS 2016
WM/04-05 S. 383
The Cuneiform Script is the first step towards abstract
writing
Drawing straight lines on clay tablets is
much easier than drawing images

The cuneiform script was born when the
Babylonians and Sumerians used writing
tools that made wedge-shaped
indentations

Cuneiform was so complex that already in
2000 BC, world literature like the
monumental Epic of Gilgamesh was
written in old Babylonian on twelve clay
tablets
WM/04.02 S. 384
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 384 - SS 2016
WM/04-05 S. 384
Paper and Print Technologies originated from
ancient China
For a long time, the Chinese characters
were written onto paper with Indian Ink

Around the year 1050 AD, Bi Sheng
invented the first movable type printing
press technology (His fragile types were
made of baked clay)

Sheng could thus print on paper long
before the German Johannes Gutenberg
made his famous invention (1439)

However, the Chinese faced another big
problem: their alphabet contains about
50,000 characters
WM/04.02 S. 385
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 385 - SS 2016
WM/04-05 S. 385
Reading the Bible in mediaeval times required the
privilege of knowing the Latin language
To produce a mediaeval Bible on
parchment, skins of 500 calves were
needed

… and a lot of patience, as every letter and
image was drawn by hand.

In those days, libraries were characterized
by a babble of voices

The readers did not complain, as until the
Late Middle Ages, silent readers were
suspected to be in league with the devil
WM/04.02 S. 386
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 386 - SS 2016
WM/04-05 S. 386
With the invention of letterpress printing a new market has
opened up in Europe
Before the invention of Gutenberg’s
movable type in 1456, books were very
rare

Most of these invaluable manuscripts were
locked up in libraries

Gutenberg’s invention enabled the
duplication of books at affordable costs
throughout Europe and gave rise to a
completely new economic sector

Around the year 1500 there were around
200 printing shops (or: plants,
establishments) in Venice only
WM/04.02 S. 387
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 387 - SS 2016
WM/04-05 S. 387
Mass-production makes books affordable for everyone
Around 1675 AD more books were printed
in German than in Latin

A revolution was starting that would peak
in the 18th Century: The formats became
smaller and printing volumes (or: editions)
larger

The first lending libraries appeared

The costs of books shrank and the number
of readers increased

But most importantly, reading habits have
changed: Instead of the repeated reading
of the same book, most books were read
just once
WM/04.02 S. 388
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 388 - SS 2016
WM/04-05 S. 388
Today documents are an important part of our culture, but
behind the scenes the competition is getting fiercer
Documents are both sources of
information as well as a means for
communication

However, the form in which documents
attract, command, request, convince, or
amuse is steadily evolving and diversifying
due to the many communication channels
that are available these days
WM/04.02 S. 389
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 389 - SS 2016
WM/04-05 S. 389
From a historical point of view, we considered a document
image as a subject of study and interpretation
The need for analyzing and recognizing documents is pushed by economic goals
“Ink on paper”
Document analysis and recognition* includes contributions dealing with computer
recognition of characters, symbols, text, lines, graphics, images, handwriting,
signatures, as well as automatic analyses of the overall physical and logical
structures of documents, with the ultimate objective of a high-level understanding
WM/04.02 S. 390
of their semantic content.
* Source: International Journal on Document Analysis and Recognition (IJDAR)
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 390 - SS 2016
WM/04-05 S. 390
The original idea of document analysis and recognition was
the transformation process
Provides opportunity for an
audience to access those
documents and perform regular
operations like search, editing,
reuse, conversion, publishing, etc.
Document Images
WM/04.02 S. 391
Structured text
Metadata
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 391 - SS 2016
WM/04-05 S. 391
From an abstract point of view this includes several
transformation steps
Information
Data
Symbol
Components
Pixels
+ Meaning
+ Order
+ Shape
+ Connectivity
• Components are characterized by a certain amount of connected (neighbored) pixels
• Symbols are defined by a certain arrangement of characteristically shaped
components
• Data is the result of encoding symbols in a specific order
WM/04.02 S. 392
• Information allows to associate data with a certain meaning
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 392 - SS 2016
WM/04-05 S. 392
A second perspective to look at a document is a
technological one
Nowadays, documents are produced via pen (on paper or on tablet), text and graphic
editors (connected to printers), or other technical means, such as cameras, and, not
to forget, messaging channels (incl. community platforms)
1
2
Document
When we create a document, we can specify the data to appear, control the layout,
WM/04.02 S. 393
formatting, grouping, and subtotaling of data, and specify the position of page breaks
(in addition, we can include pictures and draw borders)
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 393 - SS 2016
* Source: Yates. Control through Communication, The Johns Hopkins University Press, Baltimore (1989).
WM/04-05 S. 393
''
Are we moving from
a fixed world of paper documents
to a fluid world of digital artifacts?
WM/04.02 S. 394
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 394 - SS 2016
WM/04-05 S. 394
There are a number of contrastive pairs by which paper and
digital artifacts are often characterized
Paper Document
FLUID
stable
permanent
static
inactive
rigid
“All documents, whether they are printed or digital,
exist in a perpetual tension between fixity and fluidity.”
Digital Document
FIX
WM/04.02 S. 395
* Source: Levy, D. M. Fixed or fluid? Document stability and new media. Proceedings European Conf. on
Hypertext Technology, (pp. 24–31). New York: Association for Computing Machinery (1994)
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 395 - SS 2016
WM/04-05 S. 395
There is a bunch of exciting research questions arising from
the alternating transitions
• Is it a new version or a new document?
• Does the signature look like it was “helped”?
• Is the document created all at one time or prepared sequentially?
• Is it a legal manipulation or a fraud attempt?
• …
WM/04.02 S. 396
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 396 - SS 2016
WM/04-05 S. 396
Rhythm between fixity and fluidity is influenced by the genre,
which itself defines documents by particular forms and function
2
1
U
Form
Document
U
Function
I
Engineering Drawing
… Job Application
…
Complaint
Newspaper

Scientific Report
WM/04.02 S. 397
Novel
Message
…
Sketch
Genre
Invoice
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 397 - SS 2016
* Source: Yates. Control through Communication, The Johns Hopkins University Press, Baltimore (1989).
WM/04-05 S. 397
Documents are further embedded into institutional processes
and are part of our work practices, opening up a third view
2
1
Work
3
human practices
institutional embedment
U
Form
Document
U
Function
I
Engineering Drawing
… Job Application
…
Complaint
Newspaper

Scientific Report
WM/04.02 S. 398
Novel
Message
…
Sketch
Genre
Invoice
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 398 - SS 2016
* Source: Yates. Control through Communication, The Johns Hopkins University Press, Baltimore (1989).
WM/04-05 S. 398
""
WM/04.02 S. 399
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"?BB"""1"""77"#$%&"
WM/04-05 S. 399
""
The 11th IAPR Workshop on Document Analysis Systems ...
WM/04.02 S. 400
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b$$"""1"""77"#$%&"
WM/04-05 S. 400
ORGANIZATION
COUNTRY
WORKSHOP
is-a
is-a
PERIOD
IAPR
is-a
has-sponsor
France
DAS 2014
is-a
has-date
April 7-10, 2014
has-participant
is-a
CITY
Kise, Koichi
Kise, Koichi
is-a
Smith, Ray
CHAIR
Tours
Ogier, Jean-Marc
Ogier
, Jean-Marc
Liwicki, Marcus
Liwicki
, Marcus
is-a
is-part
has-participant
Ramel, Jean-Yves
, Jean-Yves
is-a
has-location
is-a
is-a
is-a
PC CHAIR
is-a
WM/04.02 S. 401
PERSON
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b$%"""1"""77"#$%&"
WM/04-05 S. 401
Document analysis and recognition is also an attempt to
increase the value of a document
Knowledge
Value
Information
Data
Symbol
Components
Pixels
+ Context
+ Meaning
+ Order
+ Shape
+ Connectivity
• Components are characterized by a certain amount of connected (neighbored) pixels
• Symbols are defined by a certain arrangement of characteristically shaped
components
• Data is the result of encoding symbols in a specific order
• Information allows to associate data with a certain meaning
WM/04.02 S. 402
• Knowledge addresses the ability to understand information in a given context
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 402 - SS 2016
WM/04-05 S. 402
B2B and B2C Communication is mostly done via documents
Inter-organizational communication via
documents means exchanging business messages
with an economic or legal background
Documents are the obligatory means for
bridging time and space
Time
Documents are required for attesting
compliance
Space
Involved parties link up their communication
behavior
Documents express mutual expectations
WM/04.02 S. 403
Sender incorporates data into the message
(context) and thus transforms them into
information
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 403 - SS 2016
© 2016 Andreas Dengel - AG Knowledge
WM/04-05 S. 403
Today, the various stakeholders of business processes
communicate via any channel
<?xml ..
@
WM/04.02 S. 404
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b$b"""1"""77"#$%&"
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"
WM/04-05 S. 404
Different channels provide different options to
communicate messages
Depending on the place, the temporal constraints, and the technical
infrastructure, we select another way of communication
We submit paper forms where we filled in our power
consumption data
We send faxes to insurances that capture the data of our
new car
We order electronic devices via email where we attach a
scanned order form
@
We raise our displeasure about a malfunction by using
the Facebook channel or by calling the help desk
We use an eDocument service with which we submit a pdf-file
WM/04.02 S. 405
together with document metadata
<?xml ..
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 405 - SS 2016
WM/04-05 S. 405
The Internet is a cultural space for communication, which is
continuously evolving quantitatively and qualitatively
Amount
E-Mail
@
eDocuments
<?xml ..
Paper
Fax
Time
1960
1980
2000
2020
WM/04.02 S. 406
Email already became the communication medium No. 1*
* Source: http://www.ey.com/press/releases/
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 406 - SS 2016
WM/04-05 S. 406
In the world of business, email is about to be the dominant
communication tool for communicating business-relevant information
Since 2005 the annual amount of emails has increased
tenfold to 35 billion today
Some facts*:
• 93% of all enterprises use email as a preferred tool
to communicate with customers (B2C)
• 84% use email to communicate B2B
• 71% use email as major communication tool while
negotiating contracts
• 69% use email to exchange electronic bills and
bank connections
• 91% send classified information via email
WM/04.02 S. 407
9+E/1<03b+J55;bccQQQT./012646>3<T.3c26>6d=4c6<5=B3?efXg_;<=45e<30J5?=0J3e14.e530J4=D0J3eH<6>34TJ52?+
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b$@"""1"""77"#$%&"
WM/04-05 S. 407
Asking for the main demands on email management leads to
many different answers !
Classification
Forwarding
Support during processing
Structured repository
Linking with other sources of information
Retrieval
Reducing server load
Documentation
WM/04.02 S. 408
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b$A"""1"""77"#$%&"
WM/04-05 S. 408
While enterprises aim to maintain a centralized storage system,
processing and utilizing stored items is usually a decentralized task
Response Management Systems support the
classification and forwarding of emails on the basis
of known competences, and provide assistance in
formulating replies
Information Management Systems help to organize a structured
repository for the content emails entail, to establish connections with
other sources of information and to facilitate retrieval
Archiving systems store the content in correspondence
with legal requirements while at the same time reducing the
server load
WM/04.02 S. 409
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b$B"""1"""77"#$%&"
WM/04-05 S. 409
Let’s consider an example!
WM/04.02 S. 410
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%$"""1"""77"#$%&"
WM/04-05 S. 410
Let me address Dr. Gesine Kustermann who has time
pressure and an increasing work load
Dr. Gesine Kustermann is CFO of DFKI, a well known
research center in Kaiserslautern
Besides many other issues she is responsible for the
management of the carpool at DFKI, which for many
years has been ordered via CarFS, the financial service
branch of a leading German car manufacturer
Early in January there is an exposition and DFKI’s old van
had an accident. So Gesine has to make sure that DFKI
can use the new van by January 1st
WM/04.02 S. 411
Gesine asked her contact partner at CarFS, a large
financial service agency, to get the corresponding
application form for the car insurance
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%%"""1"""77"#$%&"
WM/04-05 S. 411
Let me further introduce Michael to you, a typical but
fictive knowledge worker
Michael Lenz works as an insurance specialist at
CarFS in Hamburg and is the responsible contact
person for all DFKI matters
Although he would like to leave for his Christmas holidays soon,
he knows that it is better to react to Gesine‘s request
since she could be very annoying
WM/04.02 S. 412
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%#"""1"""77"#$%&"
WM/04-05 S. 412
The fictive enterprise CarFS intends to improve their processes
respecting duration and cost effectiveness
One aim at CarFS is to classify incoming documents at
multiple channels and to route them to the respective clerks for
further processing
Situation:
Manual indexing leads to bottlenecks
Routing is based on individual knowledge of the staff
members
Search in various repositories and archives is based on
full text search or pre-defined index terms
Documents are filed as TIFF within a given document
taxonomy
Data enrichment is done manually
WM/04.02 S. 413
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%?"""1"""77"#$%&"
WM/04-05 S. 413
In this respect there are various important work packages to be
considered
WM/04.02 S. 414
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%b"""1"""77"#$%&"
WM/04-05 S. 414
CarFS has introduced the System CoMem, a corporate memory
with an integrated multi-channel document recognition system
E-Mail
Fax-Server
eInvoicing
Call-Center
Scanner
WM/04.02 S. 415
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%_"""1"""77"#$%&"
WM/04-05 S. 415
In Michael’s daily practice there is a whole bunch of
communication alternatives to consider
Fax
Paper mail
Call-Center
E-Mail
@
eDocuments
<?xml ..
(* )
WM/04.02 S.
416
Document
Images
(*): as Attachment
Text
Meta Data
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%&"""1"""77"#$%&"
WM/04-05 S. 416
Before leaving for Christmas
holidays, I have to finish the car
insurance for Gesine
... and how to work with a
corporate memory, how would it
understand what I mean
and how I think?
WM/04.02 S. 417
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%@"""1"""77"#$%&"
WM/04-05 S. 417
Imaginations without terms are blind and terms without
imaginations are empty*
Our environment consists of
items, facts and events that are
„real“ and determine our lives
(„what is going on“)
Imagination
evokes
„vacations”
is related to
represents
Symbol
Reality
Semiotic Triangle
WM/04.02 S. 418
In order to express their
thoughts, people use signs,
symbols, or characters that may
be understood by others
(„what I couch or explicate“)
People reading texts put
contents together and create
their very individual imagination
(„what I mean“)
* I. Kant (1724 – 1804)
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%A"""1"""77"#$%&"
WM/04-05 S. 418
One approach is to look on the ideas of the Semantic Web that
builds on predication and ontology to formally represent semantics
A Theory of Ontology attempts to give answers to the
question: What is there?
(the Greek terms „ontos“ and „logos“ mean „to be“ and „word“)
Aristotle defined a system of ten categories, such as
substance, quality, quantity, where, when, …
A Theory of Predication tries to answer the question:
What is it to say something about something?
A subject is what a statement is about
A predicate is what a statement says about its subject
WM/04.02 S. 419
A common definition of an Ontology for Semantic Web
researchers is an explicit, formal specification of a
conceptualization < Tom Gruber, 1993 >
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b%B"""1"""77"#$%&"
WM/04-05 S. 419
An ontology provides a shared vocabulary to all participants to
express facts about the world
Subject
Predicate
Amellie de Hemp
Object
5082403
has-phone-no
<rdf:RDF>
<rdf:Description
rdf:about="http://www.AAA-verlag.com/deHemp">
<has-phone-no>5082403</has-phone-no>
</rdf:Description>
</rdf:RDF>
A fact is expressed as a Subject-Predicate-Object triple
Subjects, predicates, and objects
are given as names for entities,
also called resources or nodes
Entities represent something,
a person, an appointment,
a website, …
Names are URIs, which are global in scope, always referring to the same entity in
any RDF document in which they appear
The underlying structure of any knowledge can be viewed as a graph (of triples)
consisting of nodes (subjects, objects) and labeled directed arcs (predicates) that
WM/04.02 S. 420
link pairs of nodes
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b#$"""1"""77"#$%&"
WM/04-05 S. 420
Ontologies describe a particular vocabulary that can be used to
describe aspects of real domains
Document Classes
Organizations
Groups
Persons
Events
Locations/Addresses
The vocabulary may follow different
“W-Dimensions” of knowledge (what,
who, when, where, …)
All workflow-relevant aspects of
information can be described using a
set of explicit categories
The categories can be taken from
other applications and formally
represented using RDFS
Appointments
Topics
Exemplary categories for describing
the work context (in RDFS they are
called schemata)
WM/04.02 S. 421
Tasks
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b#%"""1"""77"#$%&"
WM/04-05 S. 421
Every document is extending the existing ontology by new facts
works-for
AAA
Agenda #18
has-attachment
DFKI
sent-to
has-address
Amellie
de Hemp
Invitation #432
works-for
sent-from
Postfach 133500
69023 Heidelberg
Germany
Thomas
Mustermann
has-address
has-task
From: [email protected]
Date: January 21, 2010 09:28:11
To: Amellie de Hemp <[email protected]>
Subject: Invitation
Semantic Desktop
Course
Dear Amellie:
Attached please find the agenda for our next training
course on the Semantic Desktop. We would be very glad to
welcome you.
has-date
If you have any question, don‘t hesitate to contact me.
haslocation
Trippstadter Straße 122
67663 Kaiserslautern
09-02-10
Best regards,
Thomas
---------------------------------Agenda.pdf
Dr. Thomas Mustermann
Head of CRM
DFKI GmbH
Trippstadter Straße 122, 67663 Kaiserslautern, Germany
Phone: +49-631-20575-100
WM/04.02 S. 422
Email: [email protected]
Please note that the text in the attachment
may itself contain relationships to already
available knowledge
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b##"""1"""77"#$%&"
WM/04-05 S. 422
For this purpose, he uses the rich RDFS tool box allowing him
to formally represent all aspects of information he needs
Schemata describe classes of objects
in the work context by a fixed pattern
WM/04.02 S. 423
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b#?"""1"""77"#$%&"
WM/04-05 S. 423
For this purpose, he uses the rich RDFS tool box allowing him
to formally represent all aspects of information he needs
Instances are exemplars or elements of
a category having individual pattern
values
WM/04.02 S. 424
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b#b"""1"""77"#$%&"
WM/04-05 S. 424
For this purpose, he uses the rich RDFS tool box allowing him
to formally represent all aspects of information he needs
Between the concepts of the ontology
there are qualified relations called
properties
Each instance has an is-a relationship to
its class, i.e. it complies with the defined
pattern
WM/04.02 S. 425
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b#_"""1"""77"#$%&"
WM/04-05 S. 425
For this purpose, he uses the rich RDFS tool box allowing him
to formally represent all aspects of information he needs
[email protected]
Thomas Mustermann
Thomas
Email-Address
Label
First_Name
Dr. Mustermann
Mustermann
Alt–Label
Last–Name
Alt–Label
Alt–Label
Dr. Thomas Mustermann
T. Mustermann
Attributes
describe the possible labels
WM/04.02 S. 426
of an instance
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b#&"""1"""77"#$%&"
WM/04-05 S. 426
For this purpose, he uses the rich RDFS tool box allowing him
to formally represent all aspects of information he needs
[email protected]
Thomas Mustermann
Thomas
Email-Address
Label
First_Name
Dr. Mustermann
Mustermann
Alt–Label
Last–Name
Alt–Label
Alt–Label
Musi
T. Mustermann
URI
http://dfki.de/outlook/contacts/052361784
WM/04.02 S. 427
URI assures the uniqueness of a resource
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b#@"""1"""77"#$%&"
WM/04-05 S. 427
Employing URIs, an application- and platform-independent
unique representation for all resources is created
Each information item is a semantic web resource whether it is a file (folder or
document), an email constituent (i.e. message, sender, recipient,
attachment), an address (...), or a calendar entry, ...
All resources are identified by a URI (Uniform Resource Identifier)
http://www.AAA-verlag.com
for a Website
file://Documents/Courses/Agenda#18
for a file
file://Documents/Courses/Invitations
for a category
http://dfki.de/outlook/contact/0019E177
for a contact
WM/04.02
S. 428
imap://[email protected]/INBOX/;UID=3
for an Email
outlook://appointment/00000000ECD4B99358B9814B9DA
for a calendar entry
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)
6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b#A"""1"""77"#$%&"
WM/04-05 S. 428
Michael builds on an ontology provided by CoMem offering
him a comprehensive domain vocabulary
WM/04.02 S. 429
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"
!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b#B"""1"""77"#$%&"
WM/04-05 S. 429
Recognition and linking of document contents in CoMem is
done in three steps
1
Information Classification & Routing
Mapping the document contents into the
organizational structure
2
Information Extraction
Mapping the document content into
predefined message patterns
3
Information Integration
Mapping the document content into the
context of processes
WM/04.02 S. 430
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?$"""1"""77"#$%&"
WM/04-05 S. 430
Business processes
determine knowledge demand
and at the same time produce knowledge!
WM/04.02 S. 431
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?%"""1"""77"#$%&"
WM/04-05 S. 431
Business processes bundle information in a chronological
context
A business process describes a set of tasks, which depending on data
should be processed in a (pre-)specified order and which finally
provide a defined result.
For activating processes, there is usually an occasion, e.g. a call for bids,
a request or an application
Each process task describes a demand for (meta) data which is needed for
decision making or problem solving
Documents are sent back and forth to the requestor to deliver (meta) data, i.e.
they are based on each other
WM/04.02 S. 432
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 432 - SS 2016
WM/04-05 S. 432
Workflow processes are expecting information that is
served by multi-channel documents
87%
Study
Investigation of 48 different
processes in an international action
financial service company
100%
(48)
29%
54%
Findings
In general, we can differentiate
between two types of information in
documents
83% initiate new processes
58% relate to waiting processes
38%
4%
13%
2%
87% of all processes are triggered by
incoming multi-channel documents
WM/04.02 S. 433
© 2016 Andreas Dengel - AG Knowledge Based Systems - Script Collaborative Intelligence - p. 433 - SS 2016
WM/04-05 S. 433
CoMem provides strong support for Michael in order to work
more effectively and to get active support
Michael enters each new process
in his workflow management
system
When activating the process a
respective task sequence is
proposed to Michael
*+,-./-%
1234.567/%8+,97,7:;7/%
!"#$!#$$%
0$#0$#$!%
0%
&
% '
% &
% '
% (
% )
% )
% &
% '
% &
% '
% (
% )
% )
% &
% '
% &
%
Von: [email protected]
Datum: 20. Dezember 2011 09:28:11 MEZ
An: [email protected]
Betreff: Kfz-Versicherung
Sehr geehrte Frau Dr. Kustermann,
als Anlage übersende ich Ihnen den Antrag für eine KfzVersicherung. Ich bitte Sie, den Antrag auszufüllen und bis
23.12.11 an uns zurück zu schicken.
Für Fragen stehe ich Ihnen gerne zur Verfügung.
Beste Grüße,
M. Lenz
---------------------------------Michael Lenz
Car Financial Services AG
Kfz Versicherung
Schmalbachstr. 1, 38112 Braunschweig
Tel.: +49 (531) 212 – 83 212
Fax: +49 (531) 212 – 83 215
Quartalsbericht verf.
Antrag DFKI
Antrag-Prozessinstanz Fällig:
Prio
Antragsform. versenden Di 20.01.12
Antrag erfassen
1
Antrag bearbeiten
Fr 23.01.12
1
WM/04.02 S. 434
Michael may set deadlines and priorities
G%
Fr 23.01.12
Kfz-A2K.pdf
Clicking into the respective check
box results in an email that is
composed of pre-defined text
patterns complemented by
respective meta data
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?b"""1"""77"#$%&"
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"
WM/04-05 S. 434
Sending out Michael’s email leads to the initialization of a task-oriented workflow
Von: [email protected]
Datum: 20. Dezember 2011 09:28:11 MEZ
An: [email protected]
Betreff: Kfz-Versicherungsantrag
Antragsform. versenden
Sehr geehrte Frau Dr. Kustermann,
als Anlage übersende ich Ihnen den Antrag für eine Kfz-Versicherung. Ich bitte Sie, den Antrag auszufüllen und bis
23.12.11 an uns zurück zu schicken.
Michael Lenz
Für Fragen stehe ich Ihnen gerne zur Verfügung
Dr. Gesine Kustermann
Beste Grüße,
20.12.2011 09:28:11 MEZ
M. Lenz
---------------------------------Michael Lenz
Car Financial Services AG
Kfz Versicherung
Schmalbachstr. 1, 38112 Braunschweig
Tel.: +49 (531) 212 – 83 212
Fax: +49 (531) 212 – 83 215
Kfz-A2K.pdf
Icons allow to
23.12.2011 00:00:00 MEZ
access the
Kfz Versicherung
original
...
resources
WM/04.02 S. 435
The task to be processed is instantiated by bits of information available from CoMem
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?_"""1"""77"#$%&"
WM/04-05 S. 435
When CoMem was introduced, CarFS defined rule bases
supporting the automatic routing
Sorting rules allow to identify the corresponding
inbox of the document
Routing rules define the respective clerk based on the
sender, the content, the skill or pre-defined priorities
Sorting Rules
+
Routing Rules
+
An integrated workload control directs routing and
avoids long processing times
Depending on the class of an incoming document, tasks are generated via the running
process and delivered to the task context at each workspace together with extracted
as well as
enriched
WM/04.02
S. 436 information
... but how does this work?
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?&"""1"""77"#$%&"
WM/04-05 S. 436
The new corporate memory of CarFS provides a socio-technical
work environment
Subsequent tasks within the workflow
are enriched with available information
Some tasks labeled by a
are taken over and solved by
CoMem
For that purpose, labeled open
tasks are sent to a task server
WM/04.02 S. 437
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?@"""1"""77"#$%&"
WM/04-05 S. 437
The task server manages all open tasks and provides expectation
patterns to be “verified” by the document analysis
Multi-Channel
Document Analysis
Workload
Control
Sorting
Rules
Open Tasks
Bestandssysteme
ERP System
Routing
Rules
CarFS
WM/04.02 S. 438
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?A"""1"""77"#$%&"
WM/04-05 S. 438
When Gesine’s email arrives at CarFS, it is first archived using a URI
and consequently split for further processing
CarFS
Archive
Antrag #18
Von: Gesine Kustermann <[email protected]>
[email protected]>
Datum: 23. Dezember 2011 11:33:17 MEZ
An: [email protected]
Betreff: Re: Kfz-Versicherung
Hallo Herr Lenz,
hat-Anlage
Vielen Dank für die Übersendung der Unterlagen. Im
Attachment finden Sie den ausgefüllten und eingescannten
Antrag mit der Bitte um zügige Bearbeitung.
Using
information
extraction
Vielen Dank und frohe Weihnachten,
G.Kustermann
------Dr. Gesine Kustermann
Kaufm. Geschäftsführung
DFKI GmbH
Trippstadter Straße 122
D-67663 Kaiserslautern
Germany
Phone +49-631-200-75 -801
Fax
+49-631-200-75-800
Email
[email protected]
Using document
image analysis
and understanding
Von: Gesine Kustermann <[email protected]>
<[email protected]
[email protected]>
[email protected]
Datum: 22. Dezember 2011 11:33:17 MEZ
An: [email protected]
Betreff: Re: Kfz-Versicherung
Hallo Herr Lenz,
Vielen Dank für die Übersendung der Unterlagen. Im
Attachment finden Sie den ausgefüllten und eingescannten
Antrag mit der Bitte um zügige Bearbeitung.
Vielen Dank und frohe Weihnachten,
G.Kustermann
------Dr. Gesine Kustermann
Kaufm. Geschäftsführung
DFKI GmbH
Trippstadter Straße 122
D-67663 Kaiserslautern
Germany
Phone +49-631-200-75 -801
Fax
+49-631-200-75-800
Email
[email protected]
Kfz-A2K.pdf
WM/04.02 S. 439
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b?B"""1"""77"#$%&"
WM/04-05 S. 439
Let us first deal with emails!
WM/04.02 S. 440
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb$"""1"""77"#$%&"
WM/04-05 S. 440
Emails are analyzed by a multi-step approach in order to get the
relevant data
Metadata Extraction
Classification
Information Extraction
Verification
Workload
Control
Sorting
Rules
Open Tasks
Bestandssysteme
ERP System
Routing
Rules
CarFS
WM/04.02 S. 441
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb%"""1"""77"#$%&"
WM/04-05 S. 441
Michael makes use of an ontology-based document
understanding system helping him to extract relevant facts
Von: Gesine Kustermann <[email protected]>
Datum: 22. Dezember 2011 11:33:17 MEZ
An: [email protected]
Betreff: Re: Kfz-Versicherung
Hallo Herr Lenz,
Vielen Dank für die Übersendung der Unterlagen. Im
Attachment finden Sie den ausgefüllten und eingescannten
Antrag mit der Bitte um zügige Bearbeitung.
Incoming
Email
Vielen Dank und frohe Weihnachten,
G.Kustermann
Ontological
Knowledge
------Dr. Gesine Kustermann
Kaufm. Geschäftsführung
DFKI GmbH
Trippstadter Straße 122
D-67663 Kaiserslautern
Germany
Phone
+49-631-200-75 -801
Fax
+49-631-200-75-800
Email
[email protected]
Ontology-Based
Document Understanding
New Facts
WM/04.02 S. 442
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb#"""1"""77"#$%&"
WM/04-05 S. 442
Information extraction stepwise transforms the contents of
documents into knowledge relating it to the existing ontology
Segmentation of the text into paragraphs,
sentences, and words
Identification of potential attributes
Matching of attributes with known classes and
instances
Description of the document using intrinsic text
features
Document type determination
WM/04.02 S. 443
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb?"""1"""77"#$%&"
WM/04-05 S. 443
In the Segmentation phase plain text should be segmented
into hierarchical lexical units
Implementing this task is rather trivial for European languages, by separating
white spaces from non white spaces. In contrast, in Chinese or Japanese it is
not evident from the typography where word boundaries are
An exception is given when analyzing paper documents
Input:
plaintext
Output:
segment hierarchy
document
paragraph
paragraph
sentence
token
sentence
token
token
sentence
token
token
token
token
token
WM/04.02 S. 444
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bbb"""1"""77"#$%&"
WM/04-05 S. 444
Segmentation - Example:
Example of GATE*
b.) White space segmentation
WM/04.02 S. 445
a.) Paragraph
extraction
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb_"""1"""77"#$%&"
WM/04-05
S. 445
* Source:
* Source:
http://gate.ac.uk
North, 2000
Symbolization extracts relevant entities concerning structure
and content
Relevant tokens are named or structured entities given in text sequences
In order to resolve ambiguities, a Part-Of-Speech (POS) Tagger and a Parser are used for
identifying grammatical items, such as nouns, verbs, adjectives, or adverbs
Input:
token sequences
Output:
entities
Matching token sequences against glossaries listing names of typed
entities such as cities or persons, or using Hidden Markov Models that
have been trained with annotated corpora
Regular expressions are often used to recognize structured entities (such
as addresses).
A Part-Of-Speech (POS) Tagger annotates the token sequences of a sentence
as corresponding to a particular part of speech
Based on token sequences, POS annotations, and grammar rules,
WM/04.02
S. 446
a parser
is able to extract coherent phrases of tokens (e.g.,
German Research Center for Artificial Intelligence)
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb&"""1"""77"#$%&"
WM/04-05 S. 446
Symbolization - Example (POS-Tagging)*:
In the few short years of
its existence, Google has
come a long way.
In/IN the/DT few/JJ short/JJ years/
NNS of/IN its/PRP$ existence/NN ,/,
Google/NNP has/VBZ come/VBN a/DT
long/JJ way/NN./.
WM/04.02 S. 447
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bb@"""1"""77"#$%&"
WM/04-05
S. 447
* tagged
with JTextPro*: A Java-based Text Processing Toolkit http://jtextpro.sourceforge.net
Symbolization - Patterns for Structured Entities
Expression for matching dates
(e.g., 2008/02/02)
Rule: Date
(
{Token.kind == number}
{Token.string == "/"}
{Token.kind == number}
{Token.string == "/"}
{Token.kind == number}
):date -->
:date.TempDate = {kind = <DATE>}
WM/04.02 S. 448
Written in JAPE - Java Annotation Patterns Engine.
(http://gate.ac.uk/sale/tao/#chap:jape)
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bbA"""1"""77"#$%&"
WM/04-05 S. 448
Symbolization - Example (Entity Recognition)*:
WM/04.02 S. 449
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bbB"""1"""77"#$%&"
WM/04-05
S. 449
* Source:
http://gate.ac.uk
In the Instantiation phase, entities as references for possible
real world instances, roles, or actions have to be resolved
If several entities refer to the same instance in terms of multiple occurrences,
pronouns, acronyms, or other abbreviations, a co-referencing analysis performs
a unification.
Input:
entities
Output:
instances, relations(roles and actions)
During Instance Resolution, entities are resolved as one or many instances
(e.g., “George Bush” -> President: George Bush Senior | President: George
Bush Junior)
Relation Resolution resolves relevant roles and actions
(e.g., … “is member of” … -> rel:employedIn | rel:projectMember )
The co-reference analysis unifies references of single instances in multiple
sentences by creating reference chains
(e.g., Peter Parker came in. His suit was disrupted. Peter looked angry and
WM/04.02 S. 450
studied the latest news about Spiderman.)
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_$"""1"""77"#$%&"
WM/04-05 S. 450
Instantiation - Example*:
WM/04.02 S. 451
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_%"""1"""77"#$%&"
WM/04-05
S. 451
* Source:
http://gate.ac.uk
… back to the example!
WM/04.02 S. 452
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_#"""1"""77"#$%&"
WM/04-05 S. 452
Metadata in the header of the email are extracted and the subject
is used as a first orientation point for classification
arbeitet-für
DFKI
Antrag #18
gesendet-an
hat-Adresse
hat-Anlage
Dr. Gesine
Kustermann
Trippstadter Straße 122
67663 Kaiserslautern
CarFS
arbeitet-für
gesendet-von
gesendet-von
Michael
Lenz
hat-Anlage
gesendet-an
hat-Adresse
bearbeitet-Vorgang
Von: Gesine Kustermann <[email protected]>
[email protected]>
Datum: 22. Dezember 2011 11:33:17 MEZ
An: [email protected]
Betreff: Re: Kfz-Versicherung
Antrag DFKI #241
Hallo Herr Lenz,
Vielen Dank für die Übersendung der Unterlagen. Im
Attachment finden Sie den ausgefüllten und eingescannten
Antrag mit der Bitte um zügige Bearbeitung.
Von:
Vielen Dank und frohe Weihnachten,
G.Kustermann
------Dr. Gesine Kustermann
Kaufm. Geschäftsführung
DFKI GmbH
Trippstadter Straße 122
D-67663 Kaiserslautern
Germany
Phone
+49-631-200-75 -801
Fax
+49-631-200-75-800
WM/04.02 S. 453
Email
[email protected]
An:
hatTermin
Dr Gesine
Kustermann
Schmalbachstr. 1,
38112 Braunschweig
23.12.11
Michael
Lenz
Zeit:
22.12.2011
11:33 Uhr
Kfz-Versicherung
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_?"""1"""77"#$%&"
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"
WM/04-05 S. 453
Subsequently, potential symbols are recognized and it is checked
whether they fit into the existing knowledge
arbeitet-für
DFKI
Antrag #18
gesendet-an
hat-Adresse
hat-Anlage
Dr. Gesine
Kustermann
Trippstadter Straße 122
67663 Kaiserslautern
arbeitet-für
gesendet-von
gesendet-von
Michael
Lenz
hat-Anlage
gesendet-an
hat-Adresse
bearbeitet-Vorgang
Von: Gesine Kustermann <[email protected]>
[email protected]>
Datum: 22. Dezember 2011 11:33:17 MEZ
An: [email protected]
Betreff: Re: Kfz-Versicherung
Antrag DFKI #241
Hallo Herr Lenz,
e.g.
Vielen Dank für die Übersendung der Unterlagen. Im
Attachment finden Sie den ausgefüllten und eingescannten
Antrag mit der Bitte um zügige Bearbeitung.
Von:
Vielen Dank und frohe Weihnachten,
G.Kustermann
------Dr. Gesine Kustermann
Kaufm. Geschäftsführung
DFKI GmbH
Trippstadter Straße 122
D-67663 Kaiserslautern
Germany
Phone
+49-631-200-75 -801
Fax
+49-631-200-75-800
WM/04.02 S.
Email
gesine.Kustermann@dfki
CarFS
An:
Dr Gesine
Kustermann
hatTermin
Schmalbachstr. 1,
38112 Braunschweig
23.12.11
Michael
Lenz
Zeit:
22.12.2011
11:33 Uhr
454
Kfz-Versicherung
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_b"""1"""77"#$%&"
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"
WM/04-05 S. 454
Based on these extensions, new relations (properties) may be
extracted and incorporated into the ontological context of the message
arbeitet-für
DFKI
Antrag #18
gesendet-an
hat-Adresse
hat-Anlage
Dr. Gesine
Kustermann
Trippstadter Straße 122
67663 Kaiserslautern
CarFS
arbeitet-für
gesendet-von
gesendet-von
Michael
Lenz
hat-Anlage
Von: Gesine Kustermann <[email protected]>
Datum: 22. Dezember 2011 11:33:17 MEZ
An: [email protected]
[email protected]
Betreff: Re: Kfz-Versicherung
gesendet-an
hat-Adresse
Antrag???
bearbeitet-Vorgang
Antrag DFKI #241
Hallo Herr Lenz,
Vielen Dank für die Übersendung der Unterlagen. Im
Attachment finden Sie den ausgefüllten und eingescannten
Antrag mit der Bitte um zügige Bearbeitung.
Von:
Vielen Dank und frohe Weihnachten,
G.Kustermann
------Dr. Gesine Kustermann
Kaufm. Geschäftsführung
DFKI GmbH
Trippstadter Straße 122
D-67663 Kaiserslautern
Germany
Phone
+49-631-200-75 -801
Fax
+49-631-200-75-800
WM/04.02 S.
Email
gesine.Kustermann@dfki
An:
hatTermin
Dr Gesine
Kustermann
Schmalbachstr. 1,
38112 Braunschweig
23.12.11
Michael
Lenz
Zeit:
22.12.2011
11:33 Uhr
455
Kfz-Versicherung
The email content is related to the
existing knowledge via semantic
hyperlinks
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b__"""1"""77"#$%&"
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"
WM/04-05 S. 455
How to deal
with the attachments?
WM/04.02 S. 456
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_&"""1"""77"#$%&"
WM/04-05 S. 456
For the analysis of attachments (image documents) a different
processing is necessary
Form
Definitionen
Image Processing
Classification
Information Extraction
Verification
Workload
Control
Sorting
Rules
Open Tasks
Bestandssysteme
ERP System
Routing
Rules
CarFS
WM/04.02 S. 457
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_@"""1"""77"#$%&"
WM/04-05 S. 457
A state-of-the-art solution should provide the following
Image Filter (Re-Segmentation)
Scanner
Skew Correction
Upside-Down Correction
Line Filter
Blind Color Recognition
WM/04.02 S. 458
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_A"""1"""77"#$%&"
WM/04-05 S. 458
For document understanding, we may apply several analysis strategies
at the same time
Antrag auf Kfz-Versicherung
1. Layout-based Rerecognition
(without OCR)
! very fast
2. Search patterns
Extraction of known terms/titles/phrases
3. Check boxes
labels allowing to mark the
existence of different features
4. Form identifiers
regular expressions at fixed locations
on form with same/similar layout
WM/04.02 S. 459
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b_B"""1"""77"#$%&"
WM/04-05 S. 459
The scanned pages need to be reorganized as part of a document
Scan Flow
Scanner
Page
Classification
Kfz-A2K
Page 1
Document
Formation
Kfz-A3F
Page 1
Kfz-A3F
Page 2
...
Document
Classification
Kfz-A2K
Page 1
WM/04.02 S. 460
Kfz-A3F
Page3
...
Kfz-A3F
Page 1+2+3
Note: In practice
documents are
often physically
separated by an
empty paper (the
content of
envelopes)
Document Understanding
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&$"""1"""77"#$%&"
WM/04-05 S. 460
Document Understanding aims to find logical objects
(semantic entities) within the document image
Name of Medical Doctor
Address of Medical Doctor
Name of Patient
Name of Assured
Insurance Number
Company
Database
Invoice-No.
Tabular Information
Service Dates
GOÄ-Numbers
WM/04.02 S. 461
Date of Invoice
Factors
Single Amounts
Total Amount
Diagnosis
Example
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&%"""1"""77"#$%&"
WM/04-05 S. 461
Transforming Data into Knowledge
Image Objects
Layout
Structure
Image
Characters
DATA
„d“ „S“ „2“
WM/04.02 S. 462
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&#"""1"""77"#$%&"
WM/04-05 S. 462
WM/04.02 S. 463
© [email protected]
- 2009
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*
+(,'*##(-*,!*++1""=>"b&?"""1"""77"#$%&"
WM/04-05 S. 463
Transforming Data into Knowledge
Image Objects
Layout
Structure
Image
Characters
DATA
Document
Understanding
„d“ „S“ „2“
INFORMATION
KNOWLEDGE
Words
Information
WM/04.02 S. 464
Presentation
! Sender
! Recipient
! Date
! Reference
! Signature
! ,,,
Processes
! Offer
! Order
! Invoice
Company
Data
! ,,,
Logical Objects Message Types
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&b"""1"""77"#$%&"
WM/04-05 S. 464
Are there different ways for
categorizing printed documents?
WM/04.02 S. 465
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&_"""1"""77"#$%&"
WM/04-05 S. 465
Categorization – An Example
Love Letter
Tax Form
Delivery Note
?
Invoice
Cheque
Order
Offer
WM/04.02 S. 466
Report
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&&"""1"""77"#$%&"
WM/04-05 S. 466
Remember the model for automatic classification
Unknown Document
Sample Documents
Das Bild kann nicht angezeigt werden. Dieser Computer verfügt möglicherweise über zu wenig Arbeitsspeicher, um das Bild zu öffnen, oder das Bild ist
beschädigt. Starten Sie den Computer neu, und öffnen Sie dann erneut die Datei. Wenn weiterhin das rote x angezeigt wird, müssen Sie das Bild
möglicherweise löschen und dann erneut einfügen.
Attribute Extraction (1)
Transformation (2)
Das Bild kann nicht angezeigt werden. Dieser Computer verfügt möglicherweise über zu wenig Arbeitsspeicher, um das Bild zu öffnen, oder das Bild ist
beschädigt. Starten Sie den Computer neu, und öffnen Sie dann erneut die Datei. Wenn weiterhin das rote x angezeigt wird, müssen Sie das Bild
möglicherweise löschen und dann erneut einfügen.
Attributes
Transformation (A)
Attribute-Value Representation (Vectors)
Attribute-Value Representation
Attribute-Value Learning (3)
Attribute-Value Representation (Classifier(s))
Classifier Application (B)
Categories
(1)!
Extraction of relevant features from a set
of representative sample documents
(2)!
Transformation of documents into
attribute-value representation based on
the identified features
(3)!
Construction of classifier using the
attribute-value
(attribute
WM/04.02 representation
S. 467
value learning)
(A)
Transformation of unknown documents
into respective attribute-value
representation
(B)
Application of classifier in order to assign
the document as belonging to one of the
given tasks
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&@"""1"""77"#$%&"
WM/04-05 S. 467
There are various modes for categorizing printed documents!?
WM/04.02 S. 468
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&A"""1"""77"#$%&"
WM/04-05 S. 468
Mixed document stacks require different methods
Layout features
Textual features
Tabular features
Search Pattern
Format features
Special
indicators
WM/04.02 S. 469
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b&B"""1"""77"#$%&"
WM/04-05 S. 469
How can we use the
inherent characteristics
of the document layout?
WM/04.02 S. 470
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@$"""1"""77"#$%&"
WM/04-05 S. 470
Layout guides a reader’s attention
Layout is a valuable orientation which helps to drive our
attention
There are some characteristics which might be useful
Each black pixel in an unfilled document “form” is
also black in the filled one
The filled form contains more black pixels
A white pixel in the filled form is white in an empty
one
?
=
An image comparison is not sufficient because scan shifts,
translation and rotation do not allow to just subtract net and
WM/04.02 S. 471
gross image
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@%"""1"""77"#$%&"
WM/04-05 S. 471
Generating layout reference pattern
Weighting the individuality of the document layout
structure leads to reference pattern for classification
Use the net image (only preprinted information)
Consider line and block segments in a document
image instead of the image itself
For each single document of the training set do …
… measure the degree of relevancy of a text
block based on its geometric features
For all documents of the training set do …
… compute the degree of individuality of a block
for a single document
WM/04.02
S. 472
Take
result
as a class exemplar (reference pattern)
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@#"""1"""77"#$%&"
WM/04-05 S. 472
Simple Attribute-Value Representation of Layout
Attribute
:
(x1, y1); (x2, y2); 5(...); ..
:
(x1, y1); (x2, y2); 2(...); ..
:
(x1, y1); (x2, y2); 1(...); ..
:
Value
:
0
0
:
0,98
0
:
0,74
:
0,09
0
WM/04.02 S. 473
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@?"""1"""77"#$%&"
WM/04-05 S. 473
“Form” Classification using Layout Features
Finding the right reference pattern for an unknown document
is rather a search problem than a classification task
Consider line and block segments in an image of
the unknown document
Measure similarity of unknown document to
reference patterns by finding appropriate text block
counterparts (net vs. gross image)
Block similarity is computed by a fuzzy match of
the geometric features
translation & skew
relative positions
multiple candidates
constraint satisfaction
WM/04.02 S. 474
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@b"""1"""77"#$%&"
WM/04-05 S. 474
Document classes can be learned just by the difference in
their layout
Take a set of „unfilled“
document forms to train the
system
System establishes layout
reference patterns on its
own …
… independent from OCR
(text) data
… independent of number
of document classes
System classifies unknown
documents according to
reference patterns
Phase1
Sample
Document
Phase2
Unknown
Document
Learning
Knowledge
Base
Classification
Knowledge
Base
Purchaser
Product-ID
Number
Price
WM/04.02 S. 475
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@_"""1"""77"#$%&"
WM/04-05 S. 475
The initial training contains the definition of regions
of interest
Scanning of a sample document
Specify ID of reference pattern
> AOK-Application
Generate net image (e.g. by editor)
Itemize “regions of interest“ by
drawing rectangle with the mouse
and specify region
> Adresse > Vers.-No.
> Bank
> Kontonummer
> Bankleitzahl
WM/04.02 S. 476
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@&"""1"""77"#$%&"
WM/04-05 S. 476
Let’s complement the layout
characteristics with text features?
WM/04.02 S. 477
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@@"""1"""77"#$%&"
WM/04-05 S. 477
How to use the text for categorization?
WM/04.02 S. 478
AppleS urce Date:7/1-D/-D4-7/10/04 Inv-.ice. No. AS-] Ta:
Perm FjeIInwn 1999 Waadside Or Medford. OP 97501 far:
Website Creatian Services d &oed s Category Cast Deseri
tion Time Amount ~&raphic Design 50/hour -Corporate
Identity Creation 08.00.00 $100.00 3 logos based on color
scheme, printable and digital formats, letterheads and
business cards Category Cost Description Time Amount
Service Call $50/hour Server Setup d Config 04.00.00
$200.00 Category Cost De seription Time Amount
Saf#k~are Design X50/hour -Create In.#egrated Web Apps
09.00.00 $45000 Form Processor, Mortgage Calculator,
Newsletter System, Installations Category Cost Description
Time Amount Web Canstructian $501nour Create Website
Templates 04.30.00 $225.00 Design navigation. header.
footer. and content stylesheets Web –Canstructian $50}
hour -Customized E-Commerce System 08.00.00 $400.00
Category Cast Description Count Amount Flat Fee Item
$18.00 Website Single-Page 20 $360.00 Time Subtotal
33.30.00 $2035.D0 Lxpenses Category Description Arrount
Expense 3 CDs Overnight FedEx $16.90 Expense 5DD
Business -Cards, glossy coot finish $95.00 Expense
Domain Registration X8.95 Subtotal $]2085 Invoice Total :
$2155.05 Thank you far your business. Perry' Payable
within 15 days.
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@A"""1"""77"#$%&"
WM/04-05 S. 478
Simple Attribute-Value Representation of Text (see Chapter 1)
Sehr geehrte Damen und Herren,
Für unsere Bemühungen erlauben
wir uns 150 Euro zu berechnen
Mit freundlichen Grüßen
Attribute
:
aber
als
:
erlauben
erfassen
:
freundlichen
geehrte
:
zu
zurück
Value
Value
:
0 :
0 0
: 0
1 :
0 7
0 0
1 0
1 5
: 11
1 :
0 2
0
Words define an index for a vector of values which has the dimension
given by the number of words
WM/04.02 S. 479
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"b@B"""1"""77"#$%&"
WM/04-05 S. 479
And what about OCR errors?
WM/04.02 S. 480
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bA$"""1"""77"#$%&"
WM/04-05 S. 480
OCR errors may be considered by including an edit
distance between two words
Definition of an edit distance between words:
If A is an alphabet of characters and A* is the set of words over A, the
Levenshtein distance of two words
$S = s_1 s_2 \dots s_n \in A^*,\ n \ge 0$ and $T = t_1 t_2 \dots t_m \in A^*,\ m \ge 0$
is the minimal sum of all elementary edit operations necessary to
transform S into T
An edit distance must address different types of OCR errors, i.e.
substitutions, insertions, and deletions
All corresponding costs CostSub, CostIns and CostDel have a uniform
value of 1, which allows stepwise increasing of the cost value while
searching
WM/04.02
S. 481 for an optimal Levenshtein distance between two words
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bA%"""1"""77"#$%&"
WM/04-05 S. 481
The Levenshtein distance is defined by the minimal edit costs:
Thus, every string S can be transformed into another string T
Example:
A = {A, C, G, T},
S = GATAAGAA, and T = GATTACA
GAT (A → T) A (G → C) A (A → ε) = GATTACA
Note: This is one possible
transformation of S into T
by three edit operations. In
general, there is more than
one possible
transformation
Considering the length of the strings to be transformed we use a
normalized distance measure
$\mathrm{Lev}_{\mathrm{norm}}(w_1, w_2) = \dfrac{\mathrm{Lev}(w_1, w_2)}{\max\{|w_1|, |w_2|\}}$
WM/04.02 S. 482
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bA#"""1"""77"#$%&"
WM/04-05 S. 482
But are statistical techniques
sufficient for all types of
printed documents?
WM/04.02 S. 483
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bA?"""1"""77"#$%&"
WM/04-05 S. 483
WM/04.02 S. 484
!"#$%&""'()*+,-".+(/+0""1""'2"3(450+)/+"6,-+)"78-9+:-""1""7;*<=9"!"##$%"&$'()*+(,'*##(-*,!*++1""=>"bAb"""1"""77"#$%&"
WM/04-05 S. 484
The employment of domain-specific proper names is a
critical aspect
Domain Ontology Example
:
OPSYS
:
:
Apple iOS
(...)
Bada
(...)
Blackberry OS
(...)
Brew
(...)
RATE
... (...)
:
... (...)
:
PRODUCER
Apple
(...)
BlackBerry
(...)
HTC
(...)
Nokia
(...)
:
Samsung
Windows Phone
(...)
WM/04.02 S. 485
(...)
List of lexical variations
© 2016  Andreas Dengel  -  AG Knowledge Based Systems  -  Script Collaborative Intelligence  -  p. 485  -  SS 2016
WM/04-05 S. 485
* Sentiment Detection and Analysis
Using a Ground Truth set of exemplary documents for one type
allows to automatically learn rules
Rules should be employed for ...
classification of document type which is essential for the post-
ordered routing
Extraction of relevant statements which simplify the
understanding of the message
For pre-processing the documents of the ground truth set, the domain
model is considered
Ground Truth
Domain
Text Preprocessing
:
:
:
Proper Names
Normalized (neutralized) Text
WM/04.02 S. 486
© 2016  Andreas Dengel  -  AG Knowledge Based Systems  -  Script Collaborative Intelligence  -  p. 486  -  SS 2016
WM/04-05 S. 486
* Sentiment Detection and Analysis
Text Preprocessing
The set of ground truth elements is prepared for learning
“neutral” indicators “neutralisiert”
Da mir Ihre Preise für Complete Mobile S ohnehin zu hoch sind und
es für Mac iOS katastrophal funktioniert, kündige ich den Vrtrg.
MOB/238-143 zum 30. Juni 2012. Ich bitte Sie, mir die bereits
bezahlte Rate umgehend auf das Konto 100123456 zu überweisen
da mir ihre preise für complete mobile s ohnehin zu hoch sind und
es für mac ios katastrophal funktioniert kündige ich den vrtrg.
mob/238-143 zum 30 juni 2012 ich bitte sie mir die bereits
bezahlte rate umgehend auf das konto 100123456 zu überweisen
da mir ihre preise für RATE ohnehin zu hoch sind und es für
OPSYS katastrophal funktioniert kündige ich den vrtrg CONTRNO
zum DATE ich bitte sie mir die bereits bezahlte rate umgehend
auf das konto ACCOUNTNO zu überweisen
Elimination of ...
... punctuation marks
... capitals
... hyphenations
... delimiters of abbreviations
Introduction of placeholders for ...
... proper names
... dates
... amounts
... other identities
WM/04.02 S. 487
Indicator-Learning
© 2016  Andreas Dengel  -  AG Knowledge Based Systems  -  Script Collaborative Intelligence  -  p. 487  -  SS 2016
WM/04-05 S. 487
Using the neutralized text, we may learn rules for
classifying the document type
Ground Truth
Domain
Text Preprocessing
:
:
:
Proper Names
Normalized (neutralized) Text
Indicator-Learning
Bag-of-Words Approach
+
Word Distance
+
Edit Distance
Set of Indicators
Rule-Learning
Stemming
+
Thesaurus
$\{\, kw_{ij} \text{ with } kw_{ij} = [kw_{i1}, kw_{i2}, \dots, kw_{in}] \text{ for } 1 \le i \le m \,\}$
WM/04.02 S. 488
Set of Rules
(incl. Measure of Belief (MoB))
Application of rules is intuitive
© 2016  Andreas Dengel  -  AG Knowledge Based Systems  -  Script Collaborative Intelligence  -  p. 488  -  SS 2016
WM/04-05 S. 488
* Sentiment Detection and Analysis
Examples
Refund
Damage
Message
Cancellation
- my [2] life insurance
- please [2] to
- quit :lev 1
- cancellation :lev 2
- next possible [2] date
- transfer [2] the
- me [3] to
- confirm [3] the
:
WM/04.02 S. 489
© 2016  Andreas Dengel  -  AG Knowledge Based Systems  -  Script Collaborative Intelligence  -  p. 489  -  SS 2016
WM/04-05 S. 489
Categorizing real documents
involves many requirements!
WM/04.02 S. 490
© 2016  Andreas Dengel  -  AG Knowledge Based Systems  -  Script Collaborative Intelligence  -  p. 490  -  SS 2016
WM/04-05 S. 490
From the very beginning a document understanding
system can be trained to solve a certain problem
Aspects to be considered:
Categories and document classes
Definition of classification features
What to do with „not classified“
documents?
Specification of value ranges
(domains) for regions of interest
Identification of search patterns and
mathematical or logical constraints
What to do if information is missing?
Definition of special features for multipage documents
Definition of records
WM/04.02 S. 491
© 2016  Andreas Dengel  -  AG Knowledge Based Systems  -  Script Collaborative Intelligence  -  p. 491  -  SS 2016
WM/04-05 S. 491
Records allow multi-source verification of results
Note: In document understanding
even incomplete information in
documents
is completed through the data
base
Enterprise
data base
WM/04.02 S. 492
© 2016  Andreas Dengel  -  AG Knowledge Based Systems  -  Script Collaborative Intelligence  -  p. 492  -  SS 2016
WM/04-05 S. 492
Example of a document model editor
WM/04.02 S. 493
© 2016  Andreas Dengel  -  AG Knowledge Based Systems  -  Script Collaborative Intelligence  -  p. 493  -  SS 2016
WM/04-05 S. 493

Documentos relacionados