Functions related to mentions of the same entity in previous/following context in track chains.

Usage

lastMentionUnit(
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

unitsToLastMention(
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

lastMentionToken(
  tokenOrder = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

tokensToLastMention(
  tokenOrder = NULL,
  chain = NULL,
  zeroProtocol = "literal",
  zeroCond = NULL,
  unitSeq = NULL,
  unitTokenSeqName = NULL,
  unitDF = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countPrevMentions(
  windowSize,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countPrevMentionsIf(
  windowSize,
  cond,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countPrevMentionsMatch(
  windowSize,
  field,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

getPrevMentionField(
  field,
  tokenOrder = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

nextMentionUnit(
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

unitsToNextMention(
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

nextMentionToken(
  tokenOrder = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

tokensToNextMention(
  tokenOrder = NULL,
  chain = NULL,
  zeroProtocol = "literal",
  zeroCond = NULL,
  unitSeq = NULL,
  unitTokenSeqName = NULL,
  unitDF = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countNextMentions(
  windowSize,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countNextMentionsIf(
  windowSize,
  cond,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countNextMentionsMatch(
  windowSize,
  field,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

getNextMentionField(
  field,
  tokenOrder = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countPrevBridges(
  windowSize,
  frameMatrix,
  unitSeq = NULL,
  chain = NULL,
  inclRelations = NULL
)

Arguments

unitSeq: The vector of tokenOrder values where the mentions appeared. You can choose tokenOrderFirst, tokenOrderLast, or maybe an average of the two. By default it's tokenOrderFirst.
chain: The chain that each mention belongs to.
exclFrag: Whether to exclude 'fragments' (i.e. members of a combined chunk which do not serve as meaningful chunks in their own right).
combinedChunk: The combinedChunk column of the rezrDF. By default, named combinedChunk.
nonFragmentMember: Vector indicating whether each entry is a non-fragment member, i.e. a member of a combined chunk that also serves as a meaningful chunk in its own right.
tokenOrder: The vector of sequence values where the mentions appeared. Common choices are docTokenSeqFirst, docTokenSeqLast, wordTokenSeqFirst and wordTokenSeqLast (the last two are available after running addIsWordField on a rezrObj). By default it's docTokenSeqLast.
zeroProtocol: If "literal", I will take the seq values of the zeroes at face value. (If you set zeroes as non-words and use docWordSeqFirst or discourseWordSeqLast as your tokenOrder, this will lead to meaningless values.) If "unitFinal", I will treat zeroes as if they were the final word of the unit. If "unitFirst", I will treat zeroes as if they were the first word of the unit.
zeroCond: A condition for determining whether a token is zero. For most people, this should be (word column) == "<0>".
unitTokenSeqName: The name of the corresponding tokenSeq column in unitDF. By default, docTokenSeqLast is used.
windowSize: The size of the window in which you will be counting.
cond: For the "If" functions (countPrevMentionsIf and countNextMentionsIf), the condition that the previous / next mention must satisfy. It cannot refer to the current mention.
field: The field whose value you want to match or extract.

Note

The default values do not work with case_when(). I am still figuring out why. In the meantime, please specify unitSeq, combinedChunk etc. explicitly within case_when().

Examples

sbc007 = addUnitSeq(sbc007, "track")
#Get the number of units to the last mention
sbc007$trackDF$default = sbc007$trackDF$default %>%
rez_mutate(unitsToLastMention = unitsToLastMention(unitSeqLast))
#Get the number of words to the last mention
sbc007$trackDF$default =  sbc007$trackDF$default %>%
rez_mutate(wordsToLastMention = tokensToLastMention(
docWordSeqLast, #What seq to use
zeroProtocol = "unitFinal", #How to treat zeroes
zeroCond = (text == "<0>"),
unitDF = sbc007$unitDF)) #Additional argument for unitFinal protocol
#> Error in .f(df, ...): ℹ In argument: `wordsToLastMention = tokensToLastMention(...)`.
#> Caused by error in `grabFromDF()`:
#> ! object 'docWordSeqLast' not found
#Get the character length of the previous mention
sbc007$trackDF$default = sbc007$trackDF$default %>%
addFieldLocal(fieldName = "prevLength",
              expression = nchar(getPrevMentionField(text)),
              fieldaccess = "auto")
#Get the number of zero mentions and zero status-matching mentions in the last 20 units
sbc007$trackDF$default %>%
rez_mutate(isZero = text == "<0>") %>%
rez_mutate(noPrevZeroMentionsIn20 = countPrevMentionsIf(20, isZero),
            noPrevZeroMentionsIn20 = countPrevMentionsMatch(20, isZero))
#> # A tibble: 236 × 37
#>    id        doc   chain sourceLink token gapWords charCount tokenCount gapUnits
#>    <chr>     <chr> <chr> <chr>      <chr> <chr>        <dbl>      <dbl> <chr>   
#>  1 1096E4AF… sbc0… 278D… ""         37EF… N/A              1          1 N/A     
#>  2 92F20ACA… sbc0… 278D… "174E697E… 9363… 2                1          1 0       
#>  3 7E5BB650… sbc0… 2B67… ""         744A… N/A             17          5 N/A     
#>  4 1F74D2B0… sbc0… 2A01… "52452779… 1265… N/A              4          1 N/A     
#>  5 2485C4F7… sbc0… 278D… "CB1D9787… 2113… 10               3          1 1       
#>  6 1BF2260B… sbc0… 2A01… ""         35E3… 5               12          3 1       
#>  7 6B37B5A8… sbc0… 2A01… "ED8C9230… 233E… 5                3          1 1       
#>  8 259C2C29… sbc0… 251A… ""         1F6B… N/A             40          8 N/A     
#>  9 1D1F2B70… sbc0… 10FA… ""         24FE… N/A             25          5 N/A     
#> 10 1FA38066… sbc0… 3067… ""         158B… N/A             11          2 N/A     
#> # ℹ 226 more rows
#> # ℹ 28 more variables: kind <chr>, place <chr>, text <chr>, transcript <chr>,
#> #   endNote <chr>, order <chr>, negPlace <chr>, corpusSeq <chr>,
#> #   pSentOrder <chr>, POS_dft <chr>, tokenSeq <chr>, chunkType <chr>,
#> #   turnOrder <chr>, largerChunk <chr>, tokenOrderFirst <dbl>,
#> #   tokenOrderLast <dbl>, docTokenSeqFirst <dbl>, docTokenSeqLast <dbl>,
#> #   chainCreateSeq <dbl>, name <chr>, chainSize <dbl>, layer <chr>, …