Skip to contents

See also countCompetitors.

Usage

lastMentionUnit(
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

unitsToLastMention(
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

lastMentionToken(
  tokenOrder = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

tokensToLastMention(
  tokenOrder = NULL,
  chain = NULL,
  zeroProtocol = "literal",
  zeroCond = NULL,
  unitSeq = NULL,
  unitTokenSeqName = NULL,
  unitDF = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countPrevMentions(
  windowSize,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countPrevMentionsIf(
  windowSize,
  cond,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countPrevMentionsMatch(
  windowSize,
  field,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

getPrevMentionField(
  field,
  tokenOrder = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

nextMentionUnit(
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

unitsToNextMention(
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

nextMentionToken(
  tokenOrder = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

tokensToNextMention(
  tokenOrder = NULL,
  chain = NULL,
  zeroProtocol = "literal",
  zeroCond = NULL,
  unitSeq = NULL,
  unitTokenSeqName = NULL,
  unitDF = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countNextMentions(
  windowSize,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countNextMentionsIf(
  windowSize,
  cond,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countNextMentionsMatch(
  windowSize,
  field,
  unitSeq = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

getNextMentionField(
  field,
  tokenOrder = NULL,
  chain = NULL,
  exclFrag = F,
  combinedChunk = NULL,
  nonFragmentMember = F
)

countPrevBridges(
  windowSize,
  frameMatrix,
  unitSeq = NULL,
  chain = NULL,
  inclRelations = NULL
)

Arguments

unitSeq

The vector of tokenOrder values where the mentions appeared. You can choose tokenOrderFirst, tokenOrderLast, or maybe an average of the two. By default it's tokenOrderFirst.

chain

The chain that each mention belongs to.

exclFrag

Whether to exclude 'fragments' (i.e. members of a combined chunk which do not serve as meaningful chunks in their own right).

combinedChunk

The combinedChunk column of the rezrDF. By default, named combinedChunk.

nonFragmentMember

Vector indicating whether each entry is a non-fragment member, i.e. a member of a combined chunk that also serves as a meaningful chunk in its own right.

tokenOrder

The vector of sequence values where the mentions appeared. Common choices are docTokenSeqFirst, docTokenSeqLast, wordTokenSeqFirst and wordTokenSeqLast (the last two are available after running addIsWordField on a rezrObj). By default it's docTokenSeqLast.

zeroProtocol

If literal, I will take the seq values of the zeroes at face value. (If you set zeroes as non-words and use docWordSeqFirst or discourseWordSeqLast as your tokenOrder, this will lead to meaningless values.) If unitFinal, I will treat zeroes as if they were the final word of the unit. If unitFirst, I will treat zeroes as if they were the first word of the unit.

zeroCond

A condition for determining whether a token is zero. For most people, this should be (word column) == "<0>".

unitTokenSeqName

The name of the corresponding tokenSeq column in the unit table (unitDF). By default, docTokenSeqLast is used.

windowSize

The size of the window in which you will be counting.

cond

For the *If functions (countPrevMentionsIf and countNextMentionsIf), the condition that the previous / next mention must satisfy. It cannot refer to the current mention.

field

The field whose value you want to match or extract.

Note

The default values do not work with case_when(). I am still figuring out why. In the meantime, please specify unitSeq, combinedChunk etc. explicitly within case_when().

Examples

sbc007 = addUnitSeq(sbc007, "track")
#Get the number of units to the last mention
sbc007$trackDF$default = sbc007$trackDF$default %>%
rez_mutate(unitsToLastMention = unitsToLastMention(unitSeqLast))
#Get the number of words to the last mention
sbc007$trackDF$default =  sbc007$trackDF$default %>%
rez_mutate(wordsToLastMention = tokensToLastMention(
docWordSeqLast, #What seq to use
zeroProtocol = "unitFinal", #How to treat zeroes
zeroCond = (text == "<0>"),
unitDF = sbc007$unitDF)) #Additional argument for unitFinal protocol
#> Error in .f(df, ...):  In argument: `wordsToLastMention = tokensToLastMention(...)`.
#> Caused by error in `grabFromDF()`:
#> ! object 'docWordSeqLast' not found
#Get the character length of the previous mention
sbc007$trackDF$default = sbc007$trackDF$default %>%
addFieldLocal(fieldName = "prevLength",
              expression = nchar(getPrevMentionField(text)),
              fieldaccess = "auto")
#Get the number of zero mentions and zero status-matching mentions in the last 20 units
sbc007$trackDF$default %>%
rez_mutate(isZero = text == "<0>") %>%
rez_mutate(noPrevZeroMentionsIn20 = countPrevMentionsIf(20, isZero),
            noPrevZeroMentionsIn20 = countPrevMentionsMatch(20, isZero))
#> # A tibble: 236 × 37
#>    id        doc   chain sourceLink token gapWords charCount tokenCount gapUnits
#>    <chr>     <chr> <chr> <chr>      <chr> <chr>        <dbl>      <dbl> <chr>   
#>  1 1096E4AF… sbc0… 278D… ""         37EF… N/A              1          1 N/A     
#>  2 92F20ACA… sbc0… 278D… "174E697E… 9363… 2                1          1 0       
#>  3 7E5BB650… sbc0… 2B67… ""         744A… N/A             17          5 N/A     
#>  4 1F74D2B0… sbc0… 2A01… "52452779… 1265… N/A              4          1 N/A     
#>  5 2485C4F7… sbc0… 278D… "CB1D9787… 2113… 10               3          1 1       
#>  6 1BF2260B… sbc0… 2A01… ""         35E3… 5               12          3 1       
#>  7 6B37B5A8… sbc0… 2A01… "ED8C9230… 233E… 5                3          1 1       
#>  8 259C2C29… sbc0… 251A… ""         1F6B… N/A             40          8 N/A     
#>  9 1D1F2B70… sbc0… 10FA… ""         24FE… N/A             25          5 N/A     
#> 10 1FA38066… sbc0… 3067… ""         158B… N/A             11          2 N/A     
#> # ℹ 226 more rows
#> # ℹ 28 more variables: kind <chr>, place <chr>, text <chr>, transcript <chr>,
#> #   endNote <chr>, order <chr>, negPlace <chr>, corpusSeq <chr>,
#> #   pSentOrder <chr>, POS_dft <chr>, tokenSeq <chr>, chunkType <chr>,
#> #   turnOrder <chr>, largerChunk <chr>, tokenOrderFirst <dbl>,
#> #   tokenOrderLast <dbl>, docTokenSeqFirst <dbl>, docTokenSeqLast <dbl>,
#> #   chainCreateSeq <dbl>, name <chr>, chainSize <dbl>, layer <chr>, …